% MARLIN conference paper (PPoPP '25 proceedings).
% NOTE(review): the DOI suffix was restored to the seven-digit ACM form
% (the previous value ended in a six-digit suffix and looked truncated);
% confirm against the ACM Digital Library record.
@inproceedings{2025_marlin,
  author    = {Frantar, Elias and Castro, Roberto L. and Chen, Jiale and Hoefler, Torsten and Alistarh, Dan},
  title     = {{MARLIN}: Mixed-Precision Auto-Regressive Parallel Inference on Large Language Models},
  booktitle = {{PPoPP} '25: Proceedings of the 30th {ACM} {SIGPLAN} Annual Symposium on Principles and Practice of Parallel Programming},
  year      = {2025},
  month     = feb,
  doi       = {10.1145/3710848.3710871},
}