% QUIK paper, EMNLP 2024 proceedings entry.
% NOTE(review): the doi field carries the arXiv-prefixed (10.48550/arXiv) DOI,
% i.e. the preprint identifier; swap in the proceedings DOI once confirmed.
@inproceedings{ashkboos2024quik,
  author    = {Ashkboos, Saleh and Markov, Ilia and Frantar, Elias and Zhong, Tingxuan and Wang, Xincheng and Ren, Jie and Hoefler, Torsten and Alistarh, Dan},
  title     = {{QUIK}: Towards End-to-End 4-Bit Inference on Generative Large Language Models},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing ({EMNLP}'24)},
  year      = {2024},
  month     = nov,
  pages     = {3355--3371},
  location  = {Miami, FL, USA},
  publisher = {Association for Computational Linguistics},
  doi       = {10.48550/arXiv.2310.09259},
}