% Conference paper. Acronyms and the system name in the title are
% brace-protected so styles that apply sentence casing keep their capitals.
@inproceedings{heo2024neupims,
  author    = {Heo, Guseul and Lee, Sangyeop and Cho, Jaehong and Choi, Hyunmin and Lee, Sanghyeon and Ham, Hyungkyu and Kim, Gwangsun and Mahajan, Divya and Park, Jongse},
  title     = {{NeuPIMs}: {NPU-PIM} Heterogeneous Acceleration for Batched {LLM} Inferencing},
  booktitle = {Proceedings of the 29th {ACM} International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3},
  pages     = {722--737},
  year      = {2024},
}