% VITA-1.5 (arXiv preprint). `journal` is kept so classic .bst styles still
% print the arXiv id; `eprint`/`archiveprefix` expose it to eprint-aware styles.
% Braced words in the title are protected from style-applied sentence casing.
@article{fu2025vita,
  author        = {Fu, Chaoyou and Lin, Haojia and Wang, Xiong and Zhang, Yi-Fan and Shen, Yunhang and Liu, Xiaoyu and Li, Yangze and Long, Zuwei and Gao, Heting and Li, Ke and others},
  title         = {{VITA-1.5}: Towards {GPT-4o} Level Real-Time Vision and Speech Interaction},
  journal       = {arXiv preprint arXiv:2501.01957},
  year          = {2025},
  eprint        = {2501.01957},
  archiveprefix = {arXiv},
}
% VITA (arXiv preprint). `journal` is kept so classic .bst styles still
% print the arXiv id; `eprint`/`archiveprefix` expose it to eprint-aware styles.
% Title is stored in Title Case with acronyms braced so styles can downcase
% the rest without losing "VITA" / "LLM".
@article{fu2024vita,
  author        = {Fu, Chaoyou and Lin, Haojia and Long, Zuwei and Shen, Yunhang and Zhao, Meng and Zhang, Yifan and Dong, Shaoqi and Wang, Xiong and Yin, Di and Ma, Long and others},
  title         = {{VITA}: Towards Open-Source Interactive Omni Multimodal {LLM}},
  journal       = {arXiv preprint arXiv:2408.05211},
  year          = {2024},
  eprint        = {2408.05211},
  archiveprefix = {arXiv},
}