Authors: InternVL team
Affiliation: Shanghai AI Laboratory & SenseTime & Tsinghua University
Email: czcz94cz@gmail.com
Description: InternVL Family: Closing the Gap to Commercial Multimodal Models with Open-Source Suites — A Pioneering Open-Source Alternative to GPT-4V
Demo: https://internvl.opengvlab.com/
Code: https://github.com/OpenGVLab/InternVL
Model: https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5
% InternVL vision foundation model paper, cited as an arXiv preprint.
% The arXiv identifier lives in the eprint fields so each style can format it.
% The author list is abbreviated; "and others" lets the style print "et al."
% The model name is braced in the title so sentence-casing styles keep its capitals.
@misc{chen2023internvl,
  author        = {Chen, Zhe and Wu, Jiannan and Wang, Wenhai and Su, Weijie and Chen, Guo and Xing, Sen and Zhong, Muyan and Zhang, Qinglong and Zhu, Xizhou and Lu, Lewei and others},
  title         = {{InternVL}: Scaling Up Vision Foundation Models and Aligning for Generic Visual-Linguistic Tasks},
  year          = {2023},
  eprint        = {2312.14238},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  url           = {https://arxiv.org/abs/2312.14238},
}
% InternVL 1.5 report ("How Far Are We to GPT-4V?"), cited as an arXiv preprint.
% The arXiv identifier lives in the eprint fields so each style can format it.
% The author list is abbreviated; "and others" lets the style print "et al."
% Braces in the title protect the model acronym and the capital that follows the
% question mark, which sentence-casing styles would otherwise lowercase.
@misc{chen2024far,
  author        = {Chen, Zhe and Wang, Weiyun and Tian, Hao and Ye, Shenglong and Gao, Zhangwei and Cui, Erfei and Tong, Wenwen and Hu, Kongzhi and Luo, Jiapeng and Ma, Zheng and others},
  title         = {How Far Are We to {GPT-4V}? {Closing} the Gap to Commercial Multimodal Models with Open-Source Suites},
  year          = {2024},
  eprint        = {2404.16821},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  url           = {https://arxiv.org/abs/2404.16821},
}
Source code