---
# Hub model-card metadata for the RoboInterVLM checkpoint collection.
license: apache-2.0

# Upstream model identifiers (NOTE(review): presumably the backbones the
# checkpoints listed below were fine-tuned from — confirm the mapping).
base_model:
  - Qwen/Qwen2.5-VL-3B-Instruct
  - Qwen/Qwen2.5-VL-7B-Instruct
  - lmms-lab/llava-onevision-qwen2-7b-ov

tags:
  - robotics
  - vision-language-action-model
  - vision-language-model

library_name: transformers

# Collection Metadata (Referencing InternRobotics/VLN-PE style)
repo: InternRobotics/RoboInter-VLM
type: "checkpoint-collection"
description: "Collection of RoboInterVLM checkpoints and configs fine-tuned on RoboInter-VQA."

# One entry per checkpoint: `name` and `path` share the same directory name
# (NOTE(review): `path` is presumably relative to the repository root).
checkpoints:
  - name: RoboInterVLM_qwenvl25_3b
    path: RoboInterVLM_qwenvl25_3b/
    notes: "Lightweight Qwen2.5-VL model"
  - name: RoboInterVLM_qwenvl25_7b
    path: RoboInterVLM_qwenvl25_7b/
    notes: "Stronger performance Qwen2.5-VL backbone"
  - name: RoboInterVLM_llava_one_vision_7B
    path: RoboInterVLM_llava_one_vision_7B/
    notes: "LLaVA-OneVision (SigLIP + Qwen2) backbone"