Instructions to use OpenGVLab/InternVL3-78B-hf with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use OpenGVLab/InternVL3-78B-hf with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="OpenGVLab/InternVL3-78B-hf") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)

# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL3-78B-hf") model = AutoModelForImageTextToText.from_pretrained("OpenGVLab/InternVL3-78B-hf") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use OpenGVLab/InternVL3-78B-hf with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "OpenGVLab/InternVL3-78B-hf" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "OpenGVLab/InternVL3-78B-hf", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'

Use Docker
docker model run hf.co/OpenGVLab/InternVL3-78B-hf
- SGLang
How to use OpenGVLab/InternVL3-78B-hf with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "OpenGVLab/InternVL3-78B-hf" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "OpenGVLab/InternVL3-78B-hf", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'

Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "OpenGVLab/InternVL3-78B-hf" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "OpenGVLab/InternVL3-78B-hf", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'

- Docker Model Runner
How to use OpenGVLab/InternVL3-78B-hf with Docker Model Runner:
docker model run hf.co/OpenGVLab/InternVL3-78B-hf
AttributeError: Qwen2TokenizerFast has no attribute video_token
You have video processor config saved in preprocessor.json file which is deprecated. Video processor configs should be saved in their own video_preprocessor.json file. You can rename the file or load and save the processor back which renames it automatically. Loading from preprocessor.json will be removed in v5.0.
AttributeError Traceback (most recent call last)
Cell In[5], line 6
4 torch_device = "cuda"
5 model_checkpoint = "/media/cfs/transaction-ctr-offline/InternVL/InternVL3-78B-hf"
----> 6 processor = AutoProcessor.from_pretrained(model_checkpoint)
7 model = AutoModelForImageTextToText.from_pretrained(model_checkpoint, device_map=torch_device, torch_dtype=torch.bfloat16)
File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
372 return processor_class.from_pretrained(
373 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
374 )
375 elif processor_class is not None:
--> 376 return processor_class.from_pretrained(
377 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
378 )
379 # Last try: we use the PROCESSOR_MAPPING.
380 elif type(config) in PROCESSOR_MAPPING:
File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/processing_utils.py:1187, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
-> 1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)
File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/processing_utils.py:982, in ProcessorMixin.from_args_and_dict(cls, args, processor_dict, **kwargs)
979 del processor_dict["auto_map"]
981 unused_kwargs = cls.validate_init_kwargs(processor_config=processor_dict, valid_kwargs=cls.valid_kwargs)
--> 982 processor = cls(*args, **processor_dict)
984 # Update processor with kwargs if needed
985 for key in set(kwargs.keys()):
File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/models/internvl/processing_internvl.py:98, in InternVLProcessor.__init__(self, image_processor, tokenizer, video_processor, image_seq_length, chat_template, **kwargs)
96 self.end_image_token = tokenizer.end_image_token
97 self.image_token = tokenizer.context_image_token
---> 98 self.video_token = tokenizer.video_token
99 self.image_token_id = tokenizer.context_image_token_id
101 super().__init__(image_processor, tokenizer, video_processor, chat_template=chat_template, **kwargs)
File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1111, in SpecialTokensMixin.__getattr__(self, key)
1108 return self.convert_tokens_to_ids(attr_as_tokens) if attr_as_tokens is not None else None
1110 if key not in self.__dict__:
-> 1111 raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
1112 else:
1113 return super().__getattr__(key)
AttributeError: Qwen2TokenizerFast has no attribute video_token