Instructions to use Salesforce/instructblip-vicuna-7b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Salesforce/instructblip-vicuna-7b with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="Salesforce/instructblip-vicuna-7b")

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
model = AutoModelForImageTextToText.from_pretrained("Salesforce/instructblip-vicuna-7b")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Salesforce/instructblip-vicuna-7b with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Salesforce/instructblip-vicuna-7b"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Salesforce/instructblip-vicuna-7b",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/Salesforce/instructblip-vicuna-7b
- SGLang
How to use Salesforce/instructblip-vicuna-7b with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Salesforce/instructblip-vicuna-7b" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Salesforce/instructblip-vicuna-7b",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Salesforce/instructblip-vicuna-7b" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Salesforce/instructblip-vicuna-7b",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use Salesforce/instructblip-vicuna-7b with Docker Model Runner:
docker model run hf.co/Salesforce/instructblip-vicuna-7b
processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b") got error
I got the following error when I load the processor; I assume it happens while loading the tokenizer from the pretrained checkpoint.
Exception Traceback (most recent call last)
Cell In[2], line 7
4 import requests
6 model = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b")
----> 7 processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
9 device = "cuda" if torch.cuda.is_available() else "cpu"
10 model.to(device)
File ~/.local/lib/python3.9/site-packages/transformers/models/instructblip/processing_instructblip.py:170, in InstructBlipProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
167 @classmethod
168 def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
169 qformer_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="qformer_tokenizer")
--> 170 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
171 args.append(qformer_tokenizer)
172 return cls(*args)
File ~/.local/lib/python3.9/site-packages/transformers/processing_utils.py:259, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
256 else:
257 attribute_class = getattr(transformers_module, class_name)
--> 259 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
260 return args
File ~/.local/lib/python3.9/site-packages/transformers/models/auto/tokenization_auto.py:692, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
688 if tokenizer_class is None:
689 raise ValueError(
690 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
691 )
--> 692 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
694 # Otherwise we have to be creative.
695 # if model is an encoder decoder, the encoder tokenizer class is used by default
696 if isinstance(config, EncoderDecoderConfig):
File ~/.local/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:1846, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
1843 else:
1844 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1846 return cls._from_pretrained(
1847 resolved_vocab_files,
1848 pretrained_model_name_or_path,
1849 init_configuration,
1850 *init_inputs,
1851 use_auth_token=token,
1852 cache_dir=cache_dir,
1853 local_files_only=local_files_only,
1854 _commit_hash=commit_hash,
1855 _is_local=is_local,
1856 **kwargs,
1857 )
File ~/.local/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:2009, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2007 # Instantiate tokenizer.
2008 try:
-> 2009 tokenizer = cls(*init_inputs, **init_kwargs)
2010 except OSError:
2011 raise OSError(
2012 "Unable to load vocabulary from file. "
2013 "Please check that the provided vocabulary is accessible and not corrupted."
2014 )
File ~/.local/lib/python3.9/site-packages/transformers/models/llama/tokenization_llama_fast.py:100, in LlamaTokenizerFast.__init__(self, vocab_file, tokenizer_file, clean_up_tokenization_spaces, unk_token, bos_token, eos_token, add_bos_token, add_eos_token, **kwargs)
88 def __init__(
89 self,
90 vocab_file=None,
(...)
98 **kwargs,
99 ):
--> 100 super().__init__(
101 vocab_file=vocab_file,
102 tokenizer_file=tokenizer_file,
103 clean_up_tokenization_spaces=clean_up_tokenization_spaces,
104 unk_token=unk_token,
105 bos_token=bos_token,
106 eos_token=eos_token,
107 **kwargs,
108 )
109 self._add_bos_token = add_bos_token
110 self._add_eos_token = add_eos_token
File ~/.local/lib/python3.9/site-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
108 fast_tokenizer = copy.deepcopy(tokenizer_object)
109 elif fast_tokenizer_file is not None and not from_slow:
110 # We have a serialization from tokenizers which let us directly build the backend
--> 111 fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
112 elif slow_tokenizer is not None:
113 # We need to convert a slow tokenizer to build the backend
114 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
Exception: data did not match any variant of untagged enum PyNormalizerTypeWrapper at line 58 column 3
Hi @handing2412
Thanks for the issue! Can you try re-running your snippet with the latest versions of transformers and tokenizers?
pip install -U transformers tokenizers