# NOTE(review): this file was pasted from a Hugging Face Space that reported a
# "Runtime error" — likely the 4-bit / fp16 model loading below on CPU-only
# Space hardware; confirm the Space's hardware tier.
from typing import Optional

import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
def load_llava_model():
    """Load the LLaVA 1.5 7B image-to-text pipeline with 4-bit quantization.

    Double-quantized NF4 weights with fp16 compute keep the 7B model small
    enough for a memory-constrained HF Space.

    NOTE(review): bitsandbytes 4-bit loading requires a CUDA GPU — on a
    CPU-only Space this raises at load time; confirm the Space hardware.

    Returns:
        transformers.Pipeline: an "image-to-text" pipeline wrapping
        llava-hf/llava-1.5-7b-hf.
    """
    model_id = "llava-hf/llava-1.5-7b-hf"
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,  # matmuls run in fp16
        bnb_4bit_use_double_quant=True,        # quantize the quantization constants too
        bnb_4bit_quant_type="nf4",
    )
    return pipeline(
        "image-to-text",
        model=model_id,
        tokenizer=model_id,
        device_map="auto",
        model_kwargs={
            # fp16 for the modules that stay unquantized
            "torch_dtype": torch.float16,
            "quantization_config": quant_config,
        },
    )
def load_caption_model():
    """Load the BLIP-2 (OPT-2.7B) captioning pipeline in half precision.

    Weights are cached under /tmp/models — the writable location on an HF
    Space. NOTE(review): fp16 inference generally needs a GPU; on CPU-only
    hardware this may fail or be very slow — confirm the Space hardware.

    Returns:
        transformers.Pipeline: an "image-to-text" pipeline wrapping
        Salesforce/blip2-opt-2.7b.
    """
    return pipeline(
        "image-to-text",
        model="Salesforce/blip2-opt-2.7b",
        device_map="auto",
        torch_dtype=torch.float16,
        model_kwargs={"cache_dir": "/tmp/models"},
    )
def load_retrieval_models():
    """Load the text and image encoders used for retrieval.

    Imports are function-local so the heavy retrieval stack is only pulled
    in when retrieval is actually used.

    Returns:
        dict: {'text_encoder': SentenceTransformer all-MiniLM-L6-v2 (on CUDA
        when available, else CPU), 'image_encoder': CLIP ViT-B/32 loaded via
        AutoModel in fp16 with device_map="auto"}.
    """
    from sentence_transformers import SentenceTransformer
    from transformers import AutoModel

    models = {}
    models['text_encoder'] = SentenceTransformer(
        'sentence-transformers/all-MiniLM-L6-v2',
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    models['image_encoder'] = AutoModel.from_pretrained(
        "openai/clip-vit-base-patch32",
        device_map="auto",
        torch_dtype=torch.float16,
    )
    return models