# NOTE(review): the following lines are web-page extraction residue (HF Spaces
# header: runtime status, file size, commit hashes, line-number gutter), kept
# as comments so the module parses:
#   Spaces: Runtime error / Runtime error / File size: 1,503 Bytes
#   a558a96 d06533b a72f088 ... (commit hash gutter)
#   1 2 3 ... 54 (line-number gutter)
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
import torch
from typing import Optional
def load_llava_model():
    """Build an image-to-text pipeline for LLaVA 1.5 (7B) in 4-bit.

    Uses NF4 double quantization with fp16 compute so the 7B model fits
    in the constrained GPU memory of a HF Spaces runtime; `device_map="auto"`
    lets accelerate place the weights.
    """
    repo = "llava-hf/llava-1.5-7b-hf"
    bnb = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    )
    extra = {
        "torch_dtype": torch.float16,
        "quantization_config": bnb,
    }
    return pipeline(
        "image-to-text",
        model=repo,
        tokenizer=repo,
        device_map="auto",
        model_kwargs=extra,
    )
def load_caption_model():
    """Build a BLIP-2 (OPT-2.7B) image-captioning pipeline.

    Loads weights in fp16 with automatic device placement; the HF cache is
    redirected to /tmp/models (writable scratch space on Spaces).
    """
    model_opts = {"cache_dir": "/tmp/models"}
    return pipeline(
        "image-to-text",
        model="Salesforce/blip2-opt-2.7b",
        device_map="auto",
        torch_dtype=torch.float16,
        model_kwargs=model_opts,
    )
def load_retrieval_models():
    """Load the text and image encoders used for retrieval.

    Returns:
        dict: 'text_encoder'  -> SentenceTransformer all-MiniLM-L6-v2
              (on CUDA when available, else CPU);
              'image_encoder' -> CLIP ViT-B/32 loaded via AutoModel in fp16
              with automatic device placement.
    """
    # Local imports keep these heavy dependencies out of module import time.
    from sentence_transformers import SentenceTransformer
    from transformers import AutoModel

    device = "cuda" if torch.cuda.is_available() else "cpu"
    models = {}
    models['text_encoder'] = SentenceTransformer(
        'sentence-transformers/all-MiniLM-L6-v2',
        device=device,
    )
    models['image_encoder'] = AutoModel.from_pretrained(
        "openai/clip-vit-base-patch32",
        device_map="auto",
        torch_dtype=torch.float16,
    )
    # Fix: original line ended with a stray ' |' (copy-paste gutter residue)
    # that made the whole file a SyntaxError.
    return models