Spaces:
Sleeping
Sleeping
"""Load the models used by the Multimodal RAG system.

Importing this module loads everything eagerly and exposes these
module-level names:

- ``device``: ``"cuda"`` when CUDA is available, otherwise ``"cpu"``.
- ``embedding_model``: ``SentenceTransformer`` for ``all-MiniLM-L6-v2``.
- ``model_name``: Hugging Face model id of the Gemma 3 checkpoint.
- ``model``: the Gemma 3 model, loaded on the CPU and set to eval mode.
- ``processor``: the ``AutoProcessor`` loaded for ``model_name``.
"""
import gc

import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

from utils import clear_gpu_cache

# Preferred compute device. Nothing in this module reads it (the Gemma model
# is pinned to the CPU below); it is kept as a module-level name because other
# modules may import it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Embedding model
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Gemma 3 checkpoint, loaded in bfloat16 (no quantization config is applied).
model_name = "google/gemma-3-4b-it"

# Load Gemma 3
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="cpu",  # Avoid meta errors (original author's note)
)
# device_map="cpu" already places every weight on the CPU, so no additional
# model.to("cpu") call is needed here.
model.eval()

# Processor
processor = AutoProcessor.from_pretrained(model_name, use_fast=True)

# Post-load cleanup. clear_gpu_cache is defined in utils; presumably it frees
# cached GPU memory -- confirm against utils.clear_gpu_cache.
clear_gpu_cache()
gc.collect()