Spaces:

Saint5
/

multimodal_rag_system

Sleeping

multimodal_rag_system / model_setup.py

device map fix

bff5090 verified 5 months ago

868 Bytes

	"""Load the models used by the Multimodal RAG system."""

	import torch
	import gc

	from sentence_transformers import SentenceTransformer
	from transformers import AutoProcessor, Gemma3ForConditionalGeneration
	# from accelerate import disk_offload
	from utils import clear_gpu_cache

	# Pick the compute device: CUDA when torch reports it is available, CPU otherwise.
	if torch.cuda.is_available():
		device = "cuda"
	else:
		device = "cpu"

	# Sentence-embedding model, instantiated once when this module is imported.
	_EMBEDDING_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
	embedding_model = SentenceTransformer(_EMBEDDING_MODEL_ID)

	# Gemma 3 checkpoint identifier (no quantization config is applied here).
	model_name = "google/gemma-3-4b-it"

	# Load Gemma 3 in bfloat16 with every weight placed on the CPU.
	# Per the original note, the explicit device_map is what avoids "meta"
	# tensor errors; because it already puts the whole model on the CPU, no
	# follow-up model.to("cpu") call is needed.
	# NOTE(review): the module-level `device` is not consulted here, so the
	# model stays on the CPU even when CUDA is available — confirm intended.
	model = Gemma3ForConditionalGeneration.from_pretrained(
		model_name,
		torch_dtype=torch.bfloat16,
		device_map="cpu",
	)
	# Put the model in evaluation mode.
	model.eval()

	# Processor for the same checkpoint as the model; the fast variant is requested.
	_processor_options = {"use_fast": True}
	processor = AutoProcessor.from_pretrained(model_name, **_processor_options)

	# Post-load cleanup: call the project's clear_gpu_cache helper, then run a
	# full garbage-collection pass.
	# NOTE(review): clear_gpu_cache is imported from utils and not shown here —
	# presumably it empties the CUDA allocator cache; confirm against utils.py.
	clear_gpu_cache()
	gc.collect()