Upload folder using huggingface_hub

Files changed:
- models/model_loader.py (+13 -6)
- requirements.txt (+1 -2)

models/model_loader.py (CHANGED)
@@ -7,19 +7,23 @@ def load_embed_model(model_path: str = "nvidia/llama-nemotron-embed-vl-1b-v2"):
     """Load embedding model (cached)."""
     device = "cuda" if torch.cuda.is_available() else "cpu"

+    print(f"🔄 Loading embedding model on {device}...")
+
     config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
     config._attn_implementation = "sdpa"
     if hasattr(config, 'llm_config'):
         config.llm_config._attn_implementation = "sdpa"

+    # ✅ FIX: Use manual device instead of device_map="auto"
     model = AutoModel.from_pretrained(
         model_path,
         config=config,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
+        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         trust_remote_code=True,
-    )
+        low_cpu_mem_usage=True,  # ✅ CPU optimization
+    ).to(device).eval()

+    print(f"✅ Embedding model loaded on {device}")
     return model, device

@@ -28,13 +32,15 @@ def load_rerank_model(model_path: str = "nvidia/llama-nemotron-rerank-vl-1b-v2")
     """Load reranking model (cached)."""
     device = "cuda" if torch.cuda.is_available() else "cpu"

+    print(f"🔄 Loading reranking model on {device}...")
+
+    # ✅ FIX: Use manual device instead of device_map="auto"
     model = AutoModelForSequenceClassification.from_pretrained(
         model_path,
-        torch_dtype=torch.bfloat16,
+        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         trust_remote_code=True,
         attn_implementation="eager",
-        device_map="auto",
-    ).eval()
+    ).to(device).eval()

     processor = AutoProcessor.from_pretrained(
         model_path,

@@ -44,4 +50,5 @@ def load_rerank_model(model_path: str = "nvidia/llama-nemotron-rerank-vl-1b-v2")
         rerank_max_length=2048
     )

+    print(f"✅ Reranking model loaded on {device}")
     return model, processor, device
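For context on how these loaders are consumed (the diff only shows the loading side): both functions now pick the device themselves and return it alongside the model, so callers need no device logic of their own. Below is a minimal caller sketch; the import path and the caching layer are assumptions (the "(cached)" docstrings suggest an app-level decorator such as Streamlit's st.cache_resource, but the SDK is not shown in this commit), while the function names and return shapes come from model_loader.py above.

# Hypothetical caller sketch; only the function names and return tuples
# are taken from model_loader.py, everything else is assumed.
from models.model_loader import load_embed_model, load_rerank_model

embed_model, embed_device = load_embed_model()                        # -> (model, device)
rerank_model, rerank_processor, rerank_device = load_rerank_model()   # -> (model, processor, device)

# On GPU hardware both devices report "cuda"; on the CPU tier they report "cpu"
# and the models load in float32 per the torch_dtype fallback in the diff above.
print(embed_device, rerank_device)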
requirements.txt (CHANGED)
@@ -4,5 +4,4 @@ transformers>=4.35.0
 safetensors>=0.4.0
 Pillow>=10.0.0
 matplotlib>=3.7.0
-
-torchvision>=0.16.0
+accelerate>=0.24.0
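Note on the dependency swap: torchvision is dropped and accelerate is pinned, which lines up with the loader changes above; in recent transformers releases, low_cpu_mem_usage=True (now passed in load_embed_model) requires accelerate to be installed, while nothing in the new loading path appears to need torchvision. A purely illustrative startup check, with a hypothetical loop over the packages pinned in this file:

# Hypothetical sanity check: confirm the packages model_loader.py relies on are importable.
import importlib.util

for pkg in ("torch", "transformers", "accelerate", "safetensors", "PIL", "matplotlib"):
    status = "ok" if importlib.util.find_spec(pkg) else "MISSING"
    print(f"{pkg}: {status}")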