TurkishCodeMan committed on
Commit
bffd49d
·
verified ·
1 Parent(s): 5f7597f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. models/model_loader.py +2 -3
models/model_loader.py CHANGED
@@ -10,9 +10,7 @@ def load_embed_model(model_path: str = "nvidia/llama-nemotron-embed-vl-1b-v2"):
10
  print(f"🔄 Loading embedding model on {device}...")
11
 
12
  config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
13
- config._attn_implementation = "sdpa"
14
- if hasattr(config, 'llm_config'):
15
- config.llm_config._attn_implementation = "sdpa"
16
 
17
  # ✅ FIX: Use manual device instead of device_map="auto"
18
  model = AutoModel.from_pretrained(
@@ -21,6 +19,7 @@ def load_embed_model(model_path: str = "nvidia/llama-nemotron-embed-vl-1b-v2"):
21
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
22
  trust_remote_code=True,
23
  low_cpu_mem_usage=True, # ✅ CPU optimization
 
24
  ).to(device).eval()
25
 
26
  print(f"✅ Embedding model loaded on {device}")
 
10
  print(f"🔄 Loading embedding model on {device}...")
11
 
12
  config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
13
+ # ✅ FIX: Removed SDPA config override which causes issues in HF Spaces
 
 
14
 
15
  # ✅ FIX: Use manual device instead of device_map="auto"
16
  model = AutoModel.from_pretrained(
 
19
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
20
  trust_remote_code=True,
21
  low_cpu_mem_usage=True, # ✅ CPU optimization
22
+ attn_implementation="eager", # ✅ FIX: Force eager execution
23
  ).to(device).eval()
24
 
25
  print(f"✅ Embedding model loaded on {device}")