TurkishCodeMan committed on
Commit
7c0c1c8
·
verified ·
1 Parent(s): 77d2f66

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. models/model_loader.py +13 -6
  2. requirements.txt +1 -2
models/model_loader.py CHANGED
@@ -7,19 +7,23 @@ def load_embed_model(model_path: str = "nvidia/llama-nemotron-embed-vl-1b-v2"):
7
  """Load embedding model (cached)."""
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
 
 
10
  config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
11
  config._attn_implementation = "sdpa"
12
  if hasattr(config, 'llm_config'):
13
  config.llm_config._attn_implementation = "sdpa"
14
 
 
15
  model = AutoModel.from_pretrained(
16
  model_path,
17
  config=config,
18
- torch_dtype=torch.bfloat16,
19
- device_map="auto",
20
  trust_remote_code=True,
21
- ).eval()
 
22
 
 
23
  return model, device
24
 
25
 
@@ -28,13 +32,15 @@ def load_rerank_model(model_path: str = "nvidia/llama-nemotron-rerank-vl-1b-v2")
28
  """Load reranking model (cached)."""
29
  device = "cuda" if torch.cuda.is_available() else "cpu"
30
 
 
 
 
31
  model = AutoModelForSequenceClassification.from_pretrained(
32
  model_path,
33
- torch_dtype=torch.bfloat16,
34
  trust_remote_code=True,
35
  attn_implementation="eager",
36
- device_map="auto"
37
- ).eval()
38
 
39
  processor = AutoProcessor.from_pretrained(
40
  model_path,
@@ -44,4 +50,5 @@ def load_rerank_model(model_path: str = "nvidia/llama-nemotron-rerank-vl-1b-v2")
44
  rerank_max_length=2048
45
  )
46
 
 
47
  return model, processor, device
 
7
  """Load embedding model (cached)."""
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
+ print(f"🔄 Loading embedding model on {device}...")
11
+
12
  config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
13
  config._attn_implementation = "sdpa"
14
  if hasattr(config, 'llm_config'):
15
  config.llm_config._attn_implementation = "sdpa"
16
 
17
+ # ✅ FIX: Use manual device instead of device_map="auto"
18
  model = AutoModel.from_pretrained(
19
  model_path,
20
  config=config,
21
+ torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
 
22
  trust_remote_code=True,
23
+ low_cpu_mem_usage=True, # ✅ CPU optimization
24
+ ).to(device).eval()
25
 
26
+ print(f"✅ Embedding model loaded on {device}")
27
  return model, device
28
 
29
 
 
32
  """Load reranking model (cached)."""
33
  device = "cuda" if torch.cuda.is_available() else "cpu"
34
 
35
+ print(f"🔄 Loading reranking model on {device}...")
36
+
37
+ # ✅ FIX: Use manual device instead of device_map="auto"
38
  model = AutoModelForSequenceClassification.from_pretrained(
39
  model_path,
40
+ torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
41
  trust_remote_code=True,
42
  attn_implementation="eager",
43
+ ).to(device).eval()
 
44
 
45
  processor = AutoProcessor.from_pretrained(
46
  model_path,
 
50
  rerank_max_length=2048
51
  )
52
 
53
+ print(f"✅ Reranking model loaded on {device}")
54
  return model, processor, device
requirements.txt CHANGED
@@ -4,5 +4,4 @@ transformers>=4.35.0
4
  safetensors>=0.4.0
5
  Pillow>=10.0.0
6
  matplotlib>=3.7.0
7
- datasets>=2.14.0
8
- torchvision>=0.16.0
 
4
  safetensors>=0.4.0
5
  Pillow>=10.0.0
6
  matplotlib>=3.7.0
7
+ accelerate>=0.24.0