Saint5 committed on
Commit
bff5090
·
verified ·
1 Parent(s): 8b28692

device map fix

Browse files
Files changed (1) hide show
  1. model_setup.py +5 -14
model_setup.py CHANGED
@@ -4,8 +4,8 @@ import torch
4
  import gc
5
 
6
  from sentence_transformers import SentenceTransformer
7
- from transformers import AutoProcessor, Gemma3ForConditionalGeneration, BitsAndBytesConfig
8
- from accelerate import disk_offload
9
  from utils import clear_gpu_cache
10
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -15,24 +15,15 @@ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
15
 
16
  # Gemma3 quantization config
17
  model_name = "google/gemma-3-4b-it"
18
- bnb_config = BitsAndBytesConfig(
19
- load_in_4bit=True,
20
- bnb_4bit_compute_dtype=torch.bfloat16,
21
- bnb_4bit_use_double_quant=True,
22
- bnb_4bit_quant_type="nf4",
23
- # llm_int8_enable_fp32_cpu_offload=True # Allow offloading
24
- )
25
 
26
  # Load Gemma3
27
  model = Gemma3ForConditionalGeneration.from_pretrained(
28
  model_name,
29
  torch_dtype=torch.bfloat16,
30
- device_map="cpu", # not "auto" since there is no GPU
31
- quantization_config=bnb_config,
32
- low_cpu_mem_usage=True,
33
- # attn_implementation="sdpa"
34
  )
35
- disk_offload(model=model, offload_dir="offload")
 
36
  model.eval()
37
 
38
  # Processor
 
4
  import gc
5
 
6
  from sentence_transformers import SentenceTransformer
7
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration
8
+ # from accelerate import disk_offload
9
  from utils import clear_gpu_cache
10
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
15
 
16
  # Gemma3 quantization config
17
  model_name = "google/gemma-3-4b-it"
 
 
 
 
 
 
 
18
 
19
  # Load Gemma3
20
  model = Gemma3ForConditionalGeneration.from_pretrained(
21
  model_name,
22
  torch_dtype=torch.bfloat16,
23
+ device_map="cpu", # Avoid meta errors
 
 
 
24
  )
25
+ # disk_offload(model=model, offload_dir="offload")
26
+ model.to("cpu")
27
  model.eval()
28
 
29
  # Processor