MuhammadNoman7600 commited on
Commit
0a618c7
·
verified ·
1 Parent(s): 9f45529

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -30,7 +30,7 @@ from transformers import (
30
  )
31
 
32
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━ CONFIG ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
33
- BASE_MODEL_NAME = "unsloth/qwen2.5-0.5b-unsloth-bnb-4bit"
34
  ADAPTER_NAME = "MuhammadNoman7600/mermaid"
35
  DISPLAY_MODEL_NAME = "MuhammadNoman7600/mermaid"
36
  HOST = "0.0.0.0"
@@ -168,9 +168,18 @@ def load_model():
168
  )
169
  else:
170
  print(" No CUDA β€” loading base model in float32 on CPU.")
 
 
 
 
 
 
 
171
  base = AutoModelForCausalLM.from_pretrained(
172
  BASE_MODEL_NAME,
173
- torch_dtype=torch.float32,
 
 
174
  device_map="cpu",
175
  trust_remote_code=True,
176
  )
 
30
  )
31
 
32
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━ CONFIG ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
33
+ BASE_MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" # CPU-safe (float32); unsloth 4-bit needs CUDA
34
  ADAPTER_NAME = "MuhammadNoman7600/mermaid"
35
  DISPLAY_MODEL_NAME = "MuhammadNoman7600/mermaid"
36
  HOST = "0.0.0.0"
 
168
  )
169
  else:
170
  print(" No CUDA β€” loading base model in float32 on CPU.")
171
+ # unsloth/qwen2.5-0.5b-unsloth-bnb-4bit has a bnb-4bit quantization_config
172
+ # baked into its model config. On CPU we MUST strip it so that transformers
173
+ # does not attempt to invoke bitsandbytes (which requires CUDA).
174
+ from transformers import AutoConfig
175
+ cfg = AutoConfig.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
176
+ if hasattr(cfg, "quantization_config"):
177
+ del cfg.quantization_config
178
  base = AutoModelForCausalLM.from_pretrained(
179
  BASE_MODEL_NAME,
180
+ config=cfg,
181
+ quantization_config=None,
182
+ dtype=torch.float32,
183
  device_map="cpu",
184
  trust_remote_code=True,
185
  )