Spaces:

MuhammadNoman7600
/

model

Sleeping

MuhammadNoman7600 committed 6 days ago

Commit

0a618c7

verified ·

1 Parent(s): 9f45529

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ from transformers import (
 )
 # ━━━━━━━━━━━━━━━━━━━━━━━━━━ CONFIG ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-BASE_MODEL_NAME    = "unsloth/qwen2.5-0.5b-unsloth-bnb-4bit"
 ADAPTER_NAME       = "MuhammadNoman7600/mermaid"
 DISPLAY_MODEL_NAME = "MuhammadNoman7600/mermaid"
 HOST               = "0.0.0.0"
@@ -168,9 +168,18 @@ def load_model():
         )
     else:
         print("    No CUDA — loading base model in float32 on CPU.")
         base = AutoModelForCausalLM.from_pretrained(
             BASE_MODEL_NAME,
-            torch_dtype=torch.float32,
             device_map="cpu",
             trust_remote_code=True,
         )

 )
 # ━━━━━━━━━━━━━━━━━━━━━━━━━━ CONFIG ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+BASE_MODEL_NAME    = "Qwen/Qwen2.5-0.5B-Instruct"   # CPU-safe (float32); unsloth 4-bit needs CUDA
 ADAPTER_NAME       = "MuhammadNoman7600/mermaid"
 DISPLAY_MODEL_NAME = "MuhammadNoman7600/mermaid"
 HOST               = "0.0.0.0"
         )
     else:
         print("    No CUDA — loading base model in float32 on CPU.")
+        # Checkpoints such as unsloth/qwen2.5-0.5b-unsloth-bnb-4bit have a bnb-4bit
+        # quantization_config baked into their model config. On CPU we MUST strip it
+        # (when present) so that transformers does not attempt to invoke
+        # bitsandbytes (which requires CUDA).
+        from transformers import AutoConfig
+        cfg = AutoConfig.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
+        if hasattr(cfg, "quantization_config"):
+            del cfg.quantization_config
         base = AutoModelForCausalLM.from_pretrained(
             BASE_MODEL_NAME,
+            config=cfg,
+            quantization_config=None,
+            dtype=torch.float32,
             device_map="cpu",
             trust_remote_code=True,
         )