Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,7 +30,7 @@ from transformers import (
|
|
| 30 |
)
|
| 31 |
|
| 32 |
# ────────────────────────── CONFIG ────────────────────────────
|
| 33 |
-
BASE_MODEL_NAME = "
|
| 34 |
ADAPTER_NAME = "MuhammadNoman7600/mermaid"
|
| 35 |
DISPLAY_MODEL_NAME = "MuhammadNoman7600/mermaid"
|
| 36 |
HOST = "0.0.0.0"
|
|
@@ -168,9 +168,18 @@ def load_model():
|
|
| 168 |
)
|
| 169 |
else:
|
| 170 |
print(" No CUDA — loading base model in float32 on CPU.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
base = AutoModelForCausalLM.from_pretrained(
|
| 172 |
BASE_MODEL_NAME,
|
| 173 |
-
|
|
|
|
|
|
|
| 174 |
device_map="cpu",
|
| 175 |
trust_remote_code=True,
|
| 176 |
)
|
|
|
|
| 30 |
)
|
| 31 |
|
| 32 |
# ────────────────────────── CONFIG ────────────────────────────
|
| 33 |
+
BASE_MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" # CPU-safe (float32); unsloth 4-bit needs CUDA
|
| 34 |
ADAPTER_NAME = "MuhammadNoman7600/mermaid"
|
| 35 |
DISPLAY_MODEL_NAME = "MuhammadNoman7600/mermaid"
|
| 36 |
HOST = "0.0.0.0"
|
|
|
|
| 168 |
)
|
| 169 |
else:
|
| 170 |
print(" No CUDA — loading base model in float32 on CPU.")
|
| 171 |
+
# unsloth/qwen2.5-0.5b-unsloth-bnb-4bit has a bnb-4bit quantization_config
|
| 172 |
+
# baked into its model config. On CPU we MUST strip it so that transformers
|
| 173 |
+
# does not attempt to invoke bitsandbytes (which requires CUDA).
|
| 174 |
+
from transformers import AutoConfig
|
| 175 |
+
cfg = AutoConfig.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
|
| 176 |
+
if hasattr(cfg, "quantization_config"):
|
| 177 |
+
del cfg.quantization_config
|
| 178 |
base = AutoModelForCausalLM.from_pretrained(
|
| 179 |
BASE_MODEL_NAME,
|
| 180 |
+
config=cfg,
|
| 181 |
+
quantization_config=None,
|
| 182 |
+
dtype=torch.float32,
|
| 183 |
device_map="cpu",
|
| 184 |
trust_remote_code=True,
|
| 185 |
)
|