Spaces:
Running on Zero
Running on Zero
Mehdi committed on
Commit ·
56cd204
1
Parent(s): 12c4c0f
fix: skip bitsandbytes on HF Spaces (ZeroGPU), only use 4-bit locally
Browse files
- model/llm.py +5 -1
model/llm.py
CHANGED
|
@@ -47,7 +47,11 @@ _preload_nvjitlink()
|
|
| 47 |
|
| 48 |
|
| 49 |
def _build_quantization_config(vram_gb: float):
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
try:
|
| 52 |
import bitsandbytes # noqa: F401
|
| 53 |
return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
def _build_quantization_config(vram_gb: float):
|
| 50 |
+
# HF Spaces (ZeroGPU A10G = 24 GB): skip quantization, use bfloat16 directly
|
| 51 |
+
if os.environ.get("SPACE_ID") or os.environ.get("SPACE_AUTHOR_NAME"):
|
| 52 |
+
return None
|
| 53 |
+
# Locally: 4-bit when VRAM is detected and is < 17 GB
|
| 54 |
+
if 0 < vram_gb < 17:
|
| 55 |
try:
|
| 56 |
import bitsandbytes # noqa: F401
|
| 57 |
return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
|