Spaces:

build-small-hackathon
/

PaperProf

Running on Zero

Mehdi committed 23 days ago

Commit

56cd204

1 Parent(s): 12c4c0f

fix: skip bitsandbytes on HF Spaces (ZeroGPU), only use 4-bit locally

Files changed (1) hide show

model/llm.py CHANGED Viewed

@@ -47,7 +47,11 @@ _preload_nvjitlink()
 def _build_quantization_config(vram_gb: float):
-    if vram_gb < 17:
         try:
             import bitsandbytes  # noqa: F401
             return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

 def _build_quantization_config(vram_gb: float):
+    # HF Spaces (ZeroGPU A10G = 24 GB): skip quantization, use bfloat16 directly
+    if os.environ.get("SPACE_ID") or os.environ.get("SPACE_AUTHOR_NAME"):
+        return None
+    # Locally: 4-bit when VRAM is detected and is < 17 GB
+    if 0 < vram_gb < 17:
         try:
             import bitsandbytes  # noqa: F401
             return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)