Mehdi committed on
Commit
56cd204
·
1 Parent(s): 12c4c0f

fix: skip bitsandbytes on HF Spaces (ZeroGPU), only use 4-bit locally

Browse files
Files changed (1) hide show
  1. model/llm.py +5 -1
model/llm.py CHANGED
@@ -47,7 +47,11 @@ _preload_nvjitlink()
47
 
48
 
49
  def _build_quantization_config(vram_gb: float):
50
- if vram_gb < 17:
 
 
 
 
51
  try:
52
  import bitsandbytes # noqa: F401
53
  return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
 
47
 
48
 
49
  def _build_quantization_config(vram_gb: float):
50
+ # HF Spaces (ZeroGPU A10G = 24 GB): skip quantization, use bfloat16 directly
51
+ if os.environ.get("SPACE_ID") or os.environ.get("SPACE_AUTHOR_NAME"):
52
+ return None
53
+ # Locally: 4-bit when VRAM is detected and is < 17 GB
54
+ if 0 < vram_gb < 17:
55
  try:
56
  import bitsandbytes # noqa: F401
57
  return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)