serenichron committed on
Commit
6d6c01e
·
1 Parent(s): 22dca62

Fix device handling: check GPU availability before device_map

Browse files
Files changed (1) hide show
  1. models.py +11 -4
models.py CHANGED
@@ -127,13 +127,20 @@ def load_model(
     model_kwargs = {
         "token": config.hf_token,
         "trust_remote_code": True,
-        "device_map": "auto",
     }
 
-    if quant_config is not None:
-        model_kwargs["quantization_config"] = quant_config
+    # On ZeroGPU, use device_map only when GPU is available
+    # Otherwise load to CPU for local testing
+    if torch.cuda.is_available():
+        model_kwargs["device_map"] = "auto"
+        if quant_config is not None:
+            model_kwargs["quantization_config"] = quant_config
+        else:
+            model_kwargs["torch_dtype"] = torch.bfloat16
     else:
-        model_kwargs["torch_dtype"] = torch.bfloat16
+        # CPU mode - no quantization, float32
+        model_kwargs["device_map"] = "cpu"
+        model_kwargs["torch_dtype"] = torch.float32
 
     model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
 