Spaces:

silveroxides
/

convert_to_quant

Build error

silveroxides committed on Jan 14

Commit

28030ea

1 Parent(s): aee2946

fix: Use GPU for MXFP8/NVFP4 formats, CPU for others

- gpu_quantize() with 600s duration for CUDA-requiring formats
- cpu_quantize() for FP8 tensorwise/block and INT8

Files changed (1) hide show

app.py CHANGED Viewed

@@ -147,11 +147,15 @@ def upload_model_as_pr(
     except Exception as e:
         return f"❌ Upload failed: {str(e)}"
-@spaces.GPU(duration=30)
-def gpu_check():
-    """Minimal GPU function to satisfy ZeroGPU space requirements."""
-    import torch
-    return torch.cuda.is_available()
 def quantize_model(
@@ -230,7 +234,13 @@ def quantize_model(
     )
     try:
-        result = convert(config)
         if not result.success:
             status_log.append(f"❌ Quantization failed: {result.error}")
@@ -254,9 +264,6 @@ def quantize_model(
             )
             status_log.append(upload_status)
-        # Brief GPU check to satisfy ZeroGPU requirements
-        gpu_check()
         return result.output_path, "\n\n".join(status_log)
     except Exception as e:

     except Exception as e:
         return f"❌ Upload failed: {str(e)}"
+@spaces.GPU(duration=600)
+def gpu_quantize(config):
+    """Run quantization on GPU for formats that require CUDA (MXFP8, NVFP4)."""
+    return convert(config)
+def cpu_quantize(config):
+    """Run quantization on CPU for formats that don't require CUDA."""
+    return convert(config)
 def quantize_model(
     )
     try:
+        # Use GPU for formats that require CUDA, CPU for others
+        requires_gpu = format_config["format"] in ("mxfp8", "nvfp4")
+        if requires_gpu:
+            status_log.append("🖥️ Using GPU for quantization...")
+            result = gpu_quantize(config)
+        else:
+            result = cpu_quantize(config)
         if not result.success:
             status_log.append(f"❌ Quantization failed: {result.error}")
             )
             status_log.append(upload_status)
         return result.output_path, "\n\n".join(status_log)
     except Exception as e: