silveroxides committed on
Commit
4127859
·
1 Parent(s): aaee40e

perf: Run quantization on CPU, remove ZeroGPU timeout issue

Browse files

- Remove @spaces.GPU decorator - simple mode is pure tensor math
- Remove low_memory flag - 192GB RAM available
- Remove unused spaces import

Files changed (1) hide show
  1. app.py +0 -4
app.py CHANGED
@@ -7,7 +7,6 @@ FP8/INT8 formats for ComfyUI inference, with HuggingFace Hub integration.
7
  import os
8
  import tempfile
9
  import gradio as gr
10
- import spaces # ZeroGPU
11
  from huggingface_hub import hf_hub_download, HfApi, create_commit, CommitOperationAdd
12
 
13
  from convert_to_quant import convert, ConversionConfig
@@ -147,8 +146,6 @@ def upload_model_as_pr(
147
  except Exception as e:
148
  return f"❌ Upload failed: {str(e)}"
149
 
150
-
151
- @spaces.GPU
152
  def quantize_model(
153
  source_repo: str,
154
  file_path: str,
@@ -217,7 +214,6 @@ def quantize_model(
217
  save_quant_metadata=True,
218
  simple=True,
219
  verbose="VERBOSE",
220
- low_memory=True,
221
  scaling_mode=format_config.get("scaling_mode") or "tensor",
222
  block_size=format_config.get("block_size"),
223
  filter_flags=filter_flags,
 
7
  import os
8
  import tempfile
9
  import gradio as gr
 
10
  from huggingface_hub import hf_hub_download, HfApi, create_commit, CommitOperationAdd
11
 
12
  from convert_to_quant import convert, ConversionConfig
 
146
  except Exception as e:
147
  return f"❌ Upload failed: {str(e)}"
148
 
 
 
149
  def quantize_model(
150
  source_repo: str,
151
  file_path: str,
 
214
  save_quant_metadata=True,
215
  simple=True,
216
  verbose="VERBOSE",
 
217
  scaling_mode=format_config.get("scaling_mode") or "tensor",
218
  block_size=format_config.get("block_size"),
219
  filter_flags=filter_flags,