Sambhavnoobcoder committed on
Commit
c5dc4f2
·
1 Parent(s): c4018e3

Deploy Auto-Quantization MVP

Browse files
Files changed (1) hide show
  1. quantizer.py +12 -1
quantizer.py CHANGED
@@ -45,6 +45,11 @@ async def quantize_model(job: Dict) -> Dict:
45
  print(f"📋 Step 1/5: Validating model...")
46
  api = HfApi(token=HF_TOKEN)
47
 
 
 
 
 
 
48
  try:
49
  model_info = api.model_info(model_id)
50
  print(f"✓ Model found: {model_id}")
@@ -143,7 +148,13 @@ async def quantize_model(job: Dict) -> Dict:
143
  if not HF_TOKEN:
144
  raise Exception("HF_TOKEN not set. Cannot upload to Hub.")
145
 
146
- output_repo = f"{model_id}-Quanto-int8"
 
 
 
 
 
 
147
 
148
  try:
149
  # Create repo
 
45
  print(f"📋 Step 1/5: Validating model...")
46
  api = HfApi(token=HF_TOKEN)
47
 
48
+ # Check if model is already quantized
49
+ quantization_suffixes = ["-Quanto-int8", "-Quanto-int4", "-GPTQ", "-AWQ", "-GGUF", "-quantized"]
50
+ if any(model_id.endswith(suffix) for suffix in quantization_suffixes):
51
+ raise Exception(f"Model appears to be already quantized: {model_id}. Skipping re-quantization.")
52
+
53
  try:
54
  model_info = api.model_info(model_id)
55
  print(f"✓ Model found: {model_id}")
 
148
  if not HF_TOKEN:
149
  raise Exception("HF_TOKEN not set. Cannot upload to Hub.")
150
 
151
+ # Strip any existing quantization suffix to avoid duplication
152
+ base_model_id = model_id
153
+ for suffix in ["-Quanto-int8", "-Quanto-int4", "-GPTQ", "-AWQ", "-GGUF"]:
154
+ if base_model_id.endswith(suffix):
155
+ base_model_id = base_model_id[:-len(suffix)]
156
+
157
+ output_repo = f"{base_model_id}-Quanto-int8"
158
 
159
  try:
160
  # Create repo