Spaces:

Sambhavnoobcoder
/

quantization-mvp

Sleeping

Sambhavnoobcoder committed on Jan 10

Commit

c5dc4f2

1 Parent(s): c4018e3

Deploy Auto-Quantization MVP

Files changed (1) hide show

quantizer.py CHANGED Viewed

@@ -45,6 +45,11 @@ async def quantize_model(job: Dict) -> Dict:
         print(f"📋 Step 1/5: Validating model...")
         api = HfApi(token=HF_TOKEN)
         try:
             model_info = api.model_info(model_id)
             print(f"✓ Model found: {model_id}")
@@ -143,7 +148,13 @@ async def quantize_model(job: Dict) -> Dict:
         if not HF_TOKEN:
             raise Exception("HF_TOKEN not set. Cannot upload to Hub.")
-        output_repo = f"{model_id}-Quanto-int8"
         try:
             # Create repo

         print(f"📋 Step 1/5: Validating model...")
         api = HfApi(token=HF_TOKEN)
+        # Check if model is already quantized
+        quantization_suffixes = ["-Quanto-int8", "-Quanto-int4", "-GPTQ", "-AWQ", "-GGUF", "-quantized"]
+        if any(model_id.endswith(suffix) for suffix in quantization_suffixes):
+            raise Exception(f"Model appears to be already quantized: {model_id}. Skipping re-quantization.")
         try:
             model_info = api.model_info(model_id)
             print(f"✓ Model found: {model_id}")
         if not HF_TOKEN:
             raise Exception("HF_TOKEN not set. Cannot upload to Hub.")
+        # Strip any existing quantization suffix to avoid duplication
+        base_model_id = model_id
+        for suffix in ["-Quanto-int8", "-Quanto-int4", "-GPTQ", "-AWQ", "-GGUF"]:
+            if base_model_id.endswith(suffix):
+                base_model_id = base_model_id[:-len(suffix)]
+        output_repo = f"{base_model_id}-Quanto-int8"
         try:
             # Create repo