Spaces:
Sleeping
Sleeping
Commit
·
c5dc4f2
1
Parent(s):
c4018e3
Deploy Auto-Quantization MVP
Browse files
- quantizer.py +12 -1
quantizer.py
CHANGED
|
@@ -45,6 +45,11 @@ async def quantize_model(job: Dict) -> Dict:
|
|
| 45 |
print(f"📋 Step 1/5: Validating model...")
|
| 46 |
api = HfApi(token=HF_TOKEN)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
try:
|
| 49 |
model_info = api.model_info(model_id)
|
| 50 |
print(f"✓ Model found: {model_id}")
|
|
@@ -143,7 +148,13 @@ async def quantize_model(job: Dict) -> Dict:
|
|
| 143 |
if not HF_TOKEN:
|
| 144 |
raise Exception("HF_TOKEN not set. Cannot upload to Hub.")
|
| 145 |
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
try:
|
| 149 |
# Create repo
|
|
|
|
| 45 |
print(f"📋 Step 1/5: Validating model...")
|
| 46 |
api = HfApi(token=HF_TOKEN)
|
| 47 |
|
| 48 |
+
# Check if model is already quantized
|
| 49 |
+
quantization_suffixes = ["-Quanto-int8", "-Quanto-int4", "-GPTQ", "-AWQ", "-GGUF", "-quantized"]
|
| 50 |
+
if any(model_id.endswith(suffix) for suffix in quantization_suffixes):
|
| 51 |
+
raise Exception(f"Model appears to be already quantized: {model_id}. Skipping re-quantization.")
|
| 52 |
+
|
| 53 |
try:
|
| 54 |
model_info = api.model_info(model_id)
|
| 55 |
print(f"✓ Model found: {model_id}")
|
|
|
|
| 148 |
if not HF_TOKEN:
|
| 149 |
raise Exception("HF_TOKEN not set. Cannot upload to Hub.")
|
| 150 |
|
| 151 |
+
# Strip any existing quantization suffix to avoid duplication
|
| 152 |
+
base_model_id = model_id
|
| 153 |
+
for suffix in ["-Quanto-int8", "-Quanto-int4", "-GPTQ", "-AWQ", "-GGUF"]:
|
| 154 |
+
if base_model_id.endswith(suffix):
|
| 155 |
+
base_model_id = base_model_id[:-len(suffix)]
|
| 156 |
+
|
| 157 |
+
output_repo = f"{base_model_id}-Quanto-int8"
|
| 158 |
|
| 159 |
try:
|
| 160 |
# Create repo
|