Spaces:
Running
Running
"""
Quantize the sentiment model to INT8 using ONNX Runtime dynamic quantization.

Usage:
    python scripts/quantize_model.py

Output:
    backend/models/quantized/ — contains model_quantized.onnx + tokenizer files
"""
import shutil
import tempfile
from pathlib import Path

from optimum.onnxruntime import ORTModelForSequenceClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from transformers import AutoTokenizer

# Hugging Face hub ID of the model to export and quantize.
# NOTE(review): per the module docstring this is a binary sentiment classifier;
# confirmed only by the name, not by visible code.
MODEL_ID = "uer/roberta-base-finetuned-jd-binary-chinese"
# Destination directory: <repo root>/backend/models/quantized
# (this script is expected to live one level below the repo root, e.g. scripts/).
OUTPUT_DIR = Path(__file__).resolve().parent.parent / "backend" / "models" / "quantized"
def main() -> None:
    """Export MODEL_ID to ONNX and write an INT8-quantized copy to OUTPUT_DIR.

    Pipeline:
      1. Export the pretrained model to ONNX in a temporary directory.
      2. Apply dynamic (weight-only, AVX2) INT8 quantization into OUTPUT_DIR.
      3. Save the matching tokenizer files next to the quantized model.
      4. Print a size summary of the produced files.

    Side effects: downloads the model/tokenizer from the Hugging Face hub and
    writes files under OUTPUT_DIR (created if missing). Returns nothing.
    """
    print(f"[1/4] Exporting {MODEL_ID} to ONNX...")
    # TemporaryDirectory cleans up automatically on normal exit AND on any
    # exception — replaces the manual mkdtemp + try/finally + rmtree pattern.
    with tempfile.TemporaryDirectory(prefix="onnx_export_") as tmp_dir:
        model = ORTModelForSequenceClassification.from_pretrained(
            MODEL_ID, export=True
        )
        model.save_pretrained(tmp_dir)

        print("[2/4] Applying INT8 dynamic quantization (AVX2)...")
        quantizer = ORTQuantizer.from_pretrained(tmp_dir)
        # is_static=False → dynamic quantization: weights are quantized now,
        # activations at inference time (no calibration dataset needed).
        qconfig = AutoQuantizationConfig.avx2(is_static=False)
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        quantizer.quantize(save_dir=OUTPUT_DIR, quantization_config=qconfig)

        print("[3/4] Copying tokenizer files...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        tokenizer.save_pretrained(str(OUTPUT_DIR))

    # The summary only reads OUTPUT_DIR, so it can run after tmp_dir cleanup.
    print(f"[4/4] Done! Quantized model saved to: {OUTPUT_DIR}")
    print("Files:")
    for f in sorted(OUTPUT_DIR.iterdir()):
        size_mb = f.stat().st_size / (1024 * 1024)
        print(f"  {f.name} ({size_mb:.1f} MB)")
# Run the export/quantization pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()