Apex-1-Instruct-350M / quantize_onnx.py
LH-Tech-AI's picture
Upload 3 files
55754f6 verified
raw
history blame contribute delete
427 Bytes
"""Dynamically quantize the FP32 ONNX model to INT8 weights.

Reads ``SmaLLMPro_350M.onnx`` and writes ``SmaLLMPro_350M_int8.onnx``
using onnxruntime's dynamic quantization: weights are stored as signed
INT8, activations are quantized on the fly at inference time.
"""
import os

import onnx  # noqa: F401 -- not used directly; kept for parity with the original script
from onnxruntime.quantization import quantize_dynamic, QuantType

# Paths: FP32 input model and INT8 output model.
model_fp32 = "SmaLLMPro_350M.onnx"
model_int8 = "SmaLLMPro_350M_int8.onnx"

# Fail early with a clear message instead of a deep onnxruntime traceback
# when the input model is missing from the working directory.
if not os.path.exists(model_fp32):
    raise FileNotFoundError(f"Input model not found: {model_fp32}")

print(f"📦 Quantisiere {model_fp32} zu INT8...")
quantize_dynamic(
    model_input=model_fp32,
    model_output=model_int8,
    weight_type=QuantType.QInt8,  # signed INT8 weights: recommended precision/performance trade-off
)
print(f"✅ Fertig! Quantisiertes Modell: {model_int8}")