"""Dynamically quantize an ONNX model's weights from FP32 to INT8.

Uses onnxruntime's dynamic quantization, which converts weights to INT8
on disk while activations are quantized at runtime. Reads the FP32 model
from MODEL_FP32 and writes the quantized model to MODEL_INT8.
"""
import onnx  # noqa: F401 — kept from original; onnx must be installed for onnxruntime quantization
from onnxruntime.quantization import quantize_dynamic, QuantType

# Input/output model paths
MODEL_FP32 = "SmaLLMPro_350M.onnx"
MODEL_INT8 = "SmaLLMPro_350M_int8.onnx"


def main() -> None:
    """Run dynamic INT8 quantization on the configured model paths."""
    print(f"📦 Quantisiere {MODEL_FP32} zu INT8...")
    quantize_dynamic(
        model_input=MODEL_FP32,
        model_output=MODEL_INT8,
        # QInt8 weights are the recommended trade-off between performance
        # and precision for dynamic quantization.
        weight_type=QuantType.QInt8,
    )
    print(f"✅ Fertig! Quantisiertes Modell: {MODEL_INT8}")


if __name__ == "__main__":
    main()