"""Quantize the FP32 ONNX model to INT8 with ONNX Runtime dynamic quantization.

Reads the model at ``model_fp32`` and writes the quantized result to
``model_int8``. Both paths are relative, so they are resolved against the
current working directory.
"""

import onnx  # not referenced directly below; retained from the original script
from onnxruntime.quantization import QuantType, quantize_dynamic

# Paths
model_fp32 = "SmaLLMPro_350M.onnx"
model_int8 = "SmaLLMPro_350M_int8.onnx"

print(f"📦 Quantisiere {model_fp32} zu INT8...")

quantize_dynamic(
    model_input=model_fp32,
    model_output=model_int8,
    weight_type=QuantType.QInt8,  # Recommended for best performance/accuracy
)

print(f"✅ Fertig! Quantisiertes Modell: {model_int8}")