File size: 427 Bytes
55754f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
"""Quantize the SmaLLMPro 350M ONNX model from FP32 down to INT8.

Uses onnxruntime's *dynamic* quantization: weights are converted to INT8
offline (here), while activations are quantized on the fly at inference
time — no calibration dataset required.
"""
import onnx
from onnxruntime.quantization import QuantType, quantize_dynamic

# Source (FP32) and destination (INT8) model paths.
model_fp32 = "SmaLLMPro_350M.onnx"
model_int8 = "SmaLLMPro_350M_int8.onnx"

print(f"📦 Quantisiere {model_fp32} zu INT8...")

# QInt8 weights are the recommended trade-off between performance and
# precision for dynamic quantization.
quantize_dynamic(
    model_input=model_fp32,
    model_output=model_int8,
    weight_type=QuantType.QInt8,
)

print(f"✅ Fertig! Quantisiertes Modell: {model_int8}")