Upload 3 files
Browse files
- config.json +15 -0
- export_onnx.py +52 -0
- quantize_onnx.py +16 -0
config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"n_layer": 24,
|
| 3 |
+
"n_head": 16,
|
| 4 |
+
"n_embd": 1024,
|
| 5 |
+
"block_size": 1024,
|
| 6 |
+
"bias": false,
|
| 7 |
+
"vocab_size": 50304,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"model_type": "gpt2",
|
| 10 |
+
"architectures": [
|
| 11 |
+
"GPT"
|
| 12 |
+
],
|
| 13 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 14 |
+
"model_name": "SmaLLMPro-350M"
|
| 15 |
+
}
|
export_onnx.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Export a GPT checkpoint to a single, self-contained ONNX file.

Loads a nanoGPT-style checkpoint, strips the ``torch.compile`` key prefix,
exports the model at opset 18, then re-saves the proto so that all weights
live inside one ``.onnx`` file instead of external data files.
"""
import torch
import os
import onnx  # needed for the single-file re-save step below
from model import GPTConfig, GPT

# Paths
ckpt_path = '/media/leo/Data/checkpoints/350m_SmaLLMPro_Final/SmaLLMPro_Final.pt'
out_path_full = 'SmaLLMPro_350M.onnx'
device = 'cpu'

# 1. Load the checkpoint
print(f"Lade Checkpoint: {ckpt_path}")
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted checkpoints.
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)

# Strip the '_orig_mod.' prefix that torch.compile adds to state-dict keys.
state_dict = checkpoint['model']
unwanted_prefix = '_orig_mod.'
for k in list(state_dict):  # copy of the keys: we mutate the dict while iterating
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)

model.load_state_dict(state_dict)
model.eval()

# 2. Dummy input: one full context window of random token ids
x = torch.randint(0, gptconf.vocab_size, (1, gptconf.block_size), dtype=torch.long)

# 3. ONNX export
print("Exportiere sauberes ONNX Modell (Opset 18)...")
torch.onnx.export(
    model,
    (x,),
    out_path_full,
    export_params=True,
    opset_version=18,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['logits'],
    # dynamic_axes deliberately omitted: forcing dynamic shapes makes the
    # exporter build a needlessly complex graph, which often causes the
    # weights to be split into external data files.
)

# 4. The single-file fix: reload and re-save so everything ends up in one file
print("Erzwinge Speicherung in einer einzelnen Datei...")
try:
    model_proto = onnx.load(out_path_full)
    # onnx.save without a 'location' parameter tries to write everything into a single .onnx file
    onnx.save(model_proto, "SmaLLMPro_350M_Final.onnx")
    print("✅ Full Precision Modell erfolgreich als Einzeldokument gespeichert: SmaLLMPro_350M_Final.onnx")
except Exception as e:
    print(f"⚠️ Hinweis: Single-File Save fehlgeschlagen (evtl. doch über 2GB?). Fehler: {e}")
|
quantize_onnx.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dynamically quantize the exported ONNX model to INT8 weights."""
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

# Input (fp32) and output (int8) model files
model_fp32 = "SmaLLMPro_350M.onnx"
model_int8 = "SmaLLMPro_350M_int8.onnx"

print(f"📦 Quantisiere {model_fp32} zu INT8...")

# Dynamic quantization: weights are stored as INT8 in the output file,
# activations are quantized on the fly at inference time.
quantize_dynamic(
    model_input=model_fp32,
    model_output=model_int8,
    # QInt8 is the recommended trade-off between speed and precision
    weight_type=QuantType.QInt8,
)

print(f"✅ Fertig! Quantisiertes Modell: {model_int8}")
|