LH-Tech-AI committed on
Commit
55754f6
·
verified ·
1 Parent(s): c7e0fe8

Upload 3 files

Browse files
Files changed (3) hide show
  1. config.json +15 -0
  2. export_onnx.py +52 -0
  3. quantize_onnx.py +16 -0
config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_layer": 24,
3
+ "n_head": 16,
4
+ "n_embd": 1024,
5
+ "block_size": 1024,
6
+ "bias": false,
7
+ "vocab_size": 50304,
8
+ "dropout": 0.0,
9
+ "model_type": "gpt2",
10
+ "architectures": [
11
+ "GPT"
12
+ ],
13
+ "tokenizer_class": "GPT2Tokenizer",
14
+ "model_name": "SmaLLMPro-350M"
15
+ }
export_onnx.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ import onnx # Wichtig für den Single-File Fix
4
+ from model import GPTConfig, GPT
5
+
6
+ # Pfade definieren
7
+ ckpt_path = '/media/leo/Data/checkpoints/350m_SmaLLMPro_Final/SmaLLMPro_Final.pt'
8
+ out_path_full = 'SmaLLMPro_350M.onnx'
9
+ device = 'cpu'
10
+
11
+ # 1. Checkpoint laden
12
+ print(f"Lade Checkpoint: {ckpt_path}")
13
+ checkpoint = torch.load(ckpt_path, map_location=device)
14
+ gptconf = GPTConfig(**checkpoint['model_args'])
15
+ model = GPT(gptconf)
16
+
17
+ state_dict = checkpoint['model']
18
+ unwanted_prefix = '_orig_mod.'
19
+ for k, v in list(state_dict.items()):
20
+ if k.startswith(unwanted_prefix):
21
+ state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
22
+
23
+ model.load_state_dict(state_dict)
24
+ model.eval()
25
+
26
+ # 2. Dummy-Input
27
+ x = torch.randint(0, gptconf.vocab_size, (1, gptconf.block_size), dtype=torch.long)
28
+
29
+ # 3. ONNX Export
30
+ print("Exportiere sauberes ONNX Modell (Opset 18)...")
31
+ torch.onnx.export(
32
+ model,
33
+ (x,),
34
+ out_path_full,
35
+ export_params=True,
36
+ opset_version=18,
37
+ do_constant_folding=True,
38
+ input_names=['input'],
39
+ output_names=['logits'],
40
+ # Wir lassen dynamic_axes hier weg, um den Exporter nicht zu zwingen,
41
+ # den Graph unnötig komplex zu machen, was oft zum Split führt.
42
+ )
43
+
44
+ # 4. Der Single-File Fix
45
+ print("Erzwinge Speicherung in einer einzelnen Datei...")
46
+ try:
47
+ model_proto = onnx.load(out_path_full)
48
+ # onnx.save ohne 'location' Parameter versucht alles in ein .onnx File zu schreiben
49
+ onnx.save(model_proto, "SmaLLMPro_350M_Final.onnx")
50
+ print("✅ Full Precision Modell erfolgreich als Einzeldokument gespeichert: SmaLLMPro_350M_Final.onnx")
51
+ except Exception as e:
52
+ print(f"⚠️ Hinweis: Single-File Save fehlgeschlagen (evtl. doch über 2GB?). Fehler: {e}")
quantize_onnx.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import onnx
2
+ from onnxruntime.quantization import quantize_dynamic, QuantType
3
+
4
+ # Pfade
5
+ model_fp32 = "SmaLLMPro_350M.onnx"
6
+ model_int8 = "SmaLLMPro_350M_int8.onnx"
7
+
8
+ print(f"📦 Quantisiere {model_fp32} zu INT8...")
9
+
10
+ quantize_dynamic(
11
+ model_input=model_fp32,
12
+ model_output=model_int8,
13
+ weight_type=QuantType.QInt8 # Empfohlen für beste Performance/Präzision
14
+ )
15
+
16
+ print(f"✅ Fertig! Quantisiertes Modell: {model_int8}")