MiniMax-M2.5-tiny / prune_metadata.json
morriszjm's picture
Initial upload: MiniMax-M2.5 dev rig — 8/62 layers + 32/256 experts/layer (combined layer + expert prune for single-A100 dev work)
4f04929 verified
{
"wallclock": 1778964219.3718772,
"src": "/scratch/MiniMax-M2.5",
"schema": "layer_and_expert_prune.v1",
"K_layers": 8,
"K_experts": 32,
"src_num_layers": 62,
"src_num_experts": 256,
"shards_in": 125,
"shards_out": 2,
"kept_keys": 1651,
"dropped_keys": 94452,
"total_kept_bytes": 6438923264,
"metadata_json_source": "/scratch/work/layer_prune_metadata_devrig.json",
"metadata_json_raw": {
"schema": "layer_and_expert_prune.v1",
"method": "frobenius_norm + first/last-K boundary preservation (ShortGPT-inspired magnitude heuristic, calibration-free)",
"src": "/scratch/MiniMax-M2.5",
"src_num_layers": 62,
"K_layers": 8,
"compression_pct": 87.09677419354838,
"boundary_keep": 2,
"layers_kept_old_indices": [
0,
1,
5,
6,
10,
13,
60,
61
],
"layers_dropped_old_indices": [
2,
3,
4,
7,
8,
9,
11,
12,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"old_to_new_layer_index": {
"0": 0,
"1": 1,
"5": 2,
"6": 3,
"10": 4,
"13": 5,
"60": 6,
"61": 7
},
"wallclock": 1778964011.358336,
"stats": {
"min_importance_kept": 1653015.7493949968,
"max_importance_dropped": 2134916.627236538
},
"K_experts": 32,
"src_num_experts": 256,
"per_old_layer_expert_keep": {
"0": [
12,
25,
35,
40,
42,
50,
68,
80,
87,
88,
91,
102,
110,
121,
127,
154,
171,
173,
181,
182,
194,
198,
199,
200,
221,
223,
226,
228,
229,
234,
248,
252
],
"1": [
6,
8,
9,
11,
14,
16,
17,
43,
48,
60,
66,
77,
79,
84,
96,
104,
108,
111,
114,
120,
130,
146,
148,
157,
165,
166,
180,
208,
236,
245,
249,
252
],
"5": [
7,
9,
12,
15,
28,
31,
33,
35,
37,
45,
47,
48,
58,
63,
71,
74,
95,
103,
110,
114,
115,
125,
147,
156,
158,
169,
192,
193,
198,
208,
216,
229
],
"6": [
20,
32,
34,
38,
51,
60,
83,
85,
98,
111,
116,
123,
124,
129,
135,
138,
141,
169,
178,
187,
191,
194,
198,
208,
214,
215,
217,
220,
222,
224,
238,
239
],
"10": [
8,
15,
27,
30,
34,
37,
39,
43,
48,
49,
52,
59,
64,
74,
75,
80,
96,
105,
114,
119,
122,
129,
130,
148,
173,
207,
215,
221,
229,
236,
251,
254
],
"13": [
4,
5,
12,
17,
23,
24,
46,
50,
51,
57,
62,
71,
103,
104,
120,
128,
137,
147,
150,
154,
173,
176,
181,
195,
196,
200,
205,
209,
232,
246,
248,
251
],
"60": [
2,
11,
12,
30,
44,
45,
47,
63,
65,
72,
80,
88,
99,
106,
109,
121,
127,
132,
145,
146,
173,
181,
199,
205,
209,
210,
216,
223,
226,
238,
241,
250
],
"61": [
0,
1,
13,
20,
35,
40,
44,
45,
53,
54,
78,
80,
87,
102,
105,
115,
117,
121,
130,
131,
162,
168,
169,
175,
183,
193,
200,
216,
218,
237,
238,
251
]
},
"expert_metadata_source": "/scratch/work/expert_metadata/prune_metadata_K32.json",
"expert_method": "magnitude/weight_scale_inv sum-of-squares",
"importance_source": "/scratch/work/layer_importance.json"
}
}