samuelt0207 commited on
Commit
bfc1601
·
verified ·
1 Parent(s): 63789ae

Upload quantized model

Browse files
config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
3
+ "dtype": "bfloat16",
4
+ "epochs_per_block": 15,
5
+ "lr_scale": 0.005,
6
+ "lr_clip": 0.05,
7
+ "lambda_min": 0.5,
8
+ "lambda_max": 1.0,
9
+ "use_std": true,
10
+ "attn_chunk_size": 1024,
11
+ "seed": 42,
12
+ "calibration": {
13
+ "model_id": "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
14
+ "height": 720,
15
+ "width": 1280,
16
+ "num_frames": 81,
17
+ "num_inference_steps": 50,
18
+ "num_selected": 40,
19
+ "seed": 42,
20
+ "dtype": "bfloat16",
21
+ "num_prompts_used": 16,
22
+ "total_candidates_t1": 416,
23
+ "total_candidates_t2": 1184,
24
+ "total_candidates": 1600,
25
+ "distribution": {
26
+ "num_selected": 40,
27
+ "unique_prompts": 16,
28
+ "prompts_coverage": 1.0,
29
+ "samples_per_prompt": {
30
+ "a captivating scene featuring a spiral galaxy shining brilliantly in the night sky": 2,
31
+ "an alley way in an old european city": 6,
32
+ "a large rhino grazing in the grass near a bush": 2,
33
+ "a very large waterfall in the middle of the day": 2,
34
+ "a man in a mexican outfit holding an acoustic guitar": 2,
35
+ "a person riding a dirt bike": 2,
36
+ "a large wave crashes into a lighthouse on a stormy day": 2,
37
+ "three geishas walking down the street in traditional clothing": 2,
38
+ "the town of hallstatt is surrounded by mountains and water": 2,
39
+ "two people in a canoe on a lake with mountains in the background": 2,
40
+ "A group of people in a yellow raft is rowing through turbulent waters": 2,
41
+ "a snow covered street": 2,
42
+ "two people performing a sword fight in front of a forest": 2,
43
+ "a man riding a mountain bike on top of a rocky hill": 3,
44
+ "a sailboat is drifting on the ocean": 1,
45
+ "two men are standing next to each other with a bicycle": 6
46
+ },
47
+ "unique_steps": 9,
48
+ "steps_coverage": 0.18,
49
+ "samples_per_step": {
50
+ "0": 31,
51
+ "24": 1,
52
+ "22": 1,
53
+ "20": 1,
54
+ "18": 1,
55
+ "72": 2,
56
+ "70": 1,
57
+ "68": 1,
58
+ "66": 1
59
+ },
60
+ "transformer_stage_counts": {
61
+ "1": 20,
62
+ "2": 20
63
+ },
64
+ "score_min": 5.4020860016944115e-05,
65
+ "score_max": 1.0,
66
+ "score_mean": 0.2315939660683576
67
+ },
68
+ "stage1_count": 20,
69
+ "stage2_count": 20,
70
+ "collection_method": "forward_hooks"
71
+ }
72
+ }
training_log.json ADDED
@@ -0,0 +1,885 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer": [],
3
+ "transformer_2": [
4
+ {
5
+ "block_idx": 0,
6
+ "initial_loss": 0.00016968250274658204,
7
+ "final_loss": 6.217360496520996e-05,
8
+ "loss_history": [
9
+ 0.00016968250274658204,
10
+ 7.883310317993164e-05,
11
+ 6.74605369567871e-05,
12
+ 6.38127326965332e-05,
13
+ 6.275177001953125e-05,
14
+ 6.236433982849121e-05,
15
+ 6.218552589416504e-05,
16
+ 6.211400032043456e-05,
17
+ 6.217360496520996e-05,
18
+ 6.217360496520996e-05,
19
+ 6.217360496520996e-05,
20
+ 6.217360496520996e-05,
21
+ 6.217360496520996e-05,
22
+ 6.217360496520996e-05,
23
+ 6.217360496520996e-05
24
+ ]
25
+ },
26
+ {
27
+ "block_idx": 1,
28
+ "initial_loss": 4.522204399108887e-05,
29
+ "final_loss": 2.709627151489258e-05,
30
+ "loss_history": [
31
+ 4.522204399108887e-05,
32
+ 3.0422210693359374e-05,
33
+ 2.8467178344726564e-05,
34
+ 2.7573108673095704e-05,
35
+ 2.7239322662353516e-05,
36
+ 2.7108192443847657e-05,
37
+ 2.7108192443847657e-05,
38
+ 2.709627151489258e-05,
39
+ 2.709627151489258e-05,
40
+ 2.709627151489258e-05,
41
+ 2.709627151489258e-05,
42
+ 2.709627151489258e-05,
43
+ 2.709627151489258e-05,
44
+ 2.709627151489258e-05,
45
+ 2.709627151489258e-05
46
+ ]
47
+ },
48
+ {
49
+ "block_idx": 2,
50
+ "initial_loss": 8.33749771118164e-05,
51
+ "final_loss": 4.69207763671875e-05,
52
+ "loss_history": [
53
+ 8.33749771118164e-05,
54
+ 5.266666412353516e-05,
55
+ 4.8995018005371094e-05,
56
+ 4.7779083251953126e-05,
57
+ 4.7326087951660156e-05,
58
+ 4.696846008300781e-05,
59
+ 4.69207763671875e-05,
60
+ 4.69207763671875e-05,
61
+ 4.69207763671875e-05,
62
+ 4.69207763671875e-05,
63
+ 4.69207763671875e-05,
64
+ 4.69207763671875e-05,
65
+ 4.69207763671875e-05,
66
+ 4.69207763671875e-05,
67
+ 4.69207763671875e-05
68
+ ]
69
+ },
70
+ {
71
+ "block_idx": 3,
72
+ "initial_loss": 0.0001368284225463867,
73
+ "final_loss": 6.513595581054687e-05,
74
+ "loss_history": [
75
+ 0.0001368284225463867,
76
+ 7.736682891845703e-05,
77
+ 6.905794143676757e-05,
78
+ 6.647109985351563e-05,
79
+ 6.554126739501953e-05,
80
+ 6.526708602905273e-05,
81
+ 6.517171859741211e-05,
82
+ 6.513595581054687e-05,
83
+ 6.513595581054687e-05,
84
+ 6.513595581054687e-05,
85
+ 6.513595581054687e-05,
86
+ 6.513595581054687e-05,
87
+ 6.513595581054687e-05,
88
+ 6.513595581054687e-05,
89
+ 6.513595581054687e-05
90
+ ]
91
+ },
92
+ {
93
+ "block_idx": 4,
94
+ "initial_loss": 0.00012941360473632811,
95
+ "final_loss": 7.268190383911133e-05,
96
+ "loss_history": [
97
+ 0.00012941360473632811,
98
+ 8.521080017089843e-05,
99
+ 7.581710815429688e-05,
100
+ 7.352828979492187e-05,
101
+ 7.29680061340332e-05,
102
+ 7.275342941284179e-05,
103
+ 7.268190383911133e-05,
104
+ 7.268190383911133e-05,
105
+ 7.268190383911133e-05,
106
+ 7.268190383911133e-05,
107
+ 7.268190383911133e-05,
108
+ 7.268190383911133e-05,
109
+ 7.268190383911133e-05,
110
+ 7.268190383911133e-05,
111
+ 7.268190383911133e-05
112
+ ]
113
+ },
114
+ {
115
+ "block_idx": 5,
116
+ "initial_loss": 0.0001756906509399414,
117
+ "final_loss": 9.338855743408203e-05,
118
+ "loss_history": [
119
+ 0.0001756906509399414,
120
+ 0.00010859966278076172,
121
+ 9.770393371582032e-05,
122
+ 9.465217590332031e-05,
123
+ 9.379386901855468e-05,
124
+ 9.343624114990234e-05,
125
+ 9.338855743408203e-05,
126
+ 9.338855743408203e-05,
127
+ 9.338855743408203e-05,
128
+ 9.338855743408203e-05,
129
+ 9.338855743408203e-05,
130
+ 9.338855743408203e-05,
131
+ 9.338855743408203e-05,
132
+ 9.338855743408203e-05,
133
+ 9.338855743408203e-05
134
+ ]
135
+ },
136
+ {
137
+ "block_idx": 6,
138
+ "initial_loss": 0.0002610445022583008,
139
+ "final_loss": 0.00013077259063720703,
140
+ "loss_history": [
141
+ 0.0002610445022583008,
142
+ 0.00015676021575927734,
143
+ 0.0001371622085571289,
144
+ 0.00013275146484375,
145
+ 0.00013129711151123048,
146
+ 0.0001308441162109375,
147
+ 0.00013077259063720703,
148
+ 0.00013077259063720703,
149
+ 0.00013077259063720703,
150
+ 0.00013077259063720703,
151
+ 0.00013077259063720703,
152
+ 0.00013077259063720703,
153
+ 0.00013077259063720703,
154
+ 0.00013077259063720703,
155
+ 0.00013077259063720703
156
+ ]
157
+ },
158
+ {
159
+ "block_idx": 7,
160
+ "initial_loss": 0.0005000591278076172,
161
+ "final_loss": 0.00024805068969726565,
162
+ "loss_history": [
163
+ 0.0005000591278076172,
164
+ 0.0002982854843139648,
165
+ 0.0002639532089233398,
166
+ 0.0002520561218261719,
167
+ 0.00024919509887695314,
168
+ 0.00024814605712890624,
169
+ 0.00024805068969726565,
170
+ 0.00024805068969726565,
171
+ 0.00024805068969726565,
172
+ 0.00024805068969726565,
173
+ 0.00024805068969726565,
174
+ 0.00024805068969726565,
175
+ 0.00024805068969726565,
176
+ 0.00024805068969726565,
177
+ 0.00024805068969726565
178
+ ]
179
+ },
180
+ {
181
+ "block_idx": 8,
182
+ "initial_loss": 0.0005391120910644531,
183
+ "final_loss": 0.0002680063247680664,
184
+ "loss_history": [
185
+ 0.0005391120910644531,
186
+ 0.0003233194351196289,
187
+ 0.0002843379974365234,
188
+ 0.00027234554290771483,
189
+ 0.00026962757110595705,
190
+ 0.0002681970596313477,
191
+ 0.0002681970596313477,
192
+ 0.0002680063247680664,
193
+ 0.0002680063247680664,
194
+ 0.0002680063247680664,
195
+ 0.0002680063247680664,
196
+ 0.0002680063247680664,
197
+ 0.0002680063247680664,
198
+ 0.0002680063247680664,
199
+ 0.0002680063247680664
200
+ ]
201
+ },
202
+ {
203
+ "block_idx": 9,
204
+ "initial_loss": 0.0009508132934570312,
205
+ "final_loss": 0.0004020214080810547,
206
+ "loss_history": [
207
+ 0.0009508132934570312,
208
+ 0.0005316257476806641,
209
+ 0.00044226646423339844,
210
+ 0.0004161357879638672,
211
+ 0.00040678977966308595,
212
+ 0.00040373802185058596,
213
+ 0.0004023075103759766,
214
+ 0.0004023075103759766,
215
+ 0.0004020214080810547,
216
+ 0.0004021167755126953,
217
+ 0.0004020214080810547,
218
+ 0.0004020214080810547,
219
+ 0.0004020214080810547,
220
+ 0.0004020214080810547,
221
+ 0.0004020214080810547
222
+ ]
223
+ },
224
+ {
225
+ "block_idx": 10,
226
+ "initial_loss": 0.0011842727661132812,
227
+ "final_loss": 0.0005385875701904297,
228
+ "loss_history": [
229
+ 0.0011842727661132812,
230
+ 0.0007007598876953125,
231
+ 0.0005944252014160156,
232
+ 0.0005599498748779297,
233
+ 0.0005486488342285156,
234
+ 0.0005418300628662109,
235
+ 0.0005389690399169922,
236
+ 0.0005385875701904297,
237
+ 0.0005385875701904297,
238
+ 0.0005385875701904297,
239
+ 0.0005385875701904297,
240
+ 0.0005385875701904297,
241
+ 0.0005385875701904297,
242
+ 0.0005385875701904297,
243
+ 0.0005385875701904297
244
+ ]
245
+ },
246
+ {
247
+ "block_idx": 11,
248
+ "initial_loss": 0.0012407302856445312,
249
+ "final_loss": 0.000545501708984375,
250
+ "loss_history": [
251
+ 0.0012407302856445312,
252
+ 0.0006963729858398438,
253
+ 0.0005844593048095703,
254
+ 0.000556182861328125,
255
+ 0.0005496025085449218,
256
+ 0.0005462646484375,
257
+ 0.0005456924438476563,
258
+ 0.000545501708984375,
259
+ 0.000545501708984375,
260
+ 0.000545501708984375,
261
+ 0.000545501708984375,
262
+ 0.000545501708984375,
263
+ 0.000545501708984375,
264
+ 0.000545501708984375,
265
+ 0.000545501708984375
266
+ ]
267
+ },
268
+ {
269
+ "block_idx": 12,
270
+ "initial_loss": 0.0037998199462890626,
271
+ "final_loss": 0.0008504867553710937,
272
+ "loss_history": [
273
+ 0.0037998199462890626,
274
+ 0.001897430419921875,
275
+ 0.0011272430419921875,
276
+ 0.0008975028991699219,
277
+ 0.0008566856384277343,
278
+ 0.0008512496948242188,
279
+ 0.0008504867553710937,
280
+ 0.0008504867553710937,
281
+ 0.0008504867553710937,
282
+ 0.0008504867553710937,
283
+ 0.0008504867553710937,
284
+ 0.0008504867553710937,
285
+ 0.0008504867553710937,
286
+ 0.0008504867553710937,
287
+ 0.0008504867553710937
288
+ ]
289
+ },
290
+ {
291
+ "block_idx": 13,
292
+ "initial_loss": 0.007428932189941406,
293
+ "final_loss": 0.0014537811279296876,
294
+ "loss_history": [
295
+ 0.007428932189941406,
296
+ 0.00346221923828125,
297
+ 0.00222015380859375,
298
+ 0.0017164230346679687,
299
+ 0.0015130043029785156,
300
+ 0.0014655113220214844,
301
+ 0.0014542579650878907,
302
+ 0.0014537811279296876,
303
+ 0.0014537811279296876,
304
+ 0.0014537811279296876,
305
+ 0.0014537811279296876,
306
+ 0.0014537811279296876,
307
+ 0.0014537811279296876,
308
+ 0.0014537811279296876,
309
+ 0.0014537811279296876
310
+ ]
311
+ },
312
+ {
313
+ "block_idx": 14,
314
+ "initial_loss": 0.003780555725097656,
315
+ "final_loss": 0.0009485721588134765,
316
+ "loss_history": [
317
+ 0.003780555725097656,
318
+ 0.001812744140625,
319
+ 0.0012543678283691406,
320
+ 0.0010514259338378906,
321
+ 0.0009890079498291015,
322
+ 0.0009590625762939453,
323
+ 0.0009423732757568359,
324
+ 0.0009485721588134765,
325
+ 0.0009485721588134765,
326
+ 0.0009485721588134765,
327
+ 0.0009485721588134765,
328
+ 0.0009485721588134765,
329
+ 0.0009485721588134765,
330
+ 0.0009485721588134765,
331
+ 0.0009485721588134765
332
+ ]
333
+ },
334
+ {
335
+ "block_idx": 15,
336
+ "initial_loss": 0.004898500442504883,
337
+ "final_loss": 0.0010559558868408203,
338
+ "loss_history": [
339
+ 0.004898500442504883,
340
+ 0.0024252414703369142,
341
+ 0.0015625476837158204,
342
+ 0.0012410640716552734,
343
+ 0.0011109352111816407,
344
+ 0.0010650634765625,
345
+ 0.0010560989379882813,
346
+ 0.0010560035705566406,
347
+ 0.0010559558868408203,
348
+ 0.0010559558868408203,
349
+ 0.0010559558868408203,
350
+ 0.0010559558868408203,
351
+ 0.0010559558868408203,
352
+ 0.0010559558868408203,
353
+ 0.0010559558868408203
354
+ ]
355
+ },
356
+ {
357
+ "block_idx": 16,
358
+ "initial_loss": 0.593157958984375,
359
+ "final_loss": 0.0256744384765625,
360
+ "loss_history": [
361
+ 0.593157958984375,
362
+ 0.053570556640625,
363
+ 0.0313751220703125,
364
+ 0.0288818359375,
365
+ 0.02759246826171875,
366
+ 0.02664031982421875,
367
+ 0.0261138916015625,
368
+ 0.0258331298828125,
369
+ 0.0257080078125,
370
+ 0.025689697265625,
371
+ 0.0256744384765625,
372
+ 0.0256744384765625,
373
+ 0.0256744384765625,
374
+ 0.0256744384765625,
375
+ 0.0256744384765625
376
+ ]
377
+ },
378
+ {
379
+ "block_idx": 17,
380
+ "initial_loss": 0.0005576133728027344,
381
+ "final_loss": 0.00028228759765625,
382
+ "loss_history": [
383
+ 0.0005576133728027344,
384
+ 0.00034313201904296876,
385
+ 0.0003024101257324219,
386
+ 0.00028896331787109375,
387
+ 0.00028486251831054686,
388
+ 0.0002833366394042969,
389
+ 0.0002825736999511719,
390
+ 0.00028228759765625,
391
+ 0.00028228759765625,
392
+ 0.00028228759765625,
393
+ 0.00028228759765625,
394
+ 0.00028228759765625,
395
+ 0.00028228759765625,
396
+ 0.00028228759765625,
397
+ 0.00028228759765625
398
+ ]
399
+ },
400
+ {
401
+ "block_idx": 18,
402
+ "initial_loss": 0.0009852409362792968,
403
+ "final_loss": 0.00041103363037109375,
404
+ "loss_history": [
405
+ 0.0009852409362792968,
406
+ 0.0005949020385742187,
407
+ 0.000475311279296875,
408
+ 0.0004314422607421875,
409
+ 0.00041832923889160154,
410
+ 0.0004123687744140625,
411
+ 0.00041189193725585935,
412
+ 0.0004111289978027344,
413
+ 0.00041103363037109375,
414
+ 0.00041103363037109375,
415
+ 0.00041103363037109375,
416
+ 0.00041103363037109375,
417
+ 0.00041103363037109375,
418
+ 0.00041103363037109375,
419
+ 0.00041103363037109375
420
+ ]
421
+ },
422
+ {
423
+ "block_idx": 19,
424
+ "initial_loss": 0.007128143310546875,
425
+ "final_loss": 0.0023443222045898436,
426
+ "loss_history": [
427
+ 0.007128143310546875,
428
+ 0.004366302490234375,
429
+ 0.0033519744873046877,
430
+ 0.002814483642578125,
431
+ 0.002552223205566406,
432
+ 0.0024021148681640627,
433
+ 0.0023515701293945314,
434
+ 0.002345848083496094,
435
+ 0.0023443222045898436,
436
+ 0.0023443222045898436,
437
+ 0.0023443222045898436,
438
+ 0.0023443222045898436,
439
+ 0.0023443222045898436,
440
+ 0.0023443222045898436,
441
+ 0.0023443222045898436
442
+ ]
443
+ },
444
+ {
445
+ "block_idx": 20,
446
+ "initial_loss": 0.03213958740234375,
447
+ "final_loss": 0.005490875244140625,
448
+ "loss_history": [
449
+ 0.03213958740234375,
450
+ 0.006449127197265625,
451
+ 0.0060546875,
452
+ 0.005767059326171875,
453
+ 0.005628204345703125,
454
+ 0.00556182861328125,
455
+ 0.005522918701171875,
456
+ 0.005496978759765625,
457
+ 0.005495452880859375,
458
+ 0.005490875244140625,
459
+ 0.005490875244140625,
460
+ 0.005490875244140625,
461
+ 0.005490875244140625,
462
+ 0.005490875244140625,
463
+ 0.005490875244140625
464
+ ]
465
+ },
466
+ {
467
+ "block_idx": 21,
468
+ "initial_loss": 0.0010260581970214845,
469
+ "final_loss": 0.00032563209533691405,
470
+ "loss_history": [
471
+ 0.0010260581970214845,
472
+ 0.00041527748107910155,
473
+ 0.00034208297729492186,
474
+ 0.00033268928527832033,
475
+ 0.0003280162811279297,
476
+ 0.0003264904022216797,
477
+ 0.0003257274627685547,
478
+ 0.00032563209533691405,
479
+ 0.00032563209533691405,
480
+ 0.00032563209533691405,
481
+ 0.00032563209533691405,
482
+ 0.00032563209533691405,
483
+ 0.00032563209533691405,
484
+ 0.00032563209533691405,
485
+ 0.00032563209533691405
486
+ ]
487
+ },
488
+ {
489
+ "block_idx": 22,
490
+ "initial_loss": 0.0003208637237548828,
491
+ "final_loss": 0.00026960372924804686,
492
+ "loss_history": [
493
+ 0.0003208637237548828,
494
+ 0.00028743743896484373,
495
+ 0.0002779960632324219,
496
+ 0.0002737998962402344,
497
+ 0.0002718925476074219,
498
+ 0.0002713203430175781,
499
+ 0.0002704620361328125,
500
+ 0.00026988983154296875,
501
+ 0.00026960372924804686,
502
+ 0.00026960372924804686,
503
+ 0.00026960372924804686,
504
+ 0.00026960372924804686,
505
+ 0.00026960372924804686,
506
+ 0.00026960372924804686,
507
+ 0.00026960372924804686
508
+ ]
509
+ },
510
+ {
511
+ "block_idx": 23,
512
+ "initial_loss": 0.0004250526428222656,
513
+ "final_loss": 0.0003468036651611328,
514
+ "loss_history": [
515
+ 0.0004250526428222656,
516
+ 0.00036602020263671876,
517
+ 0.00035543441772460936,
518
+ 0.0003505706787109375,
519
+ 0.00034880638122558594,
520
+ 0.00034766197204589845,
521
+ 0.0003472805023193359,
522
+ 0.0003471851348876953,
523
+ 0.00034689903259277344,
524
+ 0.0003468036651611328,
525
+ 0.0003468036651611328,
526
+ 0.0003468036651611328,
527
+ 0.0003468036651611328,
528
+ 0.0003468036651611328,
529
+ 0.0003468036651611328
530
+ ]
531
+ },
532
+ {
533
+ "block_idx": 24,
534
+ "initial_loss": 0.014990234375,
535
+ "final_loss": 0.0033138275146484377,
536
+ "loss_history": [
537
+ 0.014990234375,
538
+ 0.006096649169921875,
539
+ 0.004583740234375,
540
+ 0.003978347778320313,
541
+ 0.0035839080810546875,
542
+ 0.0033908843994140624,
543
+ 0.0033336639404296874,
544
+ 0.0033153533935546876,
545
+ 0.0033138275146484377,
546
+ 0.0033138275146484377,
547
+ 0.0033138275146484377,
548
+ 0.0033138275146484377,
549
+ 0.0033138275146484377,
550
+ 0.0033138275146484377,
551
+ 0.0033138275146484377
552
+ ]
553
+ },
554
+ {
555
+ "block_idx": 25,
556
+ "initial_loss": 0.00021805763244628907,
557
+ "final_loss": 0.00019254684448242188,
558
+ "loss_history": [
559
+ 0.00021805763244628907,
560
+ 0.0001994609832763672,
561
+ 0.0001957416534423828,
562
+ 0.000194549560546875,
563
+ 0.00019321441650390624,
564
+ 0.00019254684448242188,
565
+ 0.00019254684448242188,
566
+ 0.00019254684448242188,
567
+ 0.00019254684448242188,
568
+ 0.00019254684448242188,
569
+ 0.00019254684448242188,
570
+ 0.00019254684448242188,
571
+ 0.00019254684448242188,
572
+ 0.00019254684448242188,
573
+ 0.00019254684448242188
574
+ ]
575
+ },
576
+ {
577
+ "block_idx": 26,
578
+ "initial_loss": 0.0005462169647216797,
579
+ "final_loss": 0.00026702880859375,
580
+ "loss_history": [
581
+ 0.0005462169647216797,
582
+ 0.0003365993499755859,
583
+ 0.0002868175506591797,
584
+ 0.000272369384765625,
585
+ 0.0002685070037841797,
586
+ 0.0002670764923095703,
587
+ 0.00026702880859375,
588
+ 0.00026702880859375,
589
+ 0.00026702880859375,
590
+ 0.00026702880859375,
591
+ 0.00026702880859375,
592
+ 0.00026702880859375,
593
+ 0.00026702880859375,
594
+ 0.00026702880859375,
595
+ 0.00026702880859375
596
+ ]
597
+ },
598
+ {
599
+ "block_idx": 27,
600
+ "initial_loss": 0.0007673263549804688,
601
+ "final_loss": 0.0002560615539550781,
602
+ "loss_history": [
603
+ 0.0007673263549804688,
604
+ 0.00043430328369140623,
605
+ 0.00033512115478515623,
606
+ 0.0002941131591796875,
607
+ 0.00027298927307128906,
608
+ 0.0002627372741699219,
609
+ 0.00025811195373535154,
610
+ 0.0002574443817138672,
611
+ 0.0002564430236816406,
612
+ 0.0002560615539550781,
613
+ 0.0002560615539550781,
614
+ 0.0002560615539550781,
615
+ 0.0002560615539550781,
616
+ 0.0002560615539550781,
617
+ 0.0002560615539550781
618
+ ]
619
+ },
620
+ {
621
+ "block_idx": 28,
622
+ "initial_loss": 0.0002861499786376953,
623
+ "final_loss": 0.00020728111267089843,
624
+ "loss_history": [
625
+ 0.0002861499786376953,
626
+ 0.00023488998413085939,
627
+ 0.00021734237670898438,
628
+ 0.0002105236053466797,
629
+ 0.00020794868469238282,
630
+ 0.00020751953125,
631
+ 0.00020732879638671876,
632
+ 0.00020728111267089843,
633
+ 0.00020728111267089843,
634
+ 0.00020728111267089843,
635
+ 0.00020728111267089843,
636
+ 0.00020728111267089843,
637
+ 0.00020728111267089843,
638
+ 0.00020728111267089843,
639
+ 0.00020728111267089843
640
+ ]
641
+ },
642
+ {
643
+ "block_idx": 29,
644
+ "initial_loss": 0.00020117759704589843,
645
+ "final_loss": 0.00017614364624023436,
646
+ "loss_history": [
647
+ 0.00020117759704589843,
648
+ 0.00018296241760253906,
649
+ 0.00017867088317871093,
650
+ 0.0001769542694091797,
651
+ 0.00017638206481933593,
652
+ 0.0001762866973876953,
653
+ 0.00017614364624023436,
654
+ 0.00017614364624023436,
655
+ 0.00017614364624023436,
656
+ 0.00017614364624023436,
657
+ 0.00017614364624023436,
658
+ 0.00017614364624023436,
659
+ 0.00017614364624023436,
660
+ 0.00017614364624023436,
661
+ 0.00017614364624023436
662
+ ]
663
+ },
664
+ {
665
+ "block_idx": 30,
666
+ "initial_loss": 0.00020401477813720704,
667
+ "final_loss": 0.00012984275817871094,
668
+ "loss_history": [
669
+ 0.00020401477813720704,
670
+ 0.000142669677734375,
671
+ 0.0001349925994873047,
672
+ 0.0001321077346801758,
673
+ 0.00013036727905273436,
674
+ 0.00012993812561035156,
675
+ 0.00012979507446289062,
676
+ 0.00012984275817871094,
677
+ 0.00012984275817871094,
678
+ 0.00012984275817871094,
679
+ 0.00012984275817871094,
680
+ 0.00012984275817871094,
681
+ 0.00012984275817871094,
682
+ 0.00012984275817871094,
683
+ 0.00012984275817871094
684
+ ]
685
+ },
686
+ {
687
+ "block_idx": 31,
688
+ "initial_loss": 0.004956626892089843,
689
+ "final_loss": 0.0012099266052246094,
690
+ "loss_history": [
691
+ 0.004956626892089843,
692
+ 0.002651214599609375,
693
+ 0.0018392562866210937,
694
+ 0.0014147758483886719,
695
+ 0.0012822151184082031,
696
+ 0.0012298583984375,
697
+ 0.001214599609375,
698
+ 0.0012095451354980468,
699
+ 0.0012099266052246094,
700
+ 0.0012099266052246094,
701
+ 0.0012099266052246094,
702
+ 0.0012099266052246094,
703
+ 0.0012099266052246094,
704
+ 0.0012099266052246094,
705
+ 0.0012099266052246094
706
+ ]
707
+ },
708
+ {
709
+ "block_idx": 32,
710
+ "initial_loss": 0.0003849029541015625,
711
+ "final_loss": 0.00016980171203613282,
712
+ "loss_history": [
713
+ 0.0003849029541015625,
714
+ 0.0001994609832763672,
715
+ 0.0001784086227416992,
716
+ 0.00017294883728027343,
717
+ 0.00017170906066894532,
718
+ 0.00017042160034179686,
719
+ 0.00016989707946777344,
720
+ 0.00016980171203613282,
721
+ 0.00016980171203613282,
722
+ 0.00016980171203613282,
723
+ 0.00016980171203613282,
724
+ 0.00016980171203613282,
725
+ 0.00016980171203613282,
726
+ 0.00016980171203613282,
727
+ 0.00016980171203613282
728
+ ]
729
+ },
730
+ {
731
+ "block_idx": 33,
732
+ "initial_loss": 0.0009334564208984375,
733
+ "final_loss": 0.0005759716033935547,
734
+ "loss_history": [
735
+ 0.0009334564208984375,
736
+ 0.0007002830505371093,
737
+ 0.0006319999694824219,
738
+ 0.0006050586700439454,
739
+ 0.0005857467651367188,
740
+ 0.0005797386169433594,
741
+ 0.0005763053894042968,
742
+ 0.0005759716033935547,
743
+ 0.0005759716033935547,
744
+ 0.0005759716033935547,
745
+ 0.0005759716033935547,
746
+ 0.0005759716033935547,
747
+ 0.0005759716033935547,
748
+ 0.0005759716033935547,
749
+ 0.0005759716033935547
750
+ ]
751
+ },
752
+ {
753
+ "block_idx": 34,
754
+ "initial_loss": 0.007792282104492188,
755
+ "final_loss": 0.0014382362365722655,
756
+ "loss_history": [
757
+ 0.007792282104492188,
758
+ 0.0023235321044921876,
759
+ 0.00167999267578125,
760
+ 0.0015173912048339843,
761
+ 0.0014451026916503906,
762
+ 0.0014444351196289062,
763
+ 0.0014398574829101562,
764
+ 0.0014382362365722655,
765
+ 0.0014382362365722655,
766
+ 0.0014382362365722655,
767
+ 0.0014382362365722655,
768
+ 0.0014382362365722655,
769
+ 0.0014382362365722655,
770
+ 0.0014382362365722655,
771
+ 0.0014382362365722655
772
+ ]
773
+ },
774
+ {
775
+ "block_idx": 35,
776
+ "initial_loss": 0.00018830299377441405,
777
+ "final_loss": 0.00016567707061767578,
778
+ "loss_history": [
779
+ 0.00018830299377441405,
780
+ 0.00017404556274414062,
781
+ 0.00017006397247314452,
782
+ 0.00016727447509765626,
783
+ 0.00016624927520751952,
784
+ 0.00016596317291259766,
785
+ 0.0001657247543334961,
786
+ 0.00016574859619140626,
787
+ 0.00016567707061767578,
788
+ 0.00016567707061767578,
789
+ 0.00016567707061767578,
790
+ 0.00016567707061767578,
791
+ 0.00016567707061767578,
792
+ 0.00016567707061767578,
793
+ 0.00016567707061767578
794
+ ]
795
+ },
796
+ {
797
+ "block_idx": 36,
798
+ "initial_loss": 0.03123321533203125,
799
+ "final_loss": 0.00424652099609375,
800
+ "loss_history": [
801
+ 0.03123321533203125,
802
+ 0.007398223876953125,
803
+ 0.0052978515625,
804
+ 0.004742431640625,
805
+ 0.004537200927734375,
806
+ 0.004396820068359375,
807
+ 0.00431976318359375,
808
+ 0.00427703857421875,
809
+ 0.004259490966796875,
810
+ 0.004247283935546875,
811
+ 0.00424652099609375,
812
+ 0.00424652099609375,
813
+ 0.00424652099609375,
814
+ 0.00424652099609375,
815
+ 0.00424652099609375
816
+ ]
817
+ },
818
+ {
819
+ "block_idx": 37,
820
+ "initial_loss": 0.003260040283203125,
821
+ "final_loss": 0.0008220672607421875,
822
+ "loss_history": [
823
+ 0.003260040283203125,
824
+ 0.0015760421752929687,
825
+ 0.001082611083984375,
826
+ 0.0009029388427734375,
827
+ 0.0008604049682617188,
828
+ 0.0008335113525390625,
829
+ 0.0008257865905761719,
830
+ 0.00082244873046875,
831
+ 0.0008220672607421875,
832
+ 0.0008220672607421875,
833
+ 0.0008220672607421875,
834
+ 0.0008220672607421875,
835
+ 0.0008220672607421875,
836
+ 0.0008220672607421875,
837
+ 0.0008220672607421875
838
+ ]
839
+ },
840
+ {
841
+ "block_idx": 38,
842
+ "initial_loss": 0.0037703514099121094,
843
+ "final_loss": 0.0013566970825195312,
844
+ "loss_history": [
845
+ 0.0037703514099121094,
846
+ 0.0023791313171386717,
847
+ 0.0018778800964355468,
848
+ 0.0016117095947265625,
849
+ 0.0014848709106445312,
850
+ 0.0014117240905761718,
851
+ 0.0013762474060058593,
852
+ 0.0013663291931152344,
853
+ 0.0013625144958496094,
854
+ 0.0013580322265625,
855
+ 0.0013582229614257813,
856
+ 0.0013566970825195312,
857
+ 0.0013566970825195312,
858
+ 0.0013566970825195312,
859
+ 0.0013566970825195312
860
+ ]
861
+ },
862
+ {
863
+ "block_idx": 39,
864
+ "initial_loss": 0.008080291748046874,
865
+ "final_loss": 0.0030500411987304686,
866
+ "loss_history": [
867
+ 0.008080291748046874,
868
+ 0.003968238830566406,
869
+ 0.0035762786865234375,
870
+ 0.003378486633300781,
871
+ 0.003237152099609375,
872
+ 0.0031444549560546873,
873
+ 0.0030731201171875,
874
+ 0.00305328369140625,
875
+ 0.0030496597290039064,
876
+ 0.0030500411987304686,
877
+ 0.0030500411987304686,
878
+ 0.0030500411987304686,
879
+ 0.0030500411987304686,
880
+ 0.0030500411987304686,
881
+ 0.0030500411987304686
882
+ ]
883
+ }
884
+ ]
885
+ }
transformer_2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4398642dbd90dff7308311a594d4bb047816c9dace236616c906a28bf6df4992
3
+ size 41221753936
transformer_2/quant_params.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e033d4759e02757508e64ea5a9980d2d0880a85eda8055a6eb6a49fbdbe39b86
3
+ size 12585666845