cyzero-kim commited on
Commit
6b9f115
·
verified ·
1 Parent(s): 17bf541

Upload folder using huggingface_hub

Browse files
act_dict.json CHANGED
@@ -1,2290 +1,3 @@
1
- {
2
- "model.layers.0.input_layernorm": {
3
- "input": [
4
- -0.22867023944854736,
5
- 1.1115107536315918
6
- ],
7
- "output": [
8
- -2.1261632442474365,
9
- 3.831766366958618
10
- ]
11
- },
12
- "model.layers.0.mlp.act_fn": {
13
- "input2": [
14
- 2.046371889719012e-07,
15
- 0.7579811811447144
16
- ],
17
- "output": [
18
- -0.2726189196109772,
19
- 1.9698854684829712
20
- ]
21
- },
22
- "model.layers.0.mlp.w1": {
23
- "output": [
24
- -1.3973281383514404,
25
- 1.1478052139282227
26
- ]
27
- },
28
- "model.layers.0.mlp.w2": {
29
- "input": [
30
- -0.19404251873493195,
31
- 0.20824076235294342
32
- ],
33
- "output": [
34
- -0.6336596012115479,
35
- 0.544675350189209
36
- ]
37
- },
38
- "model.layers.0.mlp.w3": {
39
- "output": [
40
- -0.6615440845489502,
41
- 0.7680639028549194
42
- ]
43
- },
44
- "model.layers.0.post_attention_layernorm": {
45
- "input": [
46
- -0.5048871040344238,
47
- 0.5236737728118896
48
- ],
49
- "output": [
50
- -0.925523042678833,
51
- 0.7021209001541138
52
- ]
53
- },
54
- "model.layers.0.self_attn.k_proj": {
55
- "output": [
56
- -16.452835083007812,
57
- 6.599212646484375
58
- ]
59
- },
60
- "model.layers.0.self_attn.o_proj": {
61
- "output": [
62
- -0.5184494256973267,
63
- 0.46981656551361084
64
- ]
65
- },
66
- "model.layers.0.self_attn.pv_bmm": {
67
- "input": [
68
- -5.756418258728857e-11,
69
- 4.316623210906982
70
- ],
71
- "input2": [
72
- -0.23561592400074005,
73
- 0.33119601011276245
74
- ],
75
- "output": [
76
- -0.21975915133953094,
77
- 0.3089066743850708
78
- ]
79
- },
80
- "model.layers.0.self_attn.q_proj": {
81
- "output": [
82
- -9.491658210754395,
83
- 14.472428321838379
84
- ]
85
- },
86
- "model.layers.0.self_attn.qk_bmm": {
87
- "input": [
88
- -15.129252433776855,
89
- 15.248381614685059
90
- ],
91
- "input2": [
92
- -16.292478561401367,
93
- 16.165193557739258
94
- ],
95
- "output": [
96
- -350.88092041015625,
97
- 296.008544921875
98
- ]
99
- },
100
- "model.layers.0.self_attn.v_proj": {
101
- "output": [
102
- -0.23130042850971222,
103
- 0.3251298666000366
104
- ]
105
- },
106
- "model.layers.1.input_layernorm": {
107
- "input": [
108
- -0.6882500052452087,
109
- 0.5670577883720398
110
- ],
111
- "output": [
112
- -1.9846813678741455,
113
- 1.2388458251953125
114
- ]
115
- },
116
- "model.layers.1.mlp.act_fn": {
117
- "input2": [
118
- 0.0,
119
- 0.7920277714729309
120
- ],
121
- "output": [
122
- -0.2325526624917984,
123
- 1.414695143699646
124
- ]
125
- },
126
- "model.layers.1.mlp.w1": {
127
- "output": [
128
- -1.9397541284561157,
129
- 1.1908618211746216
130
- ]
131
- },
132
- "model.layers.1.mlp.w2": {
133
- "input": [
134
- -0.19818152487277985,
135
- 0.3571622967720032
136
- ],
137
- "output": [
138
- -0.4769497513771057,
139
- 0.557122528553009
140
- ]
141
- },
142
- "model.layers.1.mlp.w3": {
143
- "output": [
144
- -0.8463128209114075,
145
- 0.9521018862724304
146
- ]
147
- },
148
- "model.layers.1.post_attention_layernorm": {
149
- "input": [
150
- -0.4584429860115051,
151
- 0.3836226463317871
152
- ],
153
- "output": [
154
- -0.4750775098800659,
155
- 0.45680534839630127
156
- ]
157
- },
158
- "model.layers.1.self_attn.k_proj": {
159
- "output": [
160
- -10.099939346313477,
161
- 9.865056991577148
162
- ]
163
- },
164
- "model.layers.1.self_attn.o_proj": {
165
- "output": [
166
- -0.19676999747753143,
167
- 0.458579957485199
168
- ]
169
- },
170
- "model.layers.1.self_attn.pv_bmm": {
171
- "input": [
172
- -2.383558073137948e-10,
173
- 3.8818066120147705
174
- ],
175
- "input2": [
176
- -0.5660212635993958,
177
- 0.6268333792686462
178
- ],
179
- "output": [
180
- -0.2186438888311386,
181
- 0.2836461067199707
182
- ]
183
- },
184
- "model.layers.1.self_attn.q_proj": {
185
- "output": [
186
- -6.189110279083252,
187
- 6.045177936553955
188
- ]
189
- },
190
- "model.layers.1.self_attn.qk_bmm": {
191
- "input": [
192
- -6.714675426483154,
193
- 6.662216663360596
194
- ],
195
- "input2": [
196
- -9.725031852722168,
197
- 9.801608085632324
198
- ],
199
- "output": [
200
- -283.8371887207031,
201
- 185.15756225585938
202
- ]
203
- },
204
- "model.layers.1.self_attn.v_proj": {
205
- "output": [
206
- -0.556850016117096,
207
- 0.6166768670082092
208
- ]
209
- },
210
- "model.layers.10.input_layernorm": {
211
- "input": [
212
- -99.1098861694336,
213
- 11.07666015625
214
- ],
215
- "output": [
216
- -1.4399017095565796,
217
- 1.4974979162216187
218
- ]
219
- },
220
- "model.layers.10.mlp.act_fn": {
221
- "input2": [
222
- 3.426980811127578e-06,
223
- 0.9152793288230896
224
- ],
225
- "output": [
226
- -0.2603260576725006,
227
- 4.84480619430542
228
- ]
229
- },
230
- "model.layers.10.mlp.w1": {
231
- "output": [
232
- -3.465219497680664,
233
- 2.4651899337768555
234
- ]
235
- },
236
- "model.layers.10.mlp.w2": {
237
- "input": [
238
- -0.282131165266037,
239
- 0.384011834859848
240
- ],
241
- "output": [
242
- -1.5551173686981201,
243
- 1.1471128463745117
244
- ]
245
- },
246
- "model.layers.10.mlp.w3": {
247
- "output": [
248
- -0.7345466017723083,
249
- 0.6632845997810364
250
- ]
251
- },
252
- "model.layers.10.post_attention_layernorm": {
253
- "input": [
254
- -44.71582794189453,
255
- 4.991600036621094
256
- ],
257
- "output": [
258
- -0.597541868686676,
259
- 0.8133209347724915
260
- ]
261
- },
262
- "model.layers.10.self_attn.k_proj": {
263
- "output": [
264
- -18.501943588256836,
265
- 14.959020614624023
266
- ]
267
- },
268
- "model.layers.10.self_attn.o_proj": {
269
- "output": [
270
- -1.499884009361267,
271
- 0.9227837324142456
272
- ]
273
- },
274
- "model.layers.10.self_attn.pv_bmm": {
275
- "input": [
276
- 0.0,
277
- 10.06771469116211
278
- ],
279
- "input2": [
280
- -0.9095252752304077,
281
- 1.0232160091400146
282
- ],
283
- "output": [
284
- -0.5237104892730713,
285
- 0.6686661243438721
286
- ]
287
- },
288
- "model.layers.10.self_attn.q_proj": {
289
- "output": [
290
- -9.523984909057617,
291
- 10.38270378112793
292
- ]
293
- },
294
- "model.layers.10.self_attn.qk_bmm": {
295
- "input": [
296
- -10.853270530700684,
297
- 9.348071098327637
298
- ],
299
- "input2": [
300
- -18.354463577270508,
301
- 17.64851951599121
302
- ],
303
- "output": [
304
- -338.5840148925781,
305
- 253.34640502929688
306
- ]
307
- },
308
- "model.layers.10.self_attn.v_proj": {
309
- "output": [
310
- -0.9095847010612488,
311
- 1.023282766342163
312
- ]
313
- },
314
- "model.layers.11.input_layernorm": {
315
- "input": [
316
- -101.14524841308594,
317
- 11.28887939453125
318
- ],
319
- "output": [
320
- -1.5151044130325317,
321
- 1.873944878578186
322
- ]
323
- },
324
- "model.layers.11.mlp.act_fn": {
325
- "input2": [
326
- 3.2362688671128126e-06,
327
- 0.9267445206642151
328
- ],
329
- "output": [
330
- -0.2646248936653137,
331
- 4.923959732055664
332
- ]
333
- },
334
- "model.layers.11.mlp.w1": {
335
- "output": [
336
- -3.655752658843994,
337
- 2.5590267181396484
338
- ]
339
- },
340
- "model.layers.11.mlp.w2": {
341
- "input": [
342
- -0.3750685453414917,
343
- 0.3230517506599426
344
- ],
345
- "output": [
346
- -1.2398120164871216,
347
- 0.937217116355896
348
- ]
349
- },
350
- "model.layers.11.mlp.w3": {
351
- "output": [
352
- -0.6163890361785889,
353
- 0.6410446166992188
354
- ]
355
- },
356
- "model.layers.11.post_attention_layernorm": {
357
- "input": [
358
- -77.12692260742188,
359
- 8.598007202148438
360
- ],
361
- "output": [
362
- -0.8170561790466309,
363
- 0.9191882610321045
364
- ]
365
- },
366
- "model.layers.11.self_attn.k_proj": {
367
- "output": [
368
- -22.057157516479492,
369
- 19.91728401184082
370
- ]
371
- },
372
- "model.layers.11.self_attn.o_proj": {
373
- "output": [
374
- -1.0617130994796753,
375
- 1.217335820198059
376
- ]
377
- },
378
- "model.layers.11.self_attn.pv_bmm": {
379
- "input": [
380
- 0.0,
381
- 20.30547332763672
382
- ],
383
- "input2": [
384
- -1.1099920272827148,
385
- 1.2887194156646729
386
- ],
387
- "output": [
388
- -0.921210527420044,
389
- 0.7567086219787598
390
- ]
391
- },
392
- "model.layers.11.self_attn.q_proj": {
393
- "output": [
394
- -11.205497741699219,
395
- 10.441486358642578
396
- ]
397
- },
398
- "model.layers.11.self_attn.qk_bmm": {
399
- "input": [
400
- -10.428231239318848,
401
- 10.510344505310059
402
- ],
403
- "input2": [
404
- -22.121395111083984,
405
- 19.97528839111328
406
- ],
407
- "output": [
408
- -381.7677917480469,
409
- 238.34701538085938
410
- ]
411
- },
412
- "model.layers.11.self_attn.v_proj": {
413
- "output": [
414
- -1.1099989414215088,
415
- 1.2887275218963623
416
- ]
417
- },
418
- "model.layers.12.input_layernorm": {
419
- "input": [
420
- -99.4482650756836,
421
- 11.078857421875
422
- ],
423
- "output": [
424
- -1.569658875465393,
425
- 1.8513926267623901
426
- ]
427
- },
428
- "model.layers.12.mlp.act_fn": {
429
- "input2": [
430
- 2.2228719842587452e-07,
431
- 0.9314825534820557
432
- ],
433
- "output": [
434
- -0.2631029188632965,
435
- 6.441186904907227
436
- ]
437
- },
438
- "model.layers.12.mlp.w1": {
439
- "output": [
440
- -3.6479241847991943,
441
- 3.3462159633636475
442
- ]
443
- },
444
- "model.layers.12.mlp.w2": {
445
- "input": [
446
- -0.4353678822517395,
447
- 0.3869936466217041
448
- ],
449
- "output": [
450
- -1.3589602708816528,
451
- 1.455013632774353
452
- ]
453
- },
454
- "model.layers.12.mlp.w3": {
455
- "output": [
456
- -0.8044908046722412,
457
- 0.5105422735214233
458
- ]
459
- },
460
- "model.layers.12.post_attention_layernorm": {
461
- "input": [
462
- -62.470062255859375,
463
- 6.949943542480469
464
- ],
465
- "output": [
466
- -1.0635490417480469,
467
- 1.0388154983520508
468
- ]
469
- },
470
- "model.layers.12.self_attn.k_proj": {
471
- "output": [
472
- -21.62252426147461,
473
- 17.76136016845703
474
- ]
475
- },
476
- "model.layers.12.self_attn.o_proj": {
477
- "output": [
478
- -0.8174425363540649,
479
- 0.9816339015960693
480
- ]
481
- },
482
- "model.layers.12.self_attn.pv_bmm": {
483
- "input": [
484
- 0.0,
485
- 6.8579325675964355
486
- ],
487
- "input2": [
488
- -1.3389040231704712,
489
- 1.4144874811172485
490
- ],
491
- "output": [
492
- -0.7504150867462158,
493
- 0.6463429927825928
494
- ]
495
- },
496
- "model.layers.12.self_attn.q_proj": {
497
- "output": [
498
- -7.999295234680176,
499
- 9.738272666931152
500
- ]
501
- },
502
- "model.layers.12.self_attn.qk_bmm": {
503
- "input": [
504
- -7.916897296905518,
505
- 9.486627578735352
506
- ],
507
- "input2": [
508
- -21.433029174804688,
509
- 17.886558532714844
510
- ],
511
- "output": [
512
- -259.88922119140625,
513
- 216.72589111328125
514
- ]
515
- },
516
- "model.layers.12.self_attn.v_proj": {
517
- "output": [
518
- -1.3148822784423828,
519
- 1.3891096115112305
520
- ]
521
- },
522
- "model.layers.13.input_layernorm": {
523
- "input": [
524
- -86.26165771484375,
525
- 9.588691711425781
526
- ],
527
- "output": [
528
- -1.649336576461792,
529
- 1.9763603210449219
530
- ]
531
- },
532
- "model.layers.13.mlp.act_fn": {
533
- "input2": [
534
- 2.544876906540594e-06,
535
- 0.9145498871803284
536
- ],
537
- "output": [
538
- -0.2592918276786804,
539
- 7.993622779846191
540
- ]
541
- },
542
- "model.layers.13.mlp.w1": {
543
- "output": [
544
- -5.687346458435059,
545
- 5.135588645935059
546
- ]
547
- },
548
- "model.layers.13.mlp.w2": {
549
- "input": [
550
- -0.5235582590103149,
551
- 0.4653850793838501
552
- ],
553
- "output": [
554
- -1.4153391122817993,
555
- 1.788828730583191
556
- ]
557
- },
558
- "model.layers.13.mlp.w3": {
559
- "output": [
560
- -0.7486878633499146,
561
- 1.123031735420227
562
- ]
563
- },
564
- "model.layers.13.post_attention_layernorm": {
565
- "input": [
566
- -63.23237609863281,
567
- 7.0157012939453125
568
- ],
569
- "output": [
570
- -0.8964369297027588,
571
- 1.1264958381652832
572
- ]
573
- },
574
- "model.layers.13.self_attn.k_proj": {
575
- "output": [
576
- -18.75926971435547,
577
- 22.478782653808594
578
- ]
579
- },
580
- "model.layers.13.self_attn.o_proj": {
581
- "output": [
582
- -0.9660946130752563,
583
- 1.3633023500442505
584
- ]
585
- },
586
- "model.layers.13.self_attn.pv_bmm": {
587
- "input": [
588
- 0.0,
589
- 14.002142906188965
590
- ],
591
- "input2": [
592
- -1.2294055223464966,
593
- 1.3193620443344116
594
- ],
595
- "output": [
596
- -0.8531197309494019,
597
- 0.7348039150238037
598
- ]
599
- },
600
- "model.layers.13.self_attn.q_proj": {
601
- "output": [
602
- -8.40859603881836,
603
- 10.566553115844727
604
- ]
605
- },
606
- "model.layers.13.self_attn.qk_bmm": {
607
- "input": [
608
- -9.85166072845459,
609
- 9.03686237335205
610
- ],
611
- "input2": [
612
- -18.746200561523438,
613
- 22.11090087890625
614
- ],
615
- "output": [
616
- -304.88323974609375,
617
- 275.91217041015625
618
- ]
619
- },
620
- "model.layers.13.self_attn.v_proj": {
621
- "output": [
622
- -1.229308843612671,
623
- 1.319258451461792
624
- ]
625
- },
626
- "model.layers.14.input_layernorm": {
627
- "input": [
628
- -92.7020492553711,
629
- 10.2801513671875
630
- ],
631
- "output": [
632
- -1.568450927734375,
633
- 2.1694462299346924
634
- ]
635
- },
636
- "model.layers.14.mlp.act_fn": {
637
- "input2": [
638
- 6.2880517361918464e-06,
639
- 0.9631829261779785
640
- ],
641
- "output": [
642
- -0.2625121474266052,
643
- 6.425864219665527
644
- ]
645
- },
646
- "model.layers.14.mlp.w1": {
647
- "output": [
648
- -3.615870475769043,
649
- 3.3168134689331055
650
- ]
651
- },
652
- "model.layers.14.mlp.w2": {
653
- "input": [
654
- -0.854144811630249,
655
- 0.49869322776794434
656
- ],
657
- "output": [
658
- -2.835049867630005,
659
- 2.239513635635376
660
- ]
661
- },
662
- "model.layers.14.mlp.w3": {
663
- "output": [
664
- -1.0184581279754639,
665
- 1.0759518146514893
666
- ]
667
- },
668
- "model.layers.14.post_attention_layernorm": {
669
- "input": [
670
- -62.98443603515625,
671
- 6.968025207519531
672
- ],
673
- "output": [
674
- -0.8999509811401367,
675
- 1.0614806413650513
676
- ]
677
- },
678
- "model.layers.14.self_attn.k_proj": {
679
- "output": [
680
- -15.748926162719727,
681
- 20.759946823120117
682
- ]
683
- },
684
- "model.layers.14.self_attn.o_proj": {
685
- "output": [
686
- -1.1212328672409058,
687
- 1.5860103368759155
688
- ]
689
- },
690
- "model.layers.14.self_attn.pv_bmm": {
691
- "input": [
692
- 0.0,
693
- 25.853843688964844
694
- ],
695
- "input2": [
696
- -1.298612117767334,
697
- 1.2684118747711182
698
- ],
699
- "output": [
700
- -0.9813904166221619,
701
- 1.0367913246154785
702
- ]
703
- },
704
- "model.layers.14.self_attn.q_proj": {
705
- "output": [
706
- -7.34971284866333,
707
- 11.581365585327148
708
- ]
709
- },
710
- "model.layers.14.self_attn.qk_bmm": {
711
- "input": [
712
- -12.523106575012207,
713
- 10.125065803527832
714
- ],
715
- "input2": [
716
- -21.095430374145508,
717
- 20.93062400817871
718
- ],
719
- "output": [
720
- -370.3804626464844,
721
- 339.1870422363281
722
- ]
723
- },
724
- "model.layers.14.self_attn.v_proj": {
725
- "output": [
726
- -1.2987051010131836,
727
- 1.2685027122497559
728
- ]
729
- },
730
- "model.layers.15.input_layernorm": {
731
- "input": [
732
- -96.00485229492188,
733
- 10.613883972167969
734
- ],
735
- "output": [
736
- -1.5501868724822998,
737
- 1.7997932434082031
738
- ]
739
- },
740
- "model.layers.15.mlp.act_fn": {
741
- "input2": [
742
- 1.4850113529973896e-06,
743
- 0.9971228241920471
744
- ],
745
- "output": [
746
- -0.2568572461605072,
747
- 9.069701194763184
748
- ]
749
- },
750
- "model.layers.15.mlp.w1": {
751
- "output": [
752
- -4.972715377807617,
753
- 5.011870384216309
754
- ]
755
- },
756
- "model.layers.15.mlp.w2": {
757
- "input": [
758
- -0.5734550952911377,
759
- 0.7439417839050293
760
- ],
761
- "output": [
762
- -2.318004608154297,
763
- 2.227762222290039
764
- ]
765
- },
766
- "model.layers.15.mlp.w3": {
767
- "output": [
768
- -0.6860116124153137,
769
- 0.545910656452179
770
- ]
771
- },
772
- "model.layers.15.post_attention_layernorm": {
773
- "input": [
774
- -49.25102996826172,
775
- 5.433860778808594
776
- ],
777
- "output": [
778
- -0.9355494976043701,
779
- 0.9282405376434326
780
- ]
781
- },
782
- "model.layers.15.self_attn.k_proj": {
783
- "output": [
784
- -18.496917724609375,
785
- 17.78549575805664
786
- ]
787
- },
788
- "model.layers.15.self_attn.o_proj": {
789
- "output": [
790
- -1.700047492980957,
791
- 1.5626873970031738
792
- ]
793
- },
794
- "model.layers.15.self_attn.pv_bmm": {
795
- "input": [
796
- 0.0,
797
- 17.365955352783203
798
- ],
799
- "input2": [
800
- -1.4967215061187744,
801
- 1.2101151943206787
802
- ],
803
- "output": [
804
- -1.2445505857467651,
805
- 0.9747527837753296
806
- ]
807
- },
808
- "model.layers.15.self_attn.q_proj": {
809
- "output": [
810
- -12.66657829284668,
811
- 15.178054809570312
812
- ]
813
- },
814
- "model.layers.15.self_attn.qk_bmm": {
815
- "input": [
816
- -13.573946952819824,
817
- 13.897137641906738
818
- ],
819
- "input2": [
820
- -21.92919921875,
821
- 17.450702667236328
822
- ],
823
- "output": [
824
- -510.9229431152344,
825
- 232.58938598632812
826
- ]
827
- },
828
- "model.layers.15.self_attn.v_proj": {
829
- "output": [
830
- -1.496742606163025,
831
- 1.2101324796676636
832
- ]
833
- },
834
- "model.layers.16.input_layernorm": {
835
- "input": [
836
- -100.52581787109375,
837
- 11.0853271484375
838
- ],
839
- "output": [
840
- -1.5624299049377441,
841
- 1.8722219467163086
842
- ]
843
- },
844
- "model.layers.16.mlp.act_fn": {
845
- "input2": [
846
- 1.0168923836317845e-06,
847
- 0.9851000905036926
848
- ],
849
- "output": [
850
- -0.2558925151824951,
851
- 10.573112487792969
852
- ]
853
- },
854
- "model.layers.16.mlp.w1": {
855
- "output": [
856
- -4.58713960647583,
857
- 6.344828128814697
858
- ]
859
- },
860
- "model.layers.16.mlp.w2": {
861
- "input": [
862
- -0.9895568490028381,
863
- 0.8258172869682312
864
- ],
865
- "output": [
866
- -2.62292742729187,
867
- 3.516347646713257
868
- ]
869
- },
870
- "model.layers.16.mlp.w3": {
871
- "output": [
872
- -0.651694118976593,
873
- 0.710453450679779
874
- ]
875
- },
876
- "model.layers.16.post_attention_layernorm": {
877
- "input": [
878
- -62.668067932128906,
879
- 6.895301818847656
880
- ],
881
- "output": [
882
- -0.9683569073677063,
883
- 1.0723953247070312
884
- ]
885
- },
886
- "model.layers.16.self_attn.k_proj": {
887
- "output": [
888
- -19.975080490112305,
889
- 19.20680809020996
890
- ]
891
- },
892
- "model.layers.16.self_attn.o_proj": {
893
- "output": [
894
- -1.0730109214782715,
895
- 0.9763514995574951
896
- ]
897
- },
898
- "model.layers.16.self_attn.pv_bmm": {
899
- "input": [
900
- 0.0,
901
- 10.039031028747559
902
- ],
903
- "input2": [
904
- -1.5520944595336914,
905
- 1.1964061260223389
906
- ],
907
- "output": [
908
- -1.0620307922363281,
909
- 0.9147418737411499
910
- ]
911
- },
912
- "model.layers.16.self_attn.q_proj": {
913
- "output": [
914
- -9.887508392333984,
915
- 10.283008575439453
916
- ]
917
- },
918
- "model.layers.16.self_attn.qk_bmm": {
919
- "input": [
920
- -9.462581634521484,
921
- 10.154966354370117
922
- ],
923
- "input2": [
924
- -19.422739028930664,
925
- 19.575674057006836
926
- ],
927
- "output": [
928
- -327.813720703125,
929
- 297.045166015625
930
- ]
931
- },
932
- "model.layers.16.self_attn.v_proj": {
933
- "output": [
934
- -1.5520508289337158,
935
- 1.1963725090026855
936
- ]
937
- },
938
- "model.layers.17.input_layernorm": {
939
- "input": [
940
- -69.5985107421875,
941
- 7.65130615234375
942
- ],
943
- "output": [
944
- -1.562252163887024,
945
- 1.9631842374801636
946
- ]
947
- },
948
- "model.layers.17.mlp.act_fn": {
949
- "input2": [
950
- 3.43927308676939e-06,
951
- 0.9986145496368408
952
- ],
953
- "output": [
954
- -0.2538253664970398,
955
- 10.493779182434082
956
- ]
957
- },
958
- "model.layers.17.mlp.w1": {
959
- "output": [
960
- -8.031157493591309,
961
- 7.366927146911621
962
- ]
963
- },
964
- "model.layers.17.mlp.w2": {
965
- "input": [
966
- -0.7215433716773987,
967
- 1.100175142288208
968
- ],
969
- "output": [
970
- -1.9865412712097168,
971
- 2.7032275199890137
972
- ]
973
- },
974
- "model.layers.17.mlp.w3": {
975
- "output": [
976
- -0.8852017521858215,
977
- 0.6933049559593201
978
- ]
979
- },
980
- "model.layers.17.post_attention_layernorm": {
981
- "input": [
982
- -60.717594146728516,
983
- 6.662433624267578
984
- ],
985
- "output": [
986
- -1.0898340940475464,
987
- 0.8811424970626831
988
- ]
989
- },
990
- "model.layers.17.self_attn.k_proj": {
991
- "output": [
992
- -17.243209838867188,
993
- 20.019657135009766
994
- ]
995
- },
996
- "model.layers.17.self_attn.o_proj": {
997
- "output": [
998
- -1.8149468898773193,
999
- 1.7124121189117432
1000
- ]
1001
- },
1002
- "model.layers.17.self_attn.pv_bmm": {
1003
- "input": [
1004
- -1.9625843172832447e-09,
1005
- 24.50324058532715
1006
- ],
1007
- "input2": [
1008
- -1.8348536491394043,
1009
- 1.6309809684753418
1010
- ],
1011
- "output": [
1012
- -1.139207124710083,
1013
- 1.0449869632720947
1014
- ]
1015
- },
1016
- "model.layers.17.self_attn.q_proj": {
1017
- "output": [
1018
- -11.556499481201172,
1019
- 10.272443771362305
1020
- ]
1021
- },
1022
- "model.layers.17.self_attn.qk_bmm": {
1023
- "input": [
1024
- -9.140716552734375,
1025
- 11.486564636230469
1026
- ],
1027
- "input2": [
1028
- -19.73405647277832,
1029
- 20.20391273498535
1030
- ],
1031
- "output": [
1032
- -374.51507568359375,
1033
- 302.01531982421875
1034
- ]
1035
- },
1036
- "model.layers.17.self_attn.v_proj": {
1037
- "output": [
1038
- -1.8348592519760132,
1039
- 1.6309860944747925
1040
- ]
1041
- },
1042
- "model.layers.18.input_layernorm": {
1043
- "input": [
1044
- -99.9640121459961,
1045
- 10.972625732421875
1046
- ],
1047
- "output": [
1048
- -1.8908522129058838,
1049
- 2.1609737873077393
1050
- ]
1051
- },
1052
- "model.layers.18.mlp.act_fn": {
1053
- "input2": [
1054
- 2.1923647182120476e-06,
1055
- 0.9952065944671631
1056
- ],
1057
- "output": [
1058
- -0.2591128349304199,
1059
- 9.149469375610352
1060
- ]
1061
- },
1062
- "model.layers.18.mlp.w1": {
1063
- "output": [
1064
- -4.569403171539307,
1065
- 5.83414888381958
1066
- ]
1067
- },
1068
- "model.layers.18.mlp.w2": {
1069
- "input": [
1070
- -1.471362829208374,
1071
- 1.2874424457550049
1072
- ],
1073
- "output": [
1074
- -1.6179473400115967,
1075
- 2.5631320476531982
1076
- ]
1077
- },
1078
- "model.layers.18.mlp.w3": {
1079
- "output": [
1080
- -0.9652486443519592,
1081
- 1.1750853061676025
1082
- ]
1083
- },
1084
- "model.layers.18.post_attention_layernorm": {
1085
- "input": [
1086
- -81.42882537841797,
1087
- 8.924331665039062
1088
- ],
1089
- "output": [
1090
- -1.0480235815048218,
1091
- 0.9463497400283813
1092
- ]
1093
- },
1094
- "model.layers.18.self_attn.k_proj": {
1095
- "output": [
1096
- -20.276676177978516,
1097
- 13.96539306640625
1098
- ]
1099
- },
1100
- "model.layers.18.self_attn.o_proj": {
1101
- "output": [
1102
- -1.4689831733703613,
1103
- 3.1518197059631348
1104
- ]
1105
- },
1106
- "model.layers.18.self_attn.pv_bmm": {
1107
- "input": [
1108
- -2.13810147364768e-09,
1109
- 9.233264923095703
1110
- ],
1111
- "input2": [
1112
- -2.1445376873016357,
1113
- 2.1277835369110107
1114
- ],
1115
- "output": [
1116
- -1.4730726480484009,
1117
- 1.3943589925765991
1118
- ]
1119
- },
1120
- "model.layers.18.self_attn.q_proj": {
1121
- "output": [
1122
- -11.622716903686523,
1123
- 11.89944839477539
1124
- ]
1125
- },
1126
- "model.layers.18.self_attn.qk_bmm": {
1127
- "input": [
1128
- -10.726187705993652,
1129
- 11.693303108215332
1130
- ],
1131
- "input2": [
1132
- -18.65675163269043,
1133
- 17.384702682495117
1134
- ],
1135
- "output": [
1136
- -323.7893371582031,
1137
- 281.9984436035156
1138
- ]
1139
- },
1140
- "model.layers.18.self_attn.v_proj": {
1141
- "output": [
1142
- -2.1445248126983643,
1143
- 2.1277706623077393
1144
- ]
1145
- },
1146
- "model.layers.19.input_layernorm": {
1147
- "input": [
1148
- -78.49484252929688,
1149
- 8.620475769042969
1150
- ],
1151
- "output": [
1152
- -1.798056960105896,
1153
- 1.8699792623519897
1154
- ]
1155
- },
1156
- "model.layers.19.mlp.act_fn": {
1157
- "input2": [
1158
- 1.7838705446138192e-07,
1159
- 1.007856011390686
1160
- ],
1161
- "output": [
1162
- -0.2515661120414734,
1163
- 12.509735107421875
1164
- ]
1165
- },
1166
- "model.layers.19.mlp.w1": {
1167
- "output": [
1168
- -7.22218656539917,
1169
- 8.253927230834961
1170
- ]
1171
- },
1172
- "model.layers.19.mlp.w2": {
1173
- "input": [
1174
- -1.2253239154815674,
1175
- 1.445253849029541
1176
- ],
1177
- "output": [
1178
- -4.075857639312744,
1179
- 5.675578594207764
1180
- ]
1181
- },
1182
- "model.layers.19.mlp.w3": {
1183
- "output": [
1184
- -0.8933815360069275,
1185
- 0.7455558180809021
1186
- ]
1187
- },
1188
- "model.layers.19.post_attention_layernorm": {
1189
- "input": [
1190
- -105.57499694824219,
1191
- 11.590507507324219
1192
- ],
1193
- "output": [
1194
- -1.3605623245239258,
1195
- 0.863433837890625
1196
- ]
1197
- },
1198
- "model.layers.19.self_attn.k_proj": {
1199
- "output": [
1200
- -21.901290893554688,
1201
- 20.40802001953125
1202
- ]
1203
- },
1204
- "model.layers.19.self_attn.o_proj": {
1205
- "output": [
1206
- -1.9629877805709839,
1207
- 1.8841489553451538
1208
- ]
1209
- },
1210
- "model.layers.19.self_attn.pv_bmm": {
1211
- "input": [
1212
- -6.853270173579062e-10,
1213
- 21.860992431640625
1214
- ],
1215
- "input2": [
1216
- -2.382417678833008,
1217
- 2.4391417503356934
1218
- ],
1219
- "output": [
1220
- -1.411903738975525,
1221
- 1.1782792806625366
1222
- ]
1223
- },
1224
- "model.layers.19.self_attn.q_proj": {
1225
- "output": [
1226
- -15.740455627441406,
1227
- 13.135919570922852
1228
- ]
1229
- },
1230
- "model.layers.19.self_attn.qk_bmm": {
1231
- "input": [
1232
- -17.215229034423828,
1233
- 15.545093536376953
1234
- ],
1235
- "input2": [
1236
- -20.727087020874023,
1237
- 19.313875198364258
1238
- ],
1239
- "output": [
1240
- -431.6451721191406,
1241
- 358.5186462402344
1242
- ]
1243
- },
1244
- "model.layers.19.self_attn.v_proj": {
1245
- "output": [
1246
- -2.3823821544647217,
1247
- 2.439105272293091
1248
- ]
1249
- },
1250
- "model.layers.2.input_layernorm": {
1251
- "input": [
1252
- -0.8590171933174133,
1253
- 1.0888631343841553
1254
- ],
1255
- "output": [
1256
- -1.7529939413070679,
1257
- 1.168662667274475
1258
- ]
1259
- },
1260
- "model.layers.2.mlp.act_fn": {
1261
- "input2": [
1262
- 3.966258191212546e-06,
1263
- 0.9987732768058777
1264
- ],
1265
- "output": [
1266
- -0.18402370810508728,
1267
- 11.531486511230469
1268
- ]
1269
- },
1270
- "model.layers.2.mlp.w1": {
1271
- "output": [
1272
- -19.380216598510742,
1273
- 14.468793869018555
1274
- ]
1275
- },
1276
- "model.layers.2.mlp.w2": {
1277
- "input": [
1278
- -0.6558618545532227,
1279
- 8.63491439819336
1280
- ],
1281
- "output": [
1282
- -75.92266082763672,
1283
- 11.691207885742188
1284
- ]
1285
- },
1286
- "model.layers.2.mlp.w3": {
1287
- "output": [
1288
- -9.166142463684082,
1289
- 2.6387386322021484
1290
- ]
1291
- },
1292
- "model.layers.2.post_attention_layernorm": {
1293
- "input": [
1294
- -0.7533947229385376,
1295
- 0.8142738342285156
1296
- ],
1297
- "output": [
1298
- -0.4843924641609192,
1299
- 0.5449416041374207
1300
- ]
1301
- },
1302
- "model.layers.2.self_attn.k_proj": {
1303
- "output": [
1304
- -9.543099403381348,
1305
- 10.241374015808105
1306
- ]
1307
- },
1308
- "model.layers.2.self_attn.o_proj": {
1309
- "output": [
1310
- -0.726354718208313,
1311
- 0.5078719854354858
1312
- ]
1313
- },
1314
- "model.layers.2.self_attn.pv_bmm": {
1315
- "input": [
1316
- -6.111011696674495e-11,
1317
- 1.5878328084945679
1318
- ],
1319
- "input2": [
1320
- -0.338035523891449,
1321
- 0.36276984214782715
1322
- ],
1323
- "output": [
1324
- -0.20621609687805176,
1325
- 0.22130507230758667
1326
- ]
1327
- },
1328
- "model.layers.2.self_attn.q_proj": {
1329
- "output": [
1330
- -6.53659200668335,
1331
- 5.810304164886475
1332
- ]
1333
- },
1334
- "model.layers.2.self_attn.qk_bmm": {
1335
- "input": [
1336
- -6.120059967041016,
1337
- 6.072246551513672
1338
- ],
1339
- "input2": [
1340
- -10.258339881896973,
1341
- 10.33911418914795
1342
- ],
1343
- "output": [
1344
- -225.0656280517578,
1345
- 169.3107452392578
1346
- ]
1347
- },
1348
- "model.layers.2.self_attn.v_proj": {
1349
- "output": [
1350
- -0.33430781960487366,
1351
- 0.35876932740211487
1352
- ]
1353
- },
1354
- "model.layers.20.input_layernorm": {
1355
- "input": [
1356
- -115.20829772949219,
1357
- 12.693580627441406
1358
- ],
1359
- "output": [
1360
- -1.69486403465271,
1361
- 1.5065457820892334
1362
- ]
1363
- },
1364
- "model.layers.20.mlp.act_fn": {
1365
- "input2": [
1366
- 2.50033565407648e-07,
1367
- 0.9959600567817688
1368
- ],
1369
- "output": [
1370
- -0.2553894817829132,
1371
- 10.56139850616455
1372
- ]
1373
- },
1374
- "model.layers.20.mlp.w1": {
1375
- "output": [
1376
- -5.983047962188721,
1377
- 7.761791706085205
1378
- ]
1379
- },
1380
- "model.layers.20.mlp.w2": {
1381
- "input": [
1382
- -7.271407604217529,
1383
- 2.8058152198791504
1384
- ],
1385
- "output": [
1386
- -11.011977195739746,
1387
- 6.784623146057129
1388
- ]
1389
- },
1390
- "model.layers.20.mlp.w3": {
1391
- "output": [
1392
- -3.3427953720092773,
1393
- 2.2285304069519043
1394
- ]
1395
- },
1396
- "model.layers.20.post_attention_layernorm": {
1397
- "input": [
1398
- -113.40253448486328,
1399
- 12.49249267578125
1400
- ],
1401
- "output": [
1402
- -1.8028979301452637,
1403
- 0.9336435794830322
1404
- ]
1405
- },
1406
- "model.layers.20.self_attn.k_proj": {
1407
- "output": [
1408
- -21.391155242919922,
1409
- 19.014358520507812
1410
- ]
1411
- },
1412
- "model.layers.20.self_attn.o_proj": {
1413
- "output": [
1414
- -2.64984393119812,
1415
- 3.5715410709381104
1416
- ]
1417
- },
1418
- "model.layers.20.self_attn.pv_bmm": {
1419
- "input": [
1420
- -1.4543530824084883e-09,
1421
- 17.021509170532227
1422
- ],
1423
- "input2": [
1424
- -2.695246458053589,
1425
- 2.2139523029327393
1426
- ],
1427
- "output": [
1428
- -2.0278091430664062,
1429
- 1.692272424697876
1430
- ]
1431
- },
1432
- "model.layers.20.self_attn.q_proj": {
1433
- "output": [
1434
- -17.26282501220703,
1435
- 15.835071563720703
1436
- ]
1437
- },
1438
- "model.layers.20.self_attn.qk_bmm": {
1439
- "input": [
1440
- -15.574592590332031,
1441
- 14.286468505859375
1442
- ],
1443
- "input2": [
1444
- -18.195837020874023,
1445
- 19.527238845825195
1446
- ],
1447
- "output": [
1448
- -459.4617919921875,
1449
- 400.9951171875
1450
- ]
1451
- },
1452
- "model.layers.20.self_attn.v_proj": {
1453
- "output": [
1454
- -2.695233106613159,
1455
- 2.2139413356781006
1456
- ]
1457
- },
1458
- "model.layers.21.input_layernorm": {
1459
- "input": [
1460
- -146.54869079589844,
1461
- 16.176971435546875
1462
- ],
1463
- "output": [
1464
- -1.708757758140564,
1465
- 1.6174501180648804
1466
- ]
1467
- },
1468
- "model.layers.21.mlp.act_fn": {
1469
- "input2": [
1470
- -8.81209771819158e-09,
1471
- 0.9954389333724976
1472
- ],
1473
- "output": [
1474
- -0.26087066531181335,
1475
- 13.050216674804688
1476
- ]
1477
- },
1478
- "model.layers.21.mlp.w1": {
1479
- "output": [
1480
- -107.37953186035156,
1481
- 14.317276000976562
1482
- ]
1483
- },
1484
- "model.layers.21.mlp.w2": {
1485
- "input": [
1486
- -4.829046249389648,
1487
- 10.758381843566895
1488
- ],
1489
- "output": [
1490
- -43.75098419189453,
1491
- 143.9803466796875
1492
- ]
1493
- },
1494
- "model.layers.21.mlp.w3": {
1495
- "output": [
1496
- -7.738009452819824,
1497
- 2.0302696228027344
1498
- ]
1499
- },
1500
- "model.layers.21.post_attention_layernorm": {
1501
- "input": [
1502
- -179.94740295410156,
1503
- 22.532821655273438
1504
- ],
1505
- "output": [
1506
- -2.6346614360809326,
1507
- 1.5907390117645264
1508
- ]
1509
- },
1510
- "model.layers.21.self_attn.k_proj": {
1511
- "output": [
1512
- -26.96712875366211,
1513
- 25.128459930419922
1514
- ]
1515
- },
1516
- "model.layers.21.self_attn.o_proj": {
1517
- "output": [
1518
- -6.288361072540283,
1519
- 10.057985305786133
1520
- ]
1521
- },
1522
- "model.layers.21.self_attn.pv_bmm": {
1523
- "input": [
1524
- -2.280847288815835e-09,
1525
- 7.910551071166992
1526
- ],
1527
- "input2": [
1528
- -4.1691460609436035,
1529
- 3.3707995414733887
1530
- ],
1531
- "output": [
1532
- -3.265413761138916,
1533
- 2.725093364715576
1534
- ]
1535
- },
1536
- "model.layers.21.self_attn.q_proj": {
1537
- "output": [
1538
- -15.797935485839844,
1539
- 16.689754486083984
1540
- ]
1541
- },
1542
- "model.layers.21.self_attn.qk_bmm": {
1543
- "input": [
1544
- -16.69371223449707,
1545
- 16.825159072875977
1546
- ],
1547
- "input2": [
1548
- -24.006988525390625,
1549
- 24.578582763671875
1550
- ],
1551
- "output": [
1552
- -675.0399780273438,
1553
- 528.8992309570312
1554
- ]
1555
- },
1556
- "model.layers.21.self_attn.v_proj": {
1557
- "output": [
1558
- -4.104138374328613,
1559
- 3.318239212036133
1560
- ]
1561
- },
1562
- "model.layers.3.input_layernorm": {
1563
- "input": [
1564
- -72.91954803466797,
1565
- 11.20062255859375
1566
- ],
1567
- "output": [
1568
- -1.0544137954711914,
1569
- 0.9825220108032227
1570
- ]
1571
- },
1572
- "model.layers.3.mlp.act_fn": {
1573
- "input2": [
1574
- 0.0,
1575
- 0.9181910157203674
1576
- ],
1577
- "output": [
1578
- -0.2546980381011963,
1579
- 2.1507670879364014
1580
- ]
1581
- },
1582
- "model.layers.3.mlp.w1": {
1583
- "output": [
1584
- -2.0251128673553467,
1585
- 1.6373252868652344
1586
- ]
1587
- },
1588
- "model.layers.3.mlp.w2": {
1589
- "input": [
1590
- -0.2622146010398865,
1591
- 0.26427924633026123
1592
- ],
1593
- "output": [
1594
- -0.4150099754333496,
1595
- 0.240339994430542
1596
- ]
1597
- },
1598
- "model.layers.3.mlp.w3": {
1599
- "output": [
1600
- -0.5215353965759277,
1601
- 0.5596965551376343
1602
- ]
1603
- },
1604
- "model.layers.3.post_attention_layernorm": {
1605
- "input": [
1606
- -15.465083122253418,
1607
- 2.37296199798584
1608
- ],
1609
- "output": [
1610
- -0.4020472466945648,
1611
- 0.3408661186695099
1612
- ]
1613
- },
1614
- "model.layers.3.self_attn.k_proj": {
1615
- "output": [
1616
- -15.118288040161133,
1617
- 10.582801818847656
1618
- ]
1619
- },
1620
- "model.layers.3.self_attn.o_proj": {
1621
- "output": [
1622
- -0.24997998774051666,
1623
- 0.40536999702453613
1624
- ]
1625
- },
1626
- "model.layers.3.self_attn.pv_bmm": {
1627
- "input": [
1628
- -7.779653032891076e-10,
1629
- 10.15169906616211
1630
- ],
1631
- "input2": [
1632
- -0.5302785634994507,
1633
- 0.46399378776550293
1634
- ],
1635
- "output": [
1636
- -0.2814144492149353,
1637
- 0.2462376356124878
1638
- ]
1639
- },
1640
- "model.layers.3.self_attn.q_proj": {
1641
- "output": [
1642
- -6.861732482910156,
1643
- 7.025106430053711
1644
- ]
1645
- },
1646
- "model.layers.3.self_attn.qk_bmm": {
1647
- "input": [
1648
- -6.992506980895996,
1649
- 7.272207260131836
1650
- ],
1651
- "input2": [
1652
- -15.041023254394531,
1653
- 14.462522506713867
1654
- ],
1655
- "output": [
1656
- -235.2082061767578,
1657
- 207.6825714111328
1658
- ]
1659
- },
1660
- "model.layers.3.self_attn.v_proj": {
1661
- "output": [
1662
- -0.5320653319358826,
1663
- 0.4655570983886719
1664
- ]
1665
- },
1666
- "model.layers.4.input_layernorm": {
1667
- "input": [
1668
- -69.28654479980469,
1669
- 10.645393371582031
1670
- ],
1671
- "output": [
1672
- -1.145024061203003,
1673
- 1.0339395999908447
1674
- ]
1675
- },
1676
- "model.layers.4.mlp.act_fn": {
1677
- "input2": [
1678
- 6.099214715504786e-07,
1679
- 0.8969005942344666
1680
- ],
1681
- "output": [
1682
- -0.2554253041744232,
1683
- 3.815385580062866
1684
- ]
1685
- },
1686
- "model.layers.4.mlp.w1": {
1687
- "output": [
1688
- -3.362837076187134,
1689
- 2.0645267963409424
1690
- ]
1691
- },
1692
- "model.layers.4.mlp.w2": {
1693
- "input": [
1694
- -0.2999441921710968,
1695
- 0.2929687201976776
1696
- ],
1697
- "output": [
1698
- -0.8785357475280762,
1699
- 0.474479079246521
1700
- ]
1701
- },
1702
- "model.layers.4.mlp.w3": {
1703
- "output": [
1704
- -0.5457059144973755,
1705
- 0.7548081874847412
1706
- ]
1707
- },
1708
- "model.layers.4.post_attention_layernorm": {
1709
- "input": [
1710
- -25.324718475341797,
1711
- 3.8873672485351562
1712
- ],
1713
- "output": [
1714
- -0.4368909001350403,
1715
- 0.35887467861175537
1716
- ]
1717
- },
1718
- "model.layers.4.self_attn.k_proj": {
1719
- "output": [
1720
- -17.51902961730957,
1721
- 9.888040542602539
1722
- ]
1723
- },
1724
- "model.layers.4.self_attn.o_proj": {
1725
- "output": [
1726
- -1.472687005996704,
1727
- 0.4930652379989624
1728
- ]
1729
- },
1730
- "model.layers.4.self_attn.pv_bmm": {
1731
- "input": [
1732
- -9.67083635394772e-10,
1733
- 3.160961151123047
1734
- ],
1735
- "input2": [
1736
- -0.6620192527770996,
1737
- 0.5792667865753174
1738
- ],
1739
- "output": [
1740
- -0.42088180780410767,
1741
- 0.3983919620513916
1742
- ]
1743
- },
1744
- "model.layers.4.self_attn.q_proj": {
1745
- "output": [
1746
- -8.15958309173584,
1747
- 7.139636039733887
1748
- ]
1749
- },
1750
- "model.layers.4.self_attn.qk_bmm": {
1751
- "input": [
1752
- -7.773740768432617,
1753
- 7.834951400756836
1754
- ],
1755
- "input2": [
1756
- -18.371265411376953,
1757
- 17.944026947021484
1758
- ],
1759
- "output": [
1760
- -286.5303649902344,
1761
- 196.67303466796875
1762
- ]
1763
- },
1764
- "model.layers.4.self_attn.v_proj": {
1765
- "output": [
1766
- -0.6622653603553772,
1767
- 0.5794822573661804
1768
- ]
1769
- },
1770
- "model.layers.5.input_layernorm": {
1771
- "input": [
1772
- -72.35042572021484,
1773
- 11.107322692871094
1774
- ],
1775
- "output": [
1776
- -1.012054443359375,
1777
- 1.0041477680206299
1778
- ]
1779
- },
1780
- "model.layers.5.mlp.act_fn": {
1781
- "input2": [
1782
- 3.7543460962297104e-07,
1783
- 0.8639066815376282
1784
- ],
1785
- "output": [
1786
- -0.2671676576137543,
1787
- 2.9769954681396484
1788
- ]
1789
- },
1790
- "model.layers.5.mlp.w1": {
1791
- "output": [
1792
- -2.161452531814575,
1793
- 1.832535982131958
1794
- ]
1795
- },
1796
- "model.layers.5.mlp.w2": {
1797
- "input": [
1798
- -0.2855253517627716,
1799
- 0.2660577595233917
1800
- ],
1801
- "output": [
1802
- -1.3505405187606812,
1803
- 0.5793709754943848
1804
- ]
1805
- },
1806
- "model.layers.5.mlp.w3": {
1807
- "output": [
1808
- -0.7771786451339722,
1809
- 0.5618791580200195
1810
- ]
1811
- },
1812
- "model.layers.5.post_attention_layernorm": {
1813
- "input": [
1814
- -16.259342193603516,
1815
- 2.4912033081054688
1816
- ],
1817
- "output": [
1818
- -0.6607040166854858,
1819
- 0.47767114639282227
1820
- ]
1821
- },
1822
- "model.layers.5.self_attn.k_proj": {
1823
- "output": [
1824
- -15.729736328125,
1825
- 17.695953369140625
1826
- ]
1827
- },
1828
- "model.layers.5.self_attn.o_proj": {
1829
- "output": [
1830
- -0.7585533261299133,
1831
- 0.7648529410362244
1832
- ]
1833
- },
1834
- "model.layers.5.self_attn.pv_bmm": {
1835
- "input": [
1836
- 0.0,
1837
- 4.858019828796387
1838
- ],
1839
- "input2": [
1840
- -1.006867527961731,
1841
- 0.48587214946746826
1842
- ],
1843
- "output": [
1844
- -0.42437875270843506,
1845
- 0.33770984411239624
1846
- ]
1847
- },
1848
- "model.layers.5.self_attn.q_proj": {
1849
- "output": [
1850
- -7.287756443023682,
1851
- 7.699162006378174
1852
- ]
1853
- },
1854
- "model.layers.5.self_attn.qk_bmm": {
1855
- "input": [
1856
- -7.554513931274414,
1857
- 7.1508378982543945
1858
- ],
1859
- "input2": [
1860
- -18.049354553222656,
1861
- 16.043872833251953
1862
- ],
1863
- "output": [
1864
- -262.8944396972656,
1865
- 207.77316284179688
1866
- ]
1867
- },
1868
- "model.layers.5.self_attn.v_proj": {
1869
- "output": [
1870
- -1.0068272352218628,
1871
- 0.48585259914398193
1872
- ]
1873
- },
1874
- "model.layers.6.input_layernorm": {
1875
- "input": [
1876
- -76.28170013427734,
1877
- 11.689178466796875
1878
- ],
1879
- "output": [
1880
- -1.3052244186401367,
1881
- 1.2748703956604004
1882
- ]
1883
- },
1884
- "model.layers.6.mlp.act_fn": {
1885
- "input2": [
1886
- 5.687860493708286e-07,
1887
- 0.8548686504364014
1888
- ],
1889
- "output": [
1890
- -0.268370658159256,
1891
- 3.5335166454315186
1892
- ]
1893
- },
1894
- "model.layers.6.mlp.w1": {
1895
- "output": [
1896
- -2.3736090660095215,
1897
- 1.772078037261963
1898
- ]
1899
- },
1900
- "model.layers.6.mlp.w2": {
1901
- "input": [
1902
- -0.26148250699043274,
1903
- 0.2762435972690582
1904
- ],
1905
- "output": [
1906
- -1.1666910648345947,
1907
- 0.9592397212982178
1908
- ]
1909
- },
1910
- "model.layers.6.mlp.w3": {
1911
- "output": [
1912
- -0.5539140701293945,
1913
- 0.6330446004867554
1914
- ]
1915
- },
1916
- "model.layers.6.post_attention_layernorm": {
1917
- "input": [
1918
- -44.39370346069336,
1919
- 6.7775421142578125
1920
- ],
1921
- "output": [
1922
- -0.5560799241065979,
1923
- 0.5263656973838806
1924
- ]
1925
- },
1926
- "model.layers.6.self_attn.k_proj": {
1927
- "output": [
1928
- -17.042301177978516,
1929
- 18.578903198242188
1930
- ]
1931
- },
1932
- "model.layers.6.self_attn.o_proj": {
1933
- "output": [
1934
- -0.4106226861476898,
1935
- 0.5218151807785034
1936
- ]
1937
- },
1938
- "model.layers.6.self_attn.pv_bmm": {
1939
- "input": [
1940
- 0.0,
1941
- 10.370061874389648
1942
- ],
1943
- "input2": [
1944
- -0.6833804845809937,
1945
- 0.7449966669082642
1946
- ],
1947
- "output": [
1948
- -0.5550251603126526,
1949
- 0.47056490182876587
1950
- ]
1951
- },
1952
- "model.layers.6.self_attn.q_proj": {
1953
- "output": [
1954
- -7.373525619506836,
1955
- 8.426886558532715
1956
- ]
1957
- },
1958
- "model.layers.6.self_attn.qk_bmm": {
1959
- "input": [
1960
- -6.766021251678467,
1961
- 8.368499755859375
1962
- ],
1963
- "input2": [
1964
- -18.750553131103516,
1965
- 19.50057601928711
1966
- ],
1967
- "output": [
1968
- -284.78106689453125,
1969
- 221.37881469726562
1970
- ]
1971
- },
1972
- "model.layers.6.self_attn.v_proj": {
1973
- "output": [
1974
- -0.6834059953689575,
1975
- 0.745024561882019
1976
- ]
1977
- },
1978
- "model.layers.7.input_layernorm": {
1979
- "input": [
1980
- -76.0637435913086,
1981
- 11.614128112792969
1982
- ],
1983
- "output": [
1984
- -1.5509907007217407,
1985
- 1.56320321559906
1986
- ]
1987
- },
1988
- "model.layers.7.mlp.act_fn": {
1989
- "input2": [
1990
- -9.137278880189115e-07,
1991
- 0.9197782278060913
1992
- ],
1993
- "output": [
1994
- -0.22567085921764374,
1995
- 18.931161880493164
1996
- ]
1997
- },
1998
- "model.layers.7.mlp.w1": {
1999
- "output": [
2000
- -6.370095729827881,
2001
- 20.25897789001465
2002
- ]
2003
- },
2004
- "model.layers.7.mlp.w2": {
2005
- "input": [
2006
- -10.818718910217285,
2007
- 1.442495346069336
2008
- ],
2009
- "output": [
2010
- -68.0284194946289,
2011
- 5.880210876464844
2012
- ]
2013
- },
2014
- "model.layers.7.mlp.w3": {
2015
- "output": [
2016
- -4.026743412017822,
2017
- 3.9330978393554688
2018
- ]
2019
- },
2020
- "model.layers.7.post_attention_layernorm": {
2021
- "input": [
2022
- -44.210880279541016,
2023
- 6.738880157470703
2024
- ],
2025
- "output": [
2026
- -1.1832901239395142,
2027
- 0.6792961359024048
2028
- ]
2029
- },
2030
- "model.layers.7.self_attn.k_proj": {
2031
- "output": [
2032
- -17.08796501159668,
2033
- 17.494821548461914
2034
- ]
2035
- },
2036
- "model.layers.7.self_attn.o_proj": {
2037
- "output": [
2038
- -0.3788287341594696,
2039
- 0.7155746221542358
2040
- ]
2041
- },
2042
- "model.layers.7.self_attn.pv_bmm": {
2043
- "input": [
2044
- 0.0,
2045
- 14.152711868286133
2046
- ],
2047
- "input2": [
2048
- -0.9549614191055298,
2049
- 0.9624807834625244
2050
- ],
2051
- "output": [
2052
- -0.523362398147583,
2053
- 0.712354302406311
2054
- ]
2055
- },
2056
- "model.layers.7.self_attn.q_proj": {
2057
- "output": [
2058
- -9.399189949035645,
2059
- 10.246659278869629
2060
- ]
2061
- },
2062
- "model.layers.7.self_attn.qk_bmm": {
2063
- "input": [
2064
- -9.251260757446289,
2065
- 10.74087142944336
2066
- ],
2067
- "input2": [
2068
- -17.5736083984375,
2069
- 16.897701263427734
2070
- ],
2071
- "output": [
2072
- -341.1612548828125,
2073
- 198.99151611328125
2074
- ]
2075
- },
2076
- "model.layers.7.self_attn.v_proj": {
2077
- "output": [
2078
- -0.9533576965332031,
2079
- 0.9608644247055054
2080
- ]
2081
- },
2082
- "model.layers.8.input_layernorm": {
2083
- "input": [
2084
- -61.41298294067383,
2085
- 6.890247344970703
2086
- ],
2087
- "output": [
2088
- -2.4262712001800537,
2089
- 2.645033597946167
2090
- ]
2091
- },
2092
- "model.layers.8.mlp.act_fn": {
2093
- "input2": [
2094
- 2.2724009340890916e-06,
2095
- 0.9171198010444641
2096
- ],
2097
- "output": [
2098
- -0.2660808563232422,
2099
- 4.578679084777832
2100
- ]
2101
- },
2102
- "model.layers.8.mlp.w1": {
2103
- "output": [
2104
- -4.160639762878418,
2105
- 2.4291930198669434
2106
- ]
2107
- },
2108
- "model.layers.8.mlp.w2": {
2109
- "input": [
2110
- -0.30407947301864624,
2111
- 0.3113194704055786
2112
- ],
2113
- "output": [
2114
- -0.49675124883651733,
2115
- 0.44169193506240845
2116
- ]
2117
- },
2118
- "model.layers.8.mlp.w3": {
2119
- "output": [
2120
- -0.8565957546234131,
2121
- 0.6816571950912476
2122
- ]
2123
- },
2124
- "model.layers.8.post_attention_layernorm": {
2125
- "input": [
2126
- -39.070579528808594,
2127
- 4.379852294921875
2128
- ],
2129
- "output": [
2130
- -0.64253169298172,
2131
- 0.589389979839325
2132
- ]
2133
- },
2134
- "model.layers.8.self_attn.k_proj": {
2135
- "output": [
2136
- -23.870710372924805,
2137
- 24.439062118530273
2138
- ]
2139
- },
2140
- "model.layers.8.self_attn.o_proj": {
2141
- "output": [
2142
- -1.288500428199768,
2143
- 0.9605840444564819
2144
- ]
2145
- },
2146
- "model.layers.8.self_attn.pv_bmm": {
2147
- "input": [
2148
- -2.849586122621872e-10,
2149
- 5.764626502990723
2150
- ],
2151
- "input2": [
2152
- -0.7795315980911255,
2153
- 1.2283529043197632
2154
- ],
2155
- "output": [
2156
- -0.6425102353096008,
2157
- 1.029327630996704
2158
- ]
2159
- },
2160
- "model.layers.8.self_attn.q_proj": {
2161
- "output": [
2162
- -9.715987205505371,
2163
- 10.930485725402832
2164
- ]
2165
- },
2166
- "model.layers.8.self_attn.qk_bmm": {
2167
- "input": [
2168
- -11.523988723754883,
2169
- 11.433958053588867
2170
- ],
2171
- "input2": [
2172
- -23.01451873779297,
2173
- 28.01767349243164
2174
- ],
2175
- "output": [
2176
- -531.0772705078125,
2177
- 249.6875
2178
- ]
2179
- },
2180
- "model.layers.8.self_attn.v_proj": {
2181
- "output": [
2182
- -0.77955162525177,
2183
- 1.2283843755722046
2184
- ]
2185
- },
2186
- "model.layers.9.input_layernorm": {
2187
- "input": [
2188
- -63.642173767089844,
2189
- 7.1283416748046875
2190
- ],
2191
- "output": [
2192
- -1.4341191053390503,
2193
- 1.5881980657577515
2194
- ]
2195
- },
2196
- "model.layers.9.mlp.act_fn": {
2197
- "input2": [
2198
- 3.249040446462459e-06,
2199
- 0.9275291562080383
2200
- ],
2201
- "output": [
2202
- -0.2559678554534912,
2203
- 6.990260124206543
2204
- ]
2205
- },
2206
- "model.layers.9.mlp.w1": {
2207
- "output": [
2208
- -2.845679521560669,
2209
- 3.5196564197540283
2210
- ]
2211
- },
2212
- "model.layers.9.mlp.w2": {
2213
- "input": [
2214
- -0.31340545415878296,
2215
- 0.35817772150039673
2216
- ],
2217
- "output": [
2218
- -1.1538662910461426,
2219
- 0.8241255283355713
2220
- ]
2221
- },
2222
- "model.layers.9.mlp.w3": {
2223
- "output": [
2224
- -0.6430490612983704,
2225
- 0.6687710881233215
2226
- ]
2227
- },
2228
- "model.layers.9.post_attention_layernorm": {
2229
- "input": [
2230
- -47.34988021850586,
2231
- 5.296352386474609
2232
- ],
2233
- "output": [
2234
- -0.6652911901473999,
2235
- 0.8494342565536499
2236
- ]
2237
- },
2238
- "model.layers.9.self_attn.k_proj": {
2239
- "output": [
2240
- -19.59954833984375,
2241
- 18.552242279052734
2242
- ]
2243
- },
2244
- "model.layers.9.self_attn.o_proj": {
2245
- "output": [
2246
- -0.9600111246109009,
2247
- 0.7467882633209229
2248
- ]
2249
- },
2250
- "model.layers.9.self_attn.pv_bmm": {
2251
- "input": [
2252
- 0.0,
2253
- 10.371491432189941
2254
- ],
2255
- "input2": [
2256
- -1.1789764165878296,
2257
- 0.9838939905166626
2258
- ],
2259
- "output": [
2260
- -0.702292263507843,
2261
- 0.6048941016197205
2262
- ]
2263
- },
2264
- "model.layers.9.self_attn.q_proj": {
2265
- "output": [
2266
- -7.073794841766357,
2267
- 7.958018779754639
2268
- ]
2269
- },
2270
- "model.layers.9.self_attn.qk_bmm": {
2271
- "input": [
2272
- -8.201776504516602,
2273
- 7.064304351806641
2274
- ],
2275
- "input2": [
2276
- -19.554767608642578,
2277
- 18.802661895751953
2278
- ],
2279
- "output": [
2280
- -267.5766296386719,
2281
- 236.26300048828125
2282
- ]
2283
- },
2284
- "model.layers.9.self_attn.v_proj": {
2285
- "output": [
2286
- -1.1789911985397339,
2287
- 0.9839063882827759
2288
- ]
2289
- }
2290
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:110dccb5a5a605efea1c6d6e9be9af17d68244fc90707f46968329510eeca00e
3
+ size 53168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,50 +1,3 @@
1
- {
2
- "_name_or_path": "checkpoints/hfmodels/llama-1.1b",
3
- "architectures": [
4
- "HFForCausalLM"
5
- ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "bos_token_id": 1,
9
- "embd_pdrop": 0.0,
10
- "eos_token_id": 2,
11
- "head_dim": null,
12
- "hidden_act": "silu",
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 5632,
16
- "l2norm_as_rmsnorm": true,
17
- "layer_norm_eps": 1e-05,
18
- "max_position_embeddings": 2048,
19
- "mlp_bias": false,
20
- "model_type": "hfmodel",
21
- "norm_class": "rmsnorm",
22
- "normalize_embed": false,
23
- "normalizie_embed": false,
24
- "num_attention_heads": 32,
25
- "num_experts_per_tok": 1,
26
- "num_hidden_layers": 22,
27
- "num_key_value_heads": 4,
28
- "num_linears_per_mlp": 3,
29
- "num_local_experts": 1,
30
- "output_router_logits": false,
31
- "parallel_residual": false,
32
- "partial_rotary_factor": 1.0,
33
- "pretraining_tp": 1,
34
- "qk_layernorm": false,
35
- "resid_pdrop": 0.0,
36
- "rope_scaling": null,
37
- "rope_theta": 10000.0,
38
- "router_aux_loss_coef": 0.001,
39
- "shared_attention_norm": false,
40
- "sliding_window": null,
41
- "static_causal_mask": false,
42
- "tie_word_embeddings": false,
43
- "torch_dtype": "float32",
44
- "transformers_version": "4.41.0.dev0",
45
- "use_cache": true,
46
- "use_matmul_as_module": true,
47
- "use_megablocks": false,
48
- "use_qkv_bias_only": false,
49
- "vocab_size": 32000
50
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3813577311a23b9ae7530b6b0504e7a6560aedbe813bcee72d479a6f0bead81e
3
+ size 1307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
default_qcfg.json CHANGED
The diff for this file is too large to render. See raw diff
 
generation_config.json CHANGED
@@ -1,7 +1,3 @@
1
- {
2
- "bos_token_id": 1,
3
- "eos_token_id": 2,
4
- "max_length": 2048,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.41.0.dev0"
7
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56cf26bf2918fddebbfcb484144e503431a431f8b3695a502d4228b7978cd216
3
+ size 129
 
 
 
 
results.json CHANGED
@@ -1,24 +1,3 @@
1
- {
2
- "config": {
3
- "batch_size": 1,
4
- "batch_sizes": [],
5
- "bootstrap_iters": 100000,
6
- "description_dict": null,
7
- "device": null,
8
- "limit": null,
9
- "model": "llama-1.1b-e2e-w4a8-s1024-e60",
10
- "model_args": null,
11
- "no_cache": true,
12
- "num_fewshot": 0
13
- },
14
- "results": {
15
- "wikitext": {
16
- "bits_per_byte": 0.7654894102768953,
17
- "byte_perplexity": 1.6999465790529529,
18
- "word_perplexity": 17.070303884500657
19
- }
20
- },
21
- "versions": {
22
- "wikitext": 1
23
- }
24
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d877bb88f14c1c0f1bcbdb5e3f5999443626766bf6ba55dc7f3f0f9d8aed4b
3
+ size 575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -1,30 +1,3 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "unk_token": {
24
- "content": "<unk>",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- }
30
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d96d7a9e6ced037f12394b7ea6a5b02e6ca87e0d11edaa8d60d9be857ce7db
3
+ size 551
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,44 +1,3 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "add_prefix_space": true,
5
- "added_tokens_decoder": {
6
- "0": {
7
- "content": "<unk>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "1": {
15
- "content": "<s>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": true
21
- },
22
- "2": {
23
- "content": "</s>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": true
29
- }
30
- },
31
- "bos_token": "<s>",
32
- "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
33
- "clean_up_tokenization_spaces": false,
34
- "eos_token": "</s>",
35
- "legacy": false,
36
- "model_max_length": 2048,
37
- "pad_token": "</s>",
38
- "padding_side": "right",
39
- "sp_model_kwargs": {},
40
- "spaces_between_special_tokens": false,
41
- "tokenizer_class": "LlamaTokenizer",
42
- "unk_token": "<unk>",
43
- "use_default_system_prompt": false
44
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6025dc3da07c0b33ed86a1f613b1b6a7d92d27822ee1d860d86812afa66c1b64
3
+ size 1410