Narsil commited on
Commit
66c7fdc
·
verified ·
1 Parent(s): 120fa41

Upload topology.json

Browse files
Files changed (1) hide show
  1. topology.json +942 -942
topology.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "tensors": {
3
- "ln_f.bias": {
4
  "type": "Distributed",
5
  "shape": [
6
  768
@@ -27,7 +27,7 @@
27
  }
28
  ]
29
  },
30
- "h.4.mlp.c_proj.bias": {
31
  "type": "Distributed",
32
  "shape": [
33
  768
@@ -54,34 +54,39 @@
54
  }
55
  ]
56
  },
57
- "h.3.attn.c_proj.bias": {
58
  "type": "Distributed",
59
  "shape": [
 
60
  768
61
  ],
62
  "dtype": "F32",
63
  "chunks": [
64
  {
65
  "offsets": [
 
66
  0
67
  ],
68
  "shape": [
69
- 384
 
70
  ],
71
  "filename_index": 0
72
  },
73
  {
74
  "offsets": [
75
- 384
 
76
  ],
77
  "shape": [
78
- 384
 
79
  ],
80
  "filename_index": 1
81
  }
82
  ]
83
  },
84
- "h.0.ln_2.weight": {
85
  "type": "Distributed",
86
  "shape": [
87
  768
@@ -108,37 +113,52 @@
108
  }
109
  ]
110
  },
111
- "h.2.mlp.c_proj.bias": {
112
  "type": "Distributed",
113
  "shape": [
114
- 768
 
 
 
115
  ],
116
  "dtype": "F32",
117
  "chunks": [
118
  {
119
  "offsets": [
 
 
 
120
  0
121
  ],
122
  "shape": [
123
- 384
 
 
 
124
  ],
125
  "filename_index": 0
126
  },
127
  {
128
  "offsets": [
129
- 384
 
 
 
130
  ],
131
  "shape": [
132
- 384
 
 
 
133
  ],
134
  "filename_index": 1
135
  }
136
  ]
137
  },
138
- "h.7.mlp.c_fc.bias": {
139
  "type": "Distributed",
140
  "shape": [
141
- 3072
142
  ],
143
  "dtype": "F32",
144
  "chunks": [
@@ -147,22 +167,22 @@
147
  0
148
  ],
149
  "shape": [
150
- 1536
151
  ],
152
  "filename_index": 0
153
  },
154
  {
155
  "offsets": [
156
- 1536
157
  ],
158
  "shape": [
159
- 1536
160
  ],
161
  "filename_index": 1
162
  }
163
  ]
164
  },
165
- "h.4.ln_1.weight": {
166
  "type": "Distributed",
167
  "shape": [
168
  768
@@ -189,39 +209,34 @@
189
  }
190
  ]
191
  },
192
- "h.11.mlp.c_proj.weight": {
193
  "type": "Distributed",
194
  "shape": [
195
- 3072,
196
- 768
197
  ],
198
  "dtype": "F32",
199
  "chunks": [
200
  {
201
  "offsets": [
202
- 0,
203
  0
204
  ],
205
  "shape": [
206
- 1536,
207
- 768
208
  ],
209
  "filename_index": 0
210
  },
211
  {
212
  "offsets": [
213
- 1536,
214
- 0
215
  ],
216
  "shape": [
217
- 1536,
218
- 768
219
  ],
220
  "filename_index": 1
221
  }
222
  ]
223
  },
224
- "h.10.ln_2.bias": {
225
  "type": "Distributed",
226
  "shape": [
227
  768
@@ -248,49 +263,7 @@
248
  }
249
  ]
250
  },
251
- "h.6.attn.bias": {
252
- "type": "Distributed",
253
- "shape": [
254
- 1,
255
- 1,
256
- 1024,
257
- 1024
258
- ],
259
- "dtype": "F32",
260
- "chunks": [
261
- {
262
- "offsets": [
263
- 0,
264
- 0,
265
- 0,
266
- 0
267
- ],
268
- "shape": [
269
- 1,
270
- 1,
271
- 1024,
272
- 512
273
- ],
274
- "filename_index": 0
275
- },
276
- {
277
- "offsets": [
278
- 0,
279
- 0,
280
- 0,
281
- 512
282
- ],
283
- "shape": [
284
- 1,
285
- 1,
286
- 1024,
287
- 512
288
- ],
289
- "filename_index": 1
290
- }
291
- ]
292
- },
293
- "h.1.attn.c_attn.bias": {
294
  "type": "Distributed",
295
  "shape": [
296
  2304
@@ -317,7 +290,7 @@
317
  }
318
  ]
319
  },
320
- "h.9.attn.c_proj.bias": {
321
  "type": "Distributed",
322
  "shape": [
323
  768
@@ -344,10 +317,10 @@
344
  }
345
  ]
346
  },
347
- "h.7.attn.c_proj.bias": {
348
  "type": "Distributed",
349
  "shape": [
350
- 768
351
  ],
352
  "dtype": "F32",
353
  "chunks": [
@@ -356,80 +329,58 @@
356
  0
357
  ],
358
  "shape": [
359
- 384
360
  ],
361
  "filename_index": 0
362
  },
363
  {
364
  "offsets": [
365
- 384
366
  ],
367
  "shape": [
368
- 384
369
  ],
370
  "filename_index": 1
371
  }
372
  ]
373
  },
374
- "h.8.attn.c_proj.bias": {
375
  "type": "Distributed",
376
  "shape": [
 
377
  768
378
  ],
379
  "dtype": "F32",
380
  "chunks": [
381
  {
382
  "offsets": [
 
383
  0
384
  ],
385
  "shape": [
386
- 384
 
387
  ],
388
  "filename_index": 0
389
  },
390
  {
391
  "offsets": [
392
- 384
393
- ],
394
- "shape": [
395
- 384
396
- ],
397
- "filename_index": 1
398
- }
399
- ]
400
- },
401
- "h.4.ln_1.bias": {
402
- "type": "Distributed",
403
- "shape": [
404
- 768
405
- ],
406
- "dtype": "F32",
407
- "chunks": [
408
- {
409
- "offsets": [
410
  0
411
  ],
412
  "shape": [
413
- 384
414
- ],
415
- "filename_index": 0
416
- },
417
- {
418
- "offsets": [
419
- 384
420
- ],
421
- "shape": [
422
- 384
423
  ],
424
  "filename_index": 1
425
  }
426
  ]
427
  },
428
- "h.3.mlp.c_fc.weight": {
429
  "type": "Distributed",
430
  "shape": [
431
  768,
432
- 3072
433
  ],
434
  "dtype": "F32",
435
  "chunks": [
@@ -440,56 +391,51 @@
440
  ],
441
  "shape": [
442
  768,
443
- 1536
444
  ],
445
  "filename_index": 0
446
  },
447
  {
448
  "offsets": [
449
  0,
450
- 1536
451
  ],
452
  "shape": [
453
  768,
454
- 1536
455
  ],
456
  "filename_index": 1
457
  }
458
  ]
459
  },
460
- "h.9.mlp.c_proj.weight": {
461
  "type": "Distributed",
462
  "shape": [
463
- 3072,
464
  768
465
  ],
466
  "dtype": "F32",
467
  "chunks": [
468
  {
469
  "offsets": [
470
- 0,
471
  0
472
  ],
473
  "shape": [
474
- 1536,
475
- 768
476
  ],
477
  "filename_index": 0
478
  },
479
  {
480
  "offsets": [
481
- 1536,
482
- 0
483
  ],
484
  "shape": [
485
- 1536,
486
- 768
487
  ],
488
  "filename_index": 1
489
  }
490
  ]
491
  },
492
- "h.3.ln_1.weight": {
493
  "type": "Distributed",
494
  "shape": [
495
  768
@@ -516,42 +462,37 @@
516
  }
517
  ]
518
  },
519
- "h.6.mlp.c_fc.weight": {
520
  "type": "Distributed",
521
  "shape": [
522
- 768,
523
  3072
524
  ],
525
  "dtype": "F32",
526
  "chunks": [
527
  {
528
  "offsets": [
529
- 0,
530
  0
531
  ],
532
  "shape": [
533
- 768,
534
  1536
535
  ],
536
  "filename_index": 0
537
  },
538
  {
539
  "offsets": [
540
- 0,
541
  1536
542
  ],
543
  "shape": [
544
- 768,
545
  1536
546
  ],
547
  "filename_index": 1
548
  }
549
  ]
550
  },
551
- "h.5.attn.c_attn.bias": {
552
  "type": "Distributed",
553
  "shape": [
554
- 2304
555
  ],
556
  "dtype": "F32",
557
  "chunks": [
@@ -560,22 +501,22 @@
560
  0
561
  ],
562
  "shape": [
563
- 1152
564
  ],
565
  "filename_index": 0
566
  },
567
  {
568
  "offsets": [
569
- 1152
570
  ],
571
  "shape": [
572
- 1152
573
  ],
574
  "filename_index": 1
575
  }
576
  ]
577
  },
578
- "h.0.attn.c_proj.bias": {
579
  "type": "Distributed",
580
  "shape": [
581
  768
@@ -602,11 +543,11 @@
602
  }
603
  ]
604
  },
605
- "h.5.mlp.c_fc.weight": {
606
  "type": "Distributed",
607
  "shape": [
608
  768,
609
- 3072
610
  ],
611
  "dtype": "F32",
612
  "chunks": [
@@ -616,25 +557,25 @@
616
  0
617
  ],
618
  "shape": [
619
- 768,
620
- 1536
621
  ],
622
  "filename_index": 0
623
  },
624
  {
625
  "offsets": [
626
- 0,
627
- 1536
628
  ],
629
  "shape": [
630
- 768,
631
- 1536
632
  ],
633
  "filename_index": 1
634
  }
635
  ]
636
  },
637
- "h.10.attn.c_proj.bias": {
638
  "type": "Distributed",
639
  "shape": [
640
  768
@@ -661,7 +602,7 @@
661
  }
662
  ]
663
  },
664
- "h.2.ln_1.bias": {
665
  "type": "Distributed",
666
  "shape": [
667
  768
@@ -688,39 +629,34 @@
688
  }
689
  ]
690
  },
691
- "h.3.mlp.c_proj.weight": {
692
  "type": "Distributed",
693
  "shape": [
694
- 3072,
695
- 768
696
  ],
697
  "dtype": "F32",
698
  "chunks": [
699
  {
700
  "offsets": [
701
- 0,
702
  0
703
  ],
704
  "shape": [
705
- 1536,
706
- 768
707
  ],
708
  "filename_index": 0
709
  },
710
  {
711
  "offsets": [
712
- 1536,
713
- 0
714
  ],
715
  "shape": [
716
- 1536,
717
- 768
718
  ],
719
  "filename_index": 1
720
  }
721
  ]
722
  },
723
- "h.1.ln_1.weight": {
724
  "type": "Distributed",
725
  "shape": [
726
  768
@@ -747,10 +683,10 @@
747
  }
748
  ]
749
  },
750
- "h.9.ln_2.weight": {
751
  "type": "Distributed",
752
  "shape": [
753
- 768
754
  ],
755
  "dtype": "F32",
756
  "chunks": [
@@ -759,22 +695,22 @@
759
  0
760
  ],
761
  "shape": [
762
- 384
763
  ],
764
  "filename_index": 0
765
  },
766
  {
767
  "offsets": [
768
- 384
769
  ],
770
  "shape": [
771
- 384
772
  ],
773
  "filename_index": 1
774
  }
775
  ]
776
  },
777
- "h.5.mlp.c_fc.bias": {
778
  "type": "Distributed",
779
  "shape": [
780
  3072
@@ -801,93 +737,34 @@
801
  }
802
  ]
803
  },
804
- "h.9.ln_1.weight": {
805
- "type": "Distributed",
806
- "shape": [
807
- 768
808
- ],
809
- "dtype": "F32",
810
- "chunks": [
811
- {
812
- "offsets": [
813
- 0
814
- ],
815
- "shape": [
816
- 384
817
- ],
818
- "filename_index": 0
819
- },
820
- {
821
- "offsets": [
822
- 384
823
- ],
824
- "shape": [
825
- 384
826
- ],
827
- "filename_index": 1
828
- }
829
- ]
830
- },
831
- "h.2.mlp.c_fc.weight": {
832
  "type": "Distributed",
833
  "shape": [
834
- 768,
835
  3072
836
  ],
837
  "dtype": "F32",
838
  "chunks": [
839
  {
840
  "offsets": [
841
- 0,
842
  0
843
  ],
844
  "shape": [
845
- 768,
846
  1536
847
  ],
848
  "filename_index": 0
849
  },
850
  {
851
  "offsets": [
852
- 0,
853
  1536
854
  ],
855
  "shape": [
856
- 768,
857
  1536
858
  ],
859
  "filename_index": 1
860
  }
861
  ]
862
  },
863
- "h.9.mlp.c_proj.bias": {
864
- "type": "Distributed",
865
- "shape": [
866
- 768
867
- ],
868
- "dtype": "F32",
869
- "chunks": [
870
- {
871
- "offsets": [
872
- 0
873
- ],
874
- "shape": [
875
- 384
876
- ],
877
- "filename_index": 0
878
- },
879
- {
880
- "offsets": [
881
- 384
882
- ],
883
- "shape": [
884
- 384
885
- ],
886
- "filename_index": 1
887
- }
888
- ]
889
- },
890
- "h.11.attn.c_attn.weight": {
891
  "type": "Distributed",
892
  "shape": [
893
  768,
@@ -919,66 +796,76 @@
919
  }
920
  ]
921
  },
922
- "h.6.mlp.c_proj.bias": {
923
  "type": "Distributed",
924
  "shape": [
925
- 768
 
 
 
926
  ],
927
  "dtype": "F32",
928
  "chunks": [
929
  {
930
  "offsets": [
 
 
 
931
  0
932
  ],
933
  "shape": [
934
- 384
 
 
 
935
  ],
936
  "filename_index": 0
937
  },
938
  {
939
  "offsets": [
940
- 384
 
 
 
941
  ],
942
  "shape": [
943
- 384
 
 
 
944
  ],
945
  "filename_index": 1
946
  }
947
  ]
948
  },
949
- "h.8.mlp.c_proj.weight": {
950
  "type": "Distributed",
951
  "shape": [
952
- 3072,
953
  768
954
  ],
955
  "dtype": "F32",
956
  "chunks": [
957
  {
958
  "offsets": [
959
- 0,
960
  0
961
  ],
962
  "shape": [
963
- 1536,
964
- 768
965
  ],
966
  "filename_index": 0
967
  },
968
  {
969
  "offsets": [
970
- 1536,
971
- 0
972
  ],
973
  "shape": [
974
- 1536,
975
- 768
976
  ],
977
  "filename_index": 1
978
  }
979
  ]
980
  },
981
- "h.7.attn.bias": {
982
  "type": "Distributed",
983
  "shape": [
984
  1,
@@ -1020,7 +907,7 @@
1020
  }
1021
  ]
1022
  },
1023
- "h.6.ln_2.bias": {
1024
  "type": "Distributed",
1025
  "shape": [
1026
  768
@@ -1047,7 +934,7 @@
1047
  }
1048
  ]
1049
  },
1050
- "h.7.ln_2.bias": {
1051
  "type": "Distributed",
1052
  "shape": [
1053
  768
@@ -1074,49 +961,39 @@
1074
  }
1075
  ]
1076
  },
1077
- "h.11.attn.bias": {
1078
  "type": "Distributed",
1079
  "shape": [
1080
- 1,
1081
- 1,
1082
- 1024,
1083
- 1024
1084
  ],
1085
  "dtype": "F32",
1086
  "chunks": [
1087
  {
1088
  "offsets": [
1089
- 0,
1090
- 0,
1091
  0,
1092
  0
1093
  ],
1094
  "shape": [
1095
- 1,
1096
- 1,
1097
- 1024,
1098
- 512
1099
  ],
1100
  "filename_index": 0
1101
  },
1102
  {
1103
  "offsets": [
1104
- 0,
1105
- 0,
1106
- 0,
1107
- 512
1108
  ],
1109
  "shape": [
1110
- 1,
1111
- 1,
1112
- 1024,
1113
- 512
1114
  ],
1115
  "filename_index": 1
1116
  }
1117
  ]
1118
  },
1119
- "h.2.ln_2.weight": {
1120
  "type": "Distributed",
1121
  "shape": [
1122
  768
@@ -1143,38 +1020,43 @@
1143
  }
1144
  ]
1145
  },
1146
- "h.11.ln_1.weight": {
1147
  "type": "Distributed",
1148
  "shape": [
 
1149
  768
1150
  ],
1151
  "dtype": "F32",
1152
  "chunks": [
1153
  {
1154
  "offsets": [
 
1155
  0
1156
  ],
1157
  "shape": [
1158
- 384
 
1159
  ],
1160
  "filename_index": 0
1161
  },
1162
  {
1163
  "offsets": [
1164
- 384
 
1165
  ],
1166
  "shape": [
1167
- 384
 
1168
  ],
1169
  "filename_index": 1
1170
  }
1171
  ]
1172
  },
1173
- "h.7.attn.c_proj.weight": {
1174
  "type": "Distributed",
1175
  "shape": [
1176
  768,
1177
- 768
1178
  ],
1179
  "dtype": "F32",
1180
  "chunks": [
@@ -1184,52 +1066,84 @@
1184
  0
1185
  ],
1186
  "shape": [
1187
- 384,
1188
- 768
1189
  ],
1190
  "filename_index": 0
1191
  },
1192
  {
1193
  "offsets": [
1194
- 384,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1195
  0
1196
  ],
1197
  "shape": [
1198
- 384,
1199
- 768
 
 
 
 
 
 
 
 
1200
  ],
1201
  "filename_index": 1
1202
  }
1203
  ]
1204
  },
1205
- "h.2.attn.c_attn.bias": {
1206
  "type": "Distributed",
1207
  "shape": [
 
1208
  2304
1209
  ],
1210
  "dtype": "F32",
1211
  "chunks": [
1212
  {
1213
  "offsets": [
 
1214
  0
1215
  ],
1216
  "shape": [
 
1217
  1152
1218
  ],
1219
  "filename_index": 0
1220
  },
1221
  {
1222
  "offsets": [
 
1223
  1152
1224
  ],
1225
  "shape": [
 
1226
  1152
1227
  ],
1228
  "filename_index": 1
1229
  }
1230
  ]
1231
  },
1232
- "h.1.ln_1.bias": {
1233
  "type": "Distributed",
1234
  "shape": [
1235
  768
@@ -1283,10 +1197,10 @@
1283
  }
1284
  ]
1285
  },
1286
- "h.10.mlp.c_proj.bias": {
1287
  "type": "Distributed",
1288
  "shape": [
1289
- 768
1290
  ],
1291
  "dtype": "F32",
1292
  "chunks": [
@@ -1295,86 +1209,76 @@
1295
  0
1296
  ],
1297
  "shape": [
1298
- 384
1299
  ],
1300
  "filename_index": 0
1301
  },
1302
  {
1303
  "offsets": [
1304
- 384
1305
  ],
1306
  "shape": [
1307
- 384
1308
  ],
1309
  "filename_index": 1
1310
  }
1311
  ]
1312
  },
1313
- "h.8.attn.c_proj.weight": {
1314
  "type": "Distributed",
1315
  "shape": [
1316
- 768,
1317
  768
1318
  ],
1319
  "dtype": "F32",
1320
  "chunks": [
1321
  {
1322
  "offsets": [
1323
- 0,
1324
  0
1325
  ],
1326
  "shape": [
1327
- 384,
1328
- 768
1329
  ],
1330
  "filename_index": 0
1331
  },
1332
  {
1333
  "offsets": [
1334
- 384,
1335
- 0
1336
  ],
1337
  "shape": [
1338
- 384,
1339
- 768
1340
  ],
1341
  "filename_index": 1
1342
  }
1343
  ]
1344
  },
1345
- "h.6.attn.c_attn.weight": {
1346
  "type": "Distributed",
1347
  "shape": [
1348
- 768,
1349
- 2304
1350
  ],
1351
  "dtype": "F32",
1352
  "chunks": [
1353
  {
1354
  "offsets": [
1355
- 0,
1356
  0
1357
  ],
1358
  "shape": [
1359
- 768,
1360
- 1152
1361
  ],
1362
  "filename_index": 0
1363
  },
1364
  {
1365
  "offsets": [
1366
- 0,
1367
- 1152
1368
  ],
1369
  "shape": [
1370
- 768,
1371
- 1152
1372
  ],
1373
  "filename_index": 1
1374
  }
1375
  ]
1376
  },
1377
- "h.1.mlp.c_proj.weight": {
1378
  "type": "Distributed",
1379
  "shape": [
1380
  3072,
@@ -1406,34 +1310,39 @@
1406
  }
1407
  ]
1408
  },
1409
- "h.0.attn.c_attn.bias": {
1410
  "type": "Distributed",
1411
  "shape": [
 
1412
  2304
1413
  ],
1414
  "dtype": "F32",
1415
  "chunks": [
1416
  {
1417
  "offsets": [
 
1418
  0
1419
  ],
1420
  "shape": [
 
1421
  1152
1422
  ],
1423
  "filename_index": 0
1424
  },
1425
  {
1426
  "offsets": [
 
1427
  1152
1428
  ],
1429
  "shape": [
 
1430
  1152
1431
  ],
1432
  "filename_index": 1
1433
  }
1434
  ]
1435
  },
1436
- "h.2.attn.c_proj.bias": {
1437
  "type": "Distributed",
1438
  "shape": [
1439
  768
@@ -1460,11 +1369,11 @@
1460
  }
1461
  ]
1462
  },
1463
- "h.9.mlp.c_fc.weight": {
1464
  "type": "Distributed",
1465
  "shape": [
1466
  768,
1467
- 3072
1468
  ],
1469
  "dtype": "F32",
1470
  "chunks": [
@@ -1474,28 +1383,28 @@
1474
  0
1475
  ],
1476
  "shape": [
1477
- 768,
1478
- 1536
1479
  ],
1480
  "filename_index": 0
1481
  },
1482
  {
1483
  "offsets": [
1484
- 0,
1485
- 1536
1486
  ],
1487
  "shape": [
1488
- 768,
1489
- 1536
1490
  ],
1491
  "filename_index": 1
1492
  }
1493
  ]
1494
  },
1495
- "h.7.mlp.c_proj.bias": {
1496
  "type": "Distributed",
1497
  "shape": [
1498
- 768
1499
  ],
1500
  "dtype": "F32",
1501
  "chunks": [
@@ -1504,54 +1413,49 @@
1504
  0
1505
  ],
1506
  "shape": [
1507
- 384
1508
  ],
1509
  "filename_index": 0
1510
  },
1511
  {
1512
  "offsets": [
1513
- 384
1514
  ],
1515
  "shape": [
1516
- 384
1517
  ],
1518
  "filename_index": 1
1519
  }
1520
  ]
1521
  },
1522
- "h.8.attn.c_attn.weight": {
1523
  "type": "Distributed",
1524
  "shape": [
1525
- 768,
1526
- 2304
1527
  ],
1528
  "dtype": "F32",
1529
  "chunks": [
1530
  {
1531
  "offsets": [
1532
- 0,
1533
  0
1534
  ],
1535
  "shape": [
1536
- 768,
1537
- 1152
1538
  ],
1539
  "filename_index": 0
1540
  },
1541
  {
1542
  "offsets": [
1543
- 0,
1544
- 1152
1545
  ],
1546
  "shape": [
1547
- 768,
1548
- 1152
1549
  ],
1550
  "filename_index": 1
1551
  }
1552
  ]
1553
  },
1554
- "h.8.ln_2.bias": {
1555
  "type": "Distributed",
1556
  "shape": [
1557
  768
@@ -1578,91 +1482,116 @@
1578
  }
1579
  ]
1580
  },
1581
- "h.0.ln_2.bias": {
1582
  "type": "Distributed",
1583
  "shape": [
1584
- 768
 
1585
  ],
1586
  "dtype": "F32",
1587
  "chunks": [
1588
  {
1589
  "offsets": [
 
1590
  0
1591
  ],
1592
  "shape": [
1593
- 384
 
1594
  ],
1595
  "filename_index": 0
1596
  },
1597
  {
1598
  "offsets": [
1599
- 384
 
1600
  ],
1601
  "shape": [
1602
- 384
 
1603
  ],
1604
  "filename_index": 1
1605
  }
1606
  ]
1607
  },
1608
- "h.1.mlp.c_proj.bias": {
1609
  "type": "Distributed",
1610
  "shape": [
1611
- 768
 
1612
  ],
1613
  "dtype": "F32",
1614
  "chunks": [
1615
  {
1616
  "offsets": [
 
1617
  0
1618
  ],
1619
  "shape": [
1620
- 384
 
1621
  ],
1622
  "filename_index": 0
1623
  },
1624
  {
1625
  "offsets": [
1626
- 384
 
1627
  ],
1628
  "shape": [
1629
- 384
 
1630
  ],
1631
  "filename_index": 1
1632
  }
1633
  ]
1634
  },
1635
- "h.0.ln_1.weight": {
1636
  "type": "Distributed",
1637
  "shape": [
1638
- 768
 
 
 
1639
  ],
1640
  "dtype": "F32",
1641
  "chunks": [
1642
  {
1643
  "offsets": [
 
 
 
1644
  0
1645
  ],
1646
  "shape": [
1647
- 384
 
 
 
1648
  ],
1649
  "filename_index": 0
1650
  },
1651
  {
1652
  "offsets": [
1653
- 384
 
 
 
1654
  ],
1655
  "shape": [
1656
- 384
 
 
 
1657
  ],
1658
  "filename_index": 1
1659
  }
1660
  ]
1661
  },
1662
- "h.5.attn.c_proj.weight": {
1663
  "type": "Distributed",
1664
  "shape": [
1665
- 768,
1666
  768
1667
  ],
1668
  "dtype": "F32",
@@ -1673,25 +1602,25 @@
1673
  0
1674
  ],
1675
  "shape": [
1676
- 384,
1677
- 768
1678
  ],
1679
  "filename_index": 0
1680
  },
1681
  {
1682
  "offsets": [
1683
- 384,
1684
- 0
1685
  ],
1686
  "shape": [
1687
- 384,
1688
- 768
1689
  ],
1690
  "filename_index": 1
1691
  }
1692
  ]
1693
  },
1694
- "h.4.ln_2.weight": {
1695
  "type": "Distributed",
1696
  "shape": [
1697
  768
@@ -1718,10 +1647,10 @@
1718
  }
1719
  ]
1720
  },
1721
- "h.11.attn.c_proj.bias": {
1722
  "type": "Distributed",
1723
  "shape": [
1724
- 768
1725
  ],
1726
  "dtype": "F32",
1727
  "chunks": [
@@ -1730,22 +1659,22 @@
1730
  0
1731
  ],
1732
  "shape": [
1733
- 384
1734
  ],
1735
  "filename_index": 0
1736
  },
1737
  {
1738
  "offsets": [
1739
- 384
1740
  ],
1741
  "shape": [
1742
- 384
1743
  ],
1744
  "filename_index": 1
1745
  }
1746
  ]
1747
  },
1748
- "h.11.ln_2.weight": {
1749
  "type": "Distributed",
1750
  "shape": [
1751
  768
@@ -1772,42 +1701,37 @@
1772
  }
1773
  ]
1774
  },
1775
- "h.6.attn.c_proj.weight": {
1776
  "type": "Distributed",
1777
  "shape": [
1778
- 768,
1779
  768
1780
  ],
1781
  "dtype": "F32",
1782
  "chunks": [
1783
  {
1784
  "offsets": [
1785
- 0,
1786
  0
1787
  ],
1788
  "shape": [
1789
- 384,
1790
- 768
1791
  ],
1792
  "filename_index": 0
1793
  },
1794
  {
1795
  "offsets": [
1796
- 384,
1797
- 0
1798
  ],
1799
  "shape": [
1800
- 384,
1801
- 768
1802
  ],
1803
  "filename_index": 1
1804
  }
1805
  ]
1806
  },
1807
- "h.6.ln_2.weight": {
1808
  "type": "Distributed",
1809
  "shape": [
1810
- 768
1811
  ],
1812
  "dtype": "F32",
1813
  "chunks": [
@@ -1816,22 +1740,22 @@
1816
  0
1817
  ],
1818
  "shape": [
1819
- 384
1820
  ],
1821
  "filename_index": 0
1822
  },
1823
  {
1824
  "offsets": [
1825
- 384
1826
  ],
1827
  "shape": [
1828
- 384
1829
  ],
1830
  "filename_index": 1
1831
  }
1832
  ]
1833
  },
1834
- "h.5.mlp.c_proj.bias": {
1835
  "type": "Distributed",
1836
  "shape": [
1837
  768
@@ -1858,7 +1782,7 @@
1858
  }
1859
  ]
1860
  },
1861
- "h.8.mlp.c_proj.bias": {
1862
  "type": "Distributed",
1863
  "shape": [
1864
  768
@@ -1885,34 +1809,39 @@
1885
  }
1886
  ]
1887
  },
1888
- "h.0.mlp.c_proj.bias": {
1889
  "type": "Distributed",
1890
  "shape": [
 
1891
  768
1892
  ],
1893
  "dtype": "F32",
1894
  "chunks": [
1895
  {
1896
  "offsets": [
 
1897
  0
1898
  ],
1899
  "shape": [
1900
- 384
 
1901
  ],
1902
  "filename_index": 0
1903
  },
1904
  {
1905
  "offsets": [
1906
- 384
 
1907
  ],
1908
  "shape": [
1909
- 384
 
1910
  ],
1911
  "filename_index": 1
1912
  }
1913
  ]
1914
  },
1915
- "h.3.ln_2.bias": {
1916
  "type": "Distributed",
1917
  "shape": [
1918
  768
@@ -1939,10 +1868,10 @@
1939
  }
1940
  ]
1941
  },
1942
- "h.5.mlp.c_proj.weight": {
1943
  "type": "Distributed",
1944
  "shape": [
1945
- 3072,
1946
  768
1947
  ],
1948
  "dtype": "F32",
@@ -1953,25 +1882,25 @@
1953
  0
1954
  ],
1955
  "shape": [
1956
- 1536,
1957
  768
1958
  ],
1959
  "filename_index": 0
1960
  },
1961
  {
1962
  "offsets": [
1963
- 1536,
1964
  0
1965
  ],
1966
  "shape": [
1967
- 1536,
1968
  768
1969
  ],
1970
  "filename_index": 1
1971
  }
1972
  ]
1973
  },
1974
- "h.5.ln_2.weight": {
1975
  "type": "Distributed",
1976
  "shape": [
1977
  768
@@ -1998,10 +1927,10 @@
1998
  }
1999
  ]
2000
  },
2001
- "h.10.mlp.c_fc.bias": {
2002
  "type": "Distributed",
2003
  "shape": [
2004
- 3072
2005
  ],
2006
  "dtype": "F32",
2007
  "chunks": [
@@ -2010,22 +1939,22 @@
2010
  0
2011
  ],
2012
  "shape": [
2013
- 1536
2014
  ],
2015
  "filename_index": 0
2016
  },
2017
  {
2018
  "offsets": [
2019
- 1536
2020
  ],
2021
  "shape": [
2022
- 1536
2023
  ],
2024
  "filename_index": 1
2025
  }
2026
  ]
2027
  },
2028
- "h.6.attn.c_proj.bias": {
2029
  "type": "Distributed",
2030
  "shape": [
2031
  768
@@ -2052,10 +1981,10 @@
2052
  }
2053
  ]
2054
  },
2055
- "h.3.mlp.c_fc.bias": {
2056
  "type": "Distributed",
2057
  "shape": [
2058
- 3072
2059
  ],
2060
  "dtype": "F32",
2061
  "chunks": [
@@ -2064,91 +1993,81 @@
2064
  0
2065
  ],
2066
  "shape": [
2067
- 1536
2068
  ],
2069
  "filename_index": 0
2070
  },
2071
  {
2072
  "offsets": [
2073
- 1536
2074
  ],
2075
  "shape": [
2076
- 1536
2077
  ],
2078
  "filename_index": 1
2079
  }
2080
  ]
2081
  },
2082
- "h.10.attn.bias": {
2083
  "type": "Distributed",
2084
  "shape": [
2085
- 1,
2086
- 1,
2087
- 1024,
2088
- 1024
2089
  ],
2090
  "dtype": "F32",
2091
  "chunks": [
2092
  {
2093
  "offsets": [
2094
- 0,
2095
- 0,
2096
- 0,
2097
  0
2098
  ],
2099
  "shape": [
2100
- 1,
2101
- 1,
2102
- 1024,
2103
- 512
2104
  ],
2105
  "filename_index": 0
2106
  },
2107
  {
2108
  "offsets": [
2109
- 0,
2110
- 0,
2111
- 0,
2112
- 512
2113
  ],
2114
  "shape": [
2115
- 1,
2116
- 1,
2117
- 1024,
2118
- 512
2119
  ],
2120
  "filename_index": 1
2121
  }
2122
  ]
2123
  },
2124
- "h.1.ln_2.weight": {
2125
  "type": "Distributed",
2126
  "shape": [
 
2127
  768
2128
  ],
2129
  "dtype": "F32",
2130
  "chunks": [
2131
  {
2132
  "offsets": [
 
2133
  0
2134
  ],
2135
  "shape": [
2136
- 384
 
2137
  ],
2138
  "filename_index": 0
2139
  },
2140
  {
2141
  "offsets": [
2142
- 384
 
2143
  ],
2144
  "shape": [
2145
- 384
 
2146
  ],
2147
  "filename_index": 1
2148
  }
2149
  ]
2150
  },
2151
- "h.7.ln_1.bias": {
2152
  "type": "Distributed",
2153
  "shape": [
2154
  768
@@ -2175,10 +2094,10 @@
2175
  }
2176
  ]
2177
  },
2178
- "h.3.mlp.c_proj.bias": {
2179
  "type": "Distributed",
2180
  "shape": [
2181
- 768
2182
  ],
2183
  "dtype": "F32",
2184
  "chunks": [
@@ -2187,58 +2106,53 @@
2187
  0
2188
  ],
2189
  "shape": [
2190
- 384
2191
  ],
2192
  "filename_index": 0
2193
  },
2194
  {
2195
  "offsets": [
2196
- 384
2197
  ],
2198
  "shape": [
2199
- 384
2200
  ],
2201
  "filename_index": 1
2202
  }
2203
  ]
2204
  },
2205
- "h.10.mlp.c_proj.weight": {
2206
  "type": "Distributed",
2207
  "shape": [
2208
- 3072,
2209
  768
2210
  ],
2211
  "dtype": "F32",
2212
  "chunks": [
2213
  {
2214
  "offsets": [
2215
- 0,
2216
  0
2217
  ],
2218
  "shape": [
2219
- 1536,
2220
- 768
2221
  ],
2222
  "filename_index": 0
2223
  },
2224
  {
2225
  "offsets": [
2226
- 1536,
2227
- 0
2228
  ],
2229
  "shape": [
2230
- 1536,
2231
- 768
2232
  ],
2233
  "filename_index": 1
2234
  }
2235
  ]
2236
  },
2237
- "h.4.mlp.c_fc.weight": {
2238
  "type": "Distributed",
2239
  "shape": [
2240
  768,
2241
- 3072
2242
  ],
2243
  "dtype": "F32",
2244
  "chunks": [
@@ -2248,25 +2162,25 @@
2248
  0
2249
  ],
2250
  "shape": [
2251
- 768,
2252
- 1536
2253
  ],
2254
  "filename_index": 0
2255
  },
2256
  {
2257
  "offsets": [
2258
- 0,
2259
- 1536
2260
  ],
2261
  "shape": [
2262
- 768,
2263
- 1536
2264
  ],
2265
  "filename_index": 1
2266
  }
2267
  ]
2268
  },
2269
- "h.7.ln_2.weight": {
2270
  "type": "Distributed",
2271
  "shape": [
2272
  768
@@ -2293,42 +2207,37 @@
2293
  }
2294
  ]
2295
  },
2296
- "h.10.mlp.c_fc.weight": {
2297
  "type": "Distributed",
2298
  "shape": [
2299
- 768,
2300
- 3072
2301
  ],
2302
  "dtype": "F32",
2303
  "chunks": [
2304
  {
2305
  "offsets": [
2306
- 0,
2307
  0
2308
  ],
2309
  "shape": [
2310
- 768,
2311
- 1536
2312
  ],
2313
  "filename_index": 0
2314
  },
2315
  {
2316
  "offsets": [
2317
- 0,
2318
- 1536
2319
  ],
2320
  "shape": [
2321
- 768,
2322
- 1536
2323
  ],
2324
  "filename_index": 1
2325
  }
2326
  ]
2327
  },
2328
- "h.8.ln_2.weight": {
2329
  "type": "Distributed",
2330
  "shape": [
2331
- 768
2332
  ],
2333
  "dtype": "F32",
2334
  "chunks": [
@@ -2337,25 +2246,25 @@
2337
  0
2338
  ],
2339
  "shape": [
2340
- 384
2341
  ],
2342
  "filename_index": 0
2343
  },
2344
  {
2345
  "offsets": [
2346
- 384
2347
  ],
2348
  "shape": [
2349
- 384
2350
  ],
2351
  "filename_index": 1
2352
  }
2353
  ]
2354
  },
2355
- "h.0.mlp.c_fc.bias": {
2356
  "type": "Distributed",
2357
  "shape": [
2358
- 3072
2359
  ],
2360
  "dtype": "F32",
2361
  "chunks": [
@@ -2364,217 +2273,212 @@
2364
  0
2365
  ],
2366
  "shape": [
2367
- 1536
2368
  ],
2369
  "filename_index": 0
2370
  },
2371
  {
2372
  "offsets": [
2373
- 1536
2374
  ],
2375
  "shape": [
2376
- 1536
2377
  ],
2378
  "filename_index": 1
2379
  }
2380
  ]
2381
  },
2382
- "h.8.mlp.c_fc.weight": {
2383
  "type": "Distributed",
2384
  "shape": [
2385
- 768,
2386
- 3072
2387
  ],
2388
  "dtype": "F32",
2389
  "chunks": [
2390
  {
2391
  "offsets": [
2392
- 0,
2393
  0
2394
  ],
2395
  "shape": [
2396
- 768,
2397
- 1536
2398
  ],
2399
  "filename_index": 0
2400
  },
2401
  {
2402
  "offsets": [
2403
- 0,
2404
- 1536
2405
  ],
2406
  "shape": [
2407
- 768,
2408
- 1536
2409
  ],
2410
  "filename_index": 1
2411
  }
2412
  ]
2413
  },
2414
- "wte.weight": {
2415
  "type": "Distributed",
2416
  "shape": [
2417
- 50257,
2418
  768
2419
  ],
2420
  "dtype": "F32",
2421
  "chunks": [
2422
  {
2423
  "offsets": [
2424
- 0,
2425
  0
2426
  ],
2427
  "shape": [
2428
- 50257,
2429
  384
2430
  ],
2431
  "filename_index": 0
2432
  },
2433
  {
2434
  "offsets": [
2435
- 0,
2436
  384
2437
  ],
2438
  "shape": [
2439
- 50257,
2440
  384
2441
  ],
2442
  "filename_index": 1
2443
  }
2444
  ]
2445
  },
2446
- "h.4.ln_2.bias": {
2447
  "type": "Distributed",
2448
  "shape": [
 
2449
  768
2450
  ],
2451
  "dtype": "F32",
2452
  "chunks": [
2453
  {
2454
  "offsets": [
 
2455
  0
2456
  ],
2457
  "shape": [
2458
- 384
 
2459
  ],
2460
  "filename_index": 0
2461
  },
2462
  {
2463
  "offsets": [
2464
- 384
 
2465
  ],
2466
  "shape": [
2467
- 384
 
2468
  ],
2469
  "filename_index": 1
2470
  }
2471
  ]
2472
  },
2473
- "h.9.attn.c_proj.weight": {
2474
  "type": "Distributed",
2475
  "shape": [
2476
- 768,
2477
- 768
 
 
2478
  ],
2479
  "dtype": "F32",
2480
  "chunks": [
2481
  {
2482
  "offsets": [
 
 
2483
  0,
2484
  0
2485
  ],
2486
  "shape": [
2487
- 384,
2488
- 768
 
 
2489
  ],
2490
  "filename_index": 0
2491
  },
2492
  {
2493
  "offsets": [
2494
- 384,
2495
- 0
 
 
2496
  ],
2497
  "shape": [
2498
- 384,
2499
- 768
 
 
2500
  ],
2501
  "filename_index": 1
2502
  }
2503
  ]
2504
  },
2505
- "h.9.ln_1.bias": {
2506
  "type": "Distributed",
2507
  "shape": [
2508
- 768
 
2509
  ],
2510
  "dtype": "F32",
2511
  "chunks": [
2512
  {
2513
  "offsets": [
 
2514
  0
2515
  ],
2516
  "shape": [
2517
- 384
 
2518
  ],
2519
  "filename_index": 0
2520
  },
2521
  {
2522
  "offsets": [
2523
- 384
 
2524
  ],
2525
  "shape": [
2526
- 384
 
2527
  ],
2528
  "filename_index": 1
2529
  }
2530
  ]
2531
  },
2532
- "h.4.attn.bias": {
2533
  "type": "Distributed",
2534
  "shape": [
2535
- 1,
2536
- 1,
2537
- 1024,
2538
- 1024
2539
  ],
2540
  "dtype": "F32",
2541
  "chunks": [
2542
  {
2543
  "offsets": [
2544
- 0,
2545
- 0,
2546
- 0,
2547
  0
2548
  ],
2549
  "shape": [
2550
- 1,
2551
- 1,
2552
- 1024,
2553
- 512
2554
  ],
2555
  "filename_index": 0
2556
  },
2557
  {
2558
  "offsets": [
2559
- 0,
2560
- 0,
2561
- 0,
2562
- 512
2563
  ],
2564
  "shape": [
2565
- 1,
2566
- 1,
2567
- 1024,
2568
- 512
2569
  ],
2570
  "filename_index": 1
2571
  }
2572
  ]
2573
  },
2574
- "h.11.attn.c_attn.bias": {
2575
  "type": "Distributed",
2576
  "shape": [
2577
- 2304
2578
  ],
2579
  "dtype": "F32",
2580
  "chunks": [
@@ -2583,26 +2487,26 @@
2583
  0
2584
  ],
2585
  "shape": [
2586
- 1152
2587
  ],
2588
  "filename_index": 0
2589
  },
2590
  {
2591
  "offsets": [
2592
- 1152
2593
  ],
2594
  "shape": [
2595
- 1152
2596
  ],
2597
  "filename_index": 1
2598
  }
2599
  ]
2600
  },
2601
- "h.2.attn.c_attn.weight": {
2602
  "type": "Distributed",
2603
  "shape": [
2604
  768,
2605
- 2304
2606
  ],
2607
  "dtype": "F32",
2608
  "chunks": [
@@ -2613,51 +2517,56 @@
2613
  ],
2614
  "shape": [
2615
  768,
2616
- 1152
2617
  ],
2618
  "filename_index": 0
2619
  },
2620
  {
2621
  "offsets": [
2622
  0,
2623
- 1152
2624
  ],
2625
  "shape": [
2626
  768,
2627
- 1152
2628
  ],
2629
  "filename_index": 1
2630
  }
2631
  ]
2632
  },
2633
- "h.1.attn.c_proj.bias": {
2634
  "type": "Distributed",
2635
  "shape": [
 
2636
  768
2637
  ],
2638
  "dtype": "F32",
2639
  "chunks": [
2640
  {
2641
  "offsets": [
 
2642
  0
2643
  ],
2644
  "shape": [
2645
- 384
 
2646
  ],
2647
  "filename_index": 0
2648
  },
2649
  {
2650
  "offsets": [
2651
- 384
 
2652
  ],
2653
  "shape": [
2654
- 384
 
2655
  ],
2656
  "filename_index": 1
2657
  }
2658
  ]
2659
  },
2660
- "h.5.attn.bias": {
2661
  "type": "Distributed",
2662
  "shape": [
2663
  1,
@@ -2699,7 +2608,7 @@
2699
  }
2700
  ]
2701
  },
2702
- "h.4.attn.c_attn.weight": {
2703
  "type": "Distributed",
2704
  "shape": [
2705
  768,
@@ -2731,151 +2640,134 @@
2731
  }
2732
  ]
2733
  },
2734
- "h.5.attn.c_proj.bias": {
2735
  "type": "Distributed",
2736
  "shape": [
 
2737
  768
2738
  ],
2739
  "dtype": "F32",
2740
  "chunks": [
2741
  {
2742
  "offsets": [
 
2743
  0
2744
  ],
2745
  "shape": [
2746
- 384
 
2747
  ],
2748
  "filename_index": 0
2749
  },
2750
  {
2751
  "offsets": [
2752
- 384
 
2753
  ],
2754
  "shape": [
2755
- 384
 
2756
  ],
2757
  "filename_index": 1
2758
  }
2759
  ]
2760
  },
2761
- "h.9.ln_2.bias": {
2762
  "type": "Distributed",
2763
  "shape": [
2764
- 768
 
2765
  ],
2766
  "dtype": "F32",
2767
  "chunks": [
2768
  {
2769
  "offsets": [
 
2770
  0
2771
  ],
2772
  "shape": [
2773
- 384
 
2774
  ],
2775
  "filename_index": 0
2776
  },
2777
  {
2778
  "offsets": [
2779
- 384
 
2780
  ],
2781
  "shape": [
2782
- 384
 
2783
  ],
2784
  "filename_index": 1
2785
  }
2786
  ]
2787
  },
2788
- "h.5.ln_2.bias": {
2789
  "type": "Distributed",
2790
  "shape": [
 
2791
  768
2792
  ],
2793
  "dtype": "F32",
2794
  "chunks": [
2795
  {
2796
  "offsets": [
 
2797
  0
2798
  ],
2799
  "shape": [
2800
- 384
 
2801
  ],
2802
  "filename_index": 0
2803
  },
2804
  {
2805
  "offsets": [
2806
- 384
 
2807
  ],
2808
  "shape": [
2809
- 384
 
2810
  ],
2811
  "filename_index": 1
2812
  }
2813
  ]
2814
  },
2815
- "h.2.attn.c_proj.weight": {
2816
  "type": "Distributed",
2817
  "shape": [
2818
- 768,
2819
  768
2820
  ],
2821
  "dtype": "F32",
2822
  "chunks": [
2823
  {
2824
  "offsets": [
2825
- 0,
2826
  0
2827
  ],
2828
  "shape": [
2829
- 384,
2830
- 768
2831
  ],
2832
  "filename_index": 0
2833
  },
2834
  {
2835
  "offsets": [
2836
- 384,
2837
- 0
2838
  ],
2839
  "shape": [
2840
- 384,
2841
- 768
2842
  ],
2843
  "filename_index": 1
2844
  }
2845
  ]
2846
  },
2847
- "h.10.ln_2.weight": {
2848
- "type": "Distributed",
2849
- "shape": [
2850
- 768
2851
- ],
2852
- "dtype": "F32",
2853
- "chunks": [
2854
- {
2855
- "offsets": [
2856
- 0
2857
- ],
2858
- "shape": [
2859
- 384
2860
- ],
2861
- "filename_index": 0
2862
- },
2863
- {
2864
- "offsets": [
2865
- 384
2866
- ],
2867
- "shape": [
2868
- 384
2869
- ],
2870
- "filename_index": 1
2871
- }
2872
- ]
2873
- },
2874
- "h.7.mlp.c_fc.weight": {
2875
  "type": "Distributed",
2876
  "shape": [
2877
  768,
2878
- 3072
2879
  ],
2880
  "dtype": "F32",
2881
  "chunks": [
@@ -2886,27 +2778,27 @@
2886
  ],
2887
  "shape": [
2888
  768,
2889
- 1536
2890
  ],
2891
  "filename_index": 0
2892
  },
2893
  {
2894
  "offsets": [
2895
  0,
2896
- 1536
2897
  ],
2898
  "shape": [
2899
  768,
2900
- 1536
2901
  ],
2902
  "filename_index": 1
2903
  }
2904
  ]
2905
  },
2906
- "h.11.mlp.c_fc.bias": {
2907
  "type": "Distributed",
2908
  "shape": [
2909
- 3072
2910
  ],
2911
  "dtype": "F32",
2912
  "chunks": [
@@ -2915,57 +2807,67 @@
2915
  0
2916
  ],
2917
  "shape": [
2918
- 1536
2919
  ],
2920
  "filename_index": 0
2921
  },
2922
  {
2923
  "offsets": [
2924
- 1536
2925
  ],
2926
  "shape": [
2927
- 1536
2928
  ],
2929
  "filename_index": 1
2930
  }
2931
  ]
2932
  },
2933
- "h.10.attn.c_attn.weight": {
2934
  "type": "Distributed",
2935
  "shape": [
2936
- 768,
2937
- 2304
 
 
2938
  ],
2939
  "dtype": "F32",
2940
  "chunks": [
2941
  {
2942
  "offsets": [
 
 
2943
  0,
2944
  0
2945
  ],
2946
  "shape": [
2947
- 768,
2948
- 1152
 
 
2949
  ],
2950
  "filename_index": 0
2951
  },
2952
  {
2953
  "offsets": [
2954
  0,
2955
- 1152
 
 
2956
  ],
2957
  "shape": [
2958
- 768,
2959
- 1152
 
 
2960
  ],
2961
  "filename_index": 1
2962
  }
2963
  ]
2964
  },
2965
- "h.3.attn.c_attn.bias": {
2966
  "type": "Distributed",
2967
  "shape": [
2968
- 2304
2969
  ],
2970
  "dtype": "F32",
2971
  "chunks": [
@@ -2974,22 +2876,22 @@
2974
  0
2975
  ],
2976
  "shape": [
2977
- 1152
2978
  ],
2979
  "filename_index": 0
2980
  },
2981
  {
2982
  "offsets": [
2983
- 1152
2984
  ],
2985
  "shape": [
2986
- 1152
2987
  ],
2988
  "filename_index": 1
2989
  }
2990
  ]
2991
  },
2992
- "h.10.ln_1.bias": {
2993
  "type": "Distributed",
2994
  "shape": [
2995
  768
@@ -3016,7 +2918,7 @@
3016
  }
3017
  ]
3018
  },
3019
- "ln_f.weight": {
3020
  "type": "Distributed",
3021
  "shape": [
3022
  768
@@ -3043,7 +2945,7 @@
3043
  }
3044
  ]
3045
  },
3046
- "h.11.mlp.c_proj.bias": {
3047
  "type": "Distributed",
3048
  "shape": [
3049
  768
@@ -3070,34 +2972,49 @@
3070
  }
3071
  ]
3072
  },
3073
- "h.9.mlp.c_fc.bias": {
3074
  "type": "Distributed",
3075
  "shape": [
3076
- 3072
 
 
 
3077
  ],
3078
  "dtype": "F32",
3079
  "chunks": [
3080
  {
3081
  "offsets": [
 
 
 
3082
  0
3083
  ],
3084
  "shape": [
3085
- 1536
 
 
 
3086
  ],
3087
  "filename_index": 0
3088
  },
3089
  {
3090
  "offsets": [
3091
- 1536
 
 
 
3092
  ],
3093
  "shape": [
3094
- 1536
 
 
 
3095
  ],
3096
  "filename_index": 1
3097
  }
3098
  ]
3099
  },
3100
- "h.11.ln_2.bias": {
3101
  "type": "Distributed",
3102
  "shape": [
3103
  768
@@ -3124,10 +3041,10 @@
3124
  }
3125
  ]
3126
  },
3127
- "h.0.attn.c_proj.weight": {
3128
  "type": "Distributed",
3129
  "shape": [
3130
- 768,
3131
  768
3132
  ],
3133
  "dtype": "F32",
@@ -3138,29 +3055,29 @@
3138
  0
3139
  ],
3140
  "shape": [
3141
- 384,
3142
  768
3143
  ],
3144
  "filename_index": 0
3145
  },
3146
  {
3147
  "offsets": [
3148
- 384,
3149
  0
3150
  ],
3151
  "shape": [
3152
- 384,
3153
  768
3154
  ],
3155
  "filename_index": 1
3156
  }
3157
  ]
3158
  },
3159
- "h.3.attn.c_proj.weight": {
3160
  "type": "Distributed",
3161
  "shape": [
3162
  768,
3163
- 768
3164
  ],
3165
  "dtype": "F32",
3166
  "chunks": [
@@ -3170,109 +3087,124 @@
3170
  0
3171
  ],
3172
  "shape": [
3173
- 384,
3174
- 768
3175
  ],
3176
  "filename_index": 0
3177
  },
3178
  {
3179
  "offsets": [
3180
- 384,
3181
- 0
3182
  ],
3183
  "shape": [
3184
- 384,
3185
- 768
3186
  ],
3187
  "filename_index": 1
3188
  }
3189
  ]
3190
  },
3191
- "h.7.attn.c_attn.bias": {
3192
  "type": "Distributed",
3193
  "shape": [
3194
- 2304
 
3195
  ],
3196
  "dtype": "F32",
3197
  "chunks": [
3198
  {
3199
  "offsets": [
 
3200
  0
3201
  ],
3202
  "shape": [
3203
- 1152
 
3204
  ],
3205
  "filename_index": 0
3206
  },
3207
  {
3208
  "offsets": [
3209
- 1152
 
3210
  ],
3211
  "shape": [
3212
- 1152
 
3213
  ],
3214
  "filename_index": 1
3215
  }
3216
  ]
3217
  },
3218
- "h.6.attn.c_attn.bias": {
3219
  "type": "Distributed",
3220
  "shape": [
 
3221
  2304
3222
  ],
3223
  "dtype": "F32",
3224
  "chunks": [
3225
  {
3226
  "offsets": [
 
3227
  0
3228
  ],
3229
  "shape": [
 
3230
  1152
3231
  ],
3232
  "filename_index": 0
3233
  },
3234
  {
3235
  "offsets": [
 
3236
  1152
3237
  ],
3238
  "shape": [
 
3239
  1152
3240
  ],
3241
  "filename_index": 1
3242
  }
3243
  ]
3244
  },
3245
- "h.6.ln_1.bias": {
3246
  "type": "Distributed",
3247
  "shape": [
3248
- 768
 
3249
  ],
3250
  "dtype": "F32",
3251
  "chunks": [
3252
  {
3253
  "offsets": [
 
3254
  0
3255
  ],
3256
  "shape": [
3257
- 384
 
3258
  ],
3259
  "filename_index": 0
3260
  },
3261
  {
3262
  "offsets": [
3263
- 384
 
3264
  ],
3265
  "shape": [
3266
- 384
 
3267
  ],
3268
  "filename_index": 1
3269
  }
3270
  ]
3271
  },
3272
- "h.10.attn.c_attn.bias": {
3273
  "type": "Distributed",
3274
  "shape": [
3275
- 2304
3276
  ],
3277
  "dtype": "F32",
3278
  "chunks": [
@@ -3281,25 +3213,25 @@
3281
  0
3282
  ],
3283
  "shape": [
3284
- 1152
3285
  ],
3286
  "filename_index": 0
3287
  },
3288
  {
3289
  "offsets": [
3290
- 1152
3291
  ],
3292
  "shape": [
3293
- 1152
3294
  ],
3295
  "filename_index": 1
3296
  }
3297
  ]
3298
  },
3299
- "h.4.mlp.c_fc.bias": {
3300
  "type": "Distributed",
3301
  "shape": [
3302
- 3072
3303
  ],
3304
  "dtype": "F32",
3305
  "chunks": [
@@ -3308,22 +3240,22 @@
3308
  0
3309
  ],
3310
  "shape": [
3311
- 1536
3312
  ],
3313
  "filename_index": 0
3314
  },
3315
  {
3316
  "offsets": [
3317
- 1536
3318
  ],
3319
  "shape": [
3320
- 1536
3321
  ],
3322
  "filename_index": 1
3323
  }
3324
  ]
3325
  },
3326
- "h.8.ln_1.weight": {
3327
  "type": "Distributed",
3328
  "shape": [
3329
  768
@@ -3350,258 +3282,258 @@
3350
  }
3351
  ]
3352
  },
3353
- "h.8.ln_1.bias": {
3354
  "type": "Distributed",
3355
  "shape": [
 
3356
  768
3357
  ],
3358
  "dtype": "F32",
3359
  "chunks": [
3360
  {
3361
  "offsets": [
 
3362
  0
3363
  ],
3364
  "shape": [
3365
- 384
 
3366
  ],
3367
  "filename_index": 0
3368
  },
3369
  {
3370
  "offsets": [
3371
- 384
 
3372
  ],
3373
  "shape": [
3374
- 384
 
3375
  ],
3376
  "filename_index": 1
3377
  }
3378
  ]
3379
  },
3380
- "h.6.ln_1.weight": {
3381
  "type": "Distributed",
3382
  "shape": [
3383
- 768
 
3384
  ],
3385
  "dtype": "F32",
3386
  "chunks": [
3387
  {
3388
  "offsets": [
 
3389
  0
3390
  ],
3391
  "shape": [
3392
- 384
 
3393
  ],
3394
  "filename_index": 0
3395
  },
3396
  {
3397
  "offsets": [
3398
- 384
 
3399
  ],
3400
  "shape": [
3401
- 384
 
3402
  ],
3403
  "filename_index": 1
3404
  }
3405
  ]
3406
  },
3407
- "h.9.attn.bias": {
3408
  "type": "Distributed",
3409
  "shape": [
3410
- 1,
3411
- 1,
3412
  1024,
3413
- 1024
3414
  ],
3415
  "dtype": "F32",
3416
  "chunks": [
3417
  {
3418
  "offsets": [
3419
- 0,
3420
- 0,
3421
  0,
3422
  0
3423
  ],
3424
  "shape": [
3425
- 1,
3426
- 1,
3427
  1024,
3428
- 512
3429
  ],
3430
  "filename_index": 0
3431
  },
3432
  {
3433
  "offsets": [
3434
  0,
3435
- 0,
3436
- 0,
3437
- 512
3438
  ],
3439
  "shape": [
3440
- 1,
3441
- 1,
3442
  1024,
3443
- 512
3444
  ],
3445
  "filename_index": 1
3446
  }
3447
  ]
3448
  },
3449
- "h.10.attn.c_proj.weight": {
3450
  "type": "Distributed",
3451
  "shape": [
3452
- 768,
3453
  768
3454
  ],
3455
  "dtype": "F32",
3456
  "chunks": [
3457
  {
3458
  "offsets": [
3459
- 0,
3460
  0
3461
  ],
3462
  "shape": [
3463
- 384,
3464
- 768
3465
  ],
3466
  "filename_index": 0
3467
  },
3468
  {
3469
  "offsets": [
3470
- 384,
3471
- 0
3472
  ],
3473
  "shape": [
3474
- 384,
3475
- 768
3476
  ],
3477
  "filename_index": 1
3478
  }
3479
  ]
3480
  },
3481
- "h.8.attn.c_attn.bias": {
3482
  "type": "Distributed",
3483
  "shape": [
3484
- 2304
 
3485
  ],
3486
  "dtype": "F32",
3487
  "chunks": [
3488
  {
3489
  "offsets": [
 
3490
  0
3491
  ],
3492
  "shape": [
3493
- 1152
 
3494
  ],
3495
  "filename_index": 0
3496
  },
3497
  {
3498
  "offsets": [
3499
- 1152
 
3500
  ],
3501
  "shape": [
3502
- 1152
 
3503
  ],
3504
  "filename_index": 1
3505
  }
3506
  ]
3507
  },
3508
- "wpe.weight": {
3509
  "type": "Distributed",
3510
  "shape": [
 
 
3511
  1024,
3512
- 768
3513
  ],
3514
  "dtype": "F32",
3515
  "chunks": [
3516
  {
3517
  "offsets": [
 
 
3518
  0,
3519
  0
3520
  ],
3521
  "shape": [
 
 
3522
  1024,
3523
- 384
3524
  ],
3525
  "filename_index": 0
3526
  },
3527
  {
3528
  "offsets": [
3529
  0,
3530
- 384
 
 
3531
  ],
3532
  "shape": [
 
 
3533
  1024,
3534
- 384
3535
  ],
3536
  "filename_index": 1
3537
  }
3538
  ]
3539
  },
3540
- "h.11.mlp.c_fc.weight": {
3541
  "type": "Distributed",
3542
  "shape": [
3543
- 768,
3544
- 3072
3545
  ],
3546
  "dtype": "F32",
3547
  "chunks": [
3548
  {
3549
  "offsets": [
3550
- 0,
3551
  0
3552
  ],
3553
  "shape": [
3554
- 768,
3555
- 1536
3556
  ],
3557
  "filename_index": 0
3558
  },
3559
  {
3560
  "offsets": [
3561
- 0,
3562
- 1536
3563
  ],
3564
  "shape": [
3565
- 768,
3566
- 1536
3567
  ],
3568
  "filename_index": 1
3569
  }
3570
  ]
3571
  },
3572
- "h.3.attn.c_attn.weight": {
3573
  "type": "Distributed",
3574
  "shape": [
3575
- 768,
3576
- 2304
3577
  ],
3578
  "dtype": "F32",
3579
  "chunks": [
3580
  {
3581
  "offsets": [
3582
- 0,
3583
  0
3584
  ],
3585
  "shape": [
3586
- 768,
3587
- 1152
3588
  ],
3589
  "filename_index": 0
3590
  },
3591
  {
3592
  "offsets": [
3593
- 0,
3594
- 1152
3595
  ],
3596
  "shape": [
3597
- 768,
3598
- 1152
3599
  ],
3600
  "filename_index": 1
3601
  }
3602
  ]
3603
  },
3604
- "h.10.ln_1.weight": {
3605
  "type": "Distributed",
3606
  "shape": [
3607
  768
@@ -3628,11 +3560,11 @@
3628
  }
3629
  ]
3630
  },
3631
- "h.0.mlp.c_proj.weight": {
3632
  "type": "Distributed",
3633
  "shape": [
3634
- 3072,
3635
- 768
3636
  ],
3637
  "dtype": "F32",
3638
  "chunks": [
@@ -3642,51 +3574,46 @@
3642
  0
3643
  ],
3644
  "shape": [
3645
- 1536,
3646
- 768
3647
  ],
3648
  "filename_index": 0
3649
  },
3650
  {
3651
  "offsets": [
3652
- 1536,
3653
- 0
3654
  ],
3655
  "shape": [
3656
- 1536,
3657
- 768
3658
  ],
3659
  "filename_index": 1
3660
  }
3661
  ]
3662
  },
3663
- "h.0.mlp.c_fc.weight": {
3664
  "type": "Distributed",
3665
  "shape": [
3666
- 768,
3667
- 3072
3668
  ],
3669
  "dtype": "F32",
3670
  "chunks": [
3671
  {
3672
  "offsets": [
3673
- 0,
3674
  0
3675
  ],
3676
  "shape": [
3677
- 768,
3678
- 1536
3679
  ],
3680
  "filename_index": 0
3681
  },
3682
  {
3683
  "offsets": [
3684
- 0,
3685
- 1536
3686
  ],
3687
  "shape": [
3688
- 768,
3689
- 1536
3690
  ],
3691
  "filename_index": 1
3692
  }
@@ -3734,38 +3661,43 @@
3734
  }
3735
  ]
3736
  },
3737
- "h.5.ln_1.weight": {
3738
  "type": "Distributed",
3739
  "shape": [
3740
- 768
 
3741
  ],
3742
  "dtype": "F32",
3743
  "chunks": [
3744
  {
3745
  "offsets": [
 
3746
  0
3747
  ],
3748
  "shape": [
3749
- 384
 
3750
  ],
3751
  "filename_index": 0
3752
  },
3753
  {
3754
  "offsets": [
3755
- 384
 
3756
  ],
3757
  "shape": [
3758
- 384
 
3759
  ],
3760
  "filename_index": 1
3761
  }
3762
  ]
3763
  },
3764
- "h.0.attn.c_attn.weight": {
3765
  "type": "Distributed",
3766
  "shape": [
3767
- 768,
3768
- 2304
3769
  ],
3770
  "dtype": "F32",
3771
  "chunks": [
@@ -3775,25 +3707,25 @@
3775
  0
3776
  ],
3777
  "shape": [
3778
- 768,
3779
- 1152
3780
  ],
3781
  "filename_index": 0
3782
  },
3783
  {
3784
  "offsets": [
3785
- 0,
3786
- 1152
3787
  ],
3788
  "shape": [
3789
- 768,
3790
- 1152
3791
  ],
3792
  "filename_index": 1
3793
  }
3794
  ]
3795
  },
3796
- "h.5.ln_1.bias": {
3797
  "type": "Distributed",
3798
  "shape": [
3799
  768
@@ -3820,10 +3752,10 @@
3820
  }
3821
  ]
3822
  },
3823
- "h.2.ln_2.bias": {
3824
  "type": "Distributed",
3825
  "shape": [
3826
- 768
3827
  ],
3828
  "dtype": "F32",
3829
  "chunks": [
@@ -3832,84 +3764,84 @@
3832
  0
3833
  ],
3834
  "shape": [
3835
- 384
3836
  ],
3837
  "filename_index": 0
3838
  },
3839
  {
3840
  "offsets": [
3841
- 384
3842
  ],
3843
  "shape": [
3844
- 384
3845
  ],
3846
  "filename_index": 1
3847
  }
3848
  ]
3849
  },
3850
- "h.4.attn.c_proj.weight": {
3851
  "type": "Distributed",
3852
  "shape": [
3853
- 768,
3854
  768
3855
  ],
3856
  "dtype": "F32",
3857
  "chunks": [
3858
  {
3859
  "offsets": [
3860
- 0,
3861
  0
3862
  ],
3863
  "shape": [
3864
- 384,
3865
- 768
3866
  ],
3867
  "filename_index": 0
3868
  },
3869
  {
3870
  "offsets": [
3871
- 384,
3872
- 0
3873
  ],
3874
  "shape": [
3875
- 384,
3876
- 768
3877
  ],
3878
  "filename_index": 1
3879
  }
3880
  ]
3881
  },
3882
- "h.2.mlp.c_fc.bias": {
3883
  "type": "Distributed",
3884
  "shape": [
 
3885
  3072
3886
  ],
3887
  "dtype": "F32",
3888
  "chunks": [
3889
  {
3890
  "offsets": [
 
3891
  0
3892
  ],
3893
  "shape": [
 
3894
  1536
3895
  ],
3896
  "filename_index": 0
3897
  },
3898
  {
3899
  "offsets": [
 
3900
  1536
3901
  ],
3902
  "shape": [
 
3903
  1536
3904
  ],
3905
  "filename_index": 1
3906
  }
3907
  ]
3908
  },
3909
- "h.1.mlp.c_fc.bias": {
3910
  "type": "Distributed",
3911
  "shape": [
3912
- 3072
3913
  ],
3914
  "dtype": "F32",
3915
  "chunks": [
@@ -3918,25 +3850,25 @@
3918
  0
3919
  ],
3920
  "shape": [
3921
- 1536
3922
  ],
3923
  "filename_index": 0
3924
  },
3925
  {
3926
  "offsets": [
3927
- 1536
3928
  ],
3929
  "shape": [
3930
- 1536
3931
  ],
3932
  "filename_index": 1
3933
  }
3934
  ]
3935
  },
3936
- "h.4.attn.c_proj.bias": {
3937
  "type": "Distributed",
3938
  "shape": [
3939
- 768
3940
  ],
3941
  "dtype": "F32",
3942
  "chunks": [
@@ -3945,26 +3877,26 @@
3945
  0
3946
  ],
3947
  "shape": [
3948
- 384
3949
  ],
3950
  "filename_index": 0
3951
  },
3952
  {
3953
  "offsets": [
3954
- 384
3955
  ],
3956
  "shape": [
3957
- 384
3958
  ],
3959
  "filename_index": 1
3960
  }
3961
  ]
3962
  },
3963
- "h.1.mlp.c_fc.weight": {
3964
  "type": "Distributed",
3965
  "shape": [
3966
- 768,
3967
- 3072
3968
  ],
3969
  "dtype": "F32",
3970
  "chunks": [
@@ -3974,28 +3906,28 @@
3974
  0
3975
  ],
3976
  "shape": [
3977
- 768,
3978
- 1536
3979
  ],
3980
  "filename_index": 0
3981
  },
3982
  {
3983
  "offsets": [
3984
- 0,
3985
- 1536
3986
  ],
3987
  "shape": [
3988
- 768,
3989
- 1536
3990
  ],
3991
  "filename_index": 1
3992
  }
3993
  ]
3994
  },
3995
- "h.11.attn.c_proj.weight": {
3996
  "type": "Distributed",
3997
  "shape": [
3998
- 768,
3999
  768
4000
  ],
4001
  "dtype": "F32",
@@ -4006,71 +3938,56 @@
4006
  0
4007
  ],
4008
  "shape": [
4009
- 384,
4010
  768
4011
  ],
4012
  "filename_index": 0
4013
  },
4014
  {
4015
  "offsets": [
4016
- 384,
4017
  0
4018
  ],
4019
  "shape": [
4020
- 384,
4021
  768
4022
  ],
4023
  "filename_index": 1
4024
  }
4025
  ]
4026
  },
4027
- "h.8.attn.bias": {
4028
  "type": "Distributed",
4029
  "shape": [
4030
- 1,
4031
- 1,
4032
- 1024,
4033
- 1024
4034
  ],
4035
  "dtype": "F32",
4036
  "chunks": [
4037
  {
4038
  "offsets": [
4039
- 0,
4040
- 0,
4041
- 0,
4042
  0
4043
  ],
4044
  "shape": [
4045
- 1,
4046
- 1,
4047
- 1024,
4048
- 512
4049
  ],
4050
  "filename_index": 0
4051
  },
4052
  {
4053
  "offsets": [
4054
- 0,
4055
- 0,
4056
- 0,
4057
- 512
4058
  ],
4059
  "shape": [
4060
- 1,
4061
- 1,
4062
- 1024,
4063
- 512
4064
  ],
4065
  "filename_index": 1
4066
  }
4067
  ]
4068
  },
4069
- "h.2.mlp.c_proj.weight": {
4070
  "type": "Distributed",
4071
  "shape": [
4072
- 3072,
4073
- 768
4074
  ],
4075
  "dtype": "F32",
4076
  "chunks": [
@@ -4080,28 +3997,28 @@
4080
  0
4081
  ],
4082
  "shape": [
4083
- 1536,
4084
- 768
4085
  ],
4086
  "filename_index": 0
4087
  },
4088
  {
4089
  "offsets": [
4090
- 1536,
4091
- 0
4092
- ],
4093
  "shape": [
4094
- 1536,
4095
- 768
4096
  ],
4097
  "filename_index": 1
4098
  }
4099
  ]
4100
  },
4101
- "h.3.ln_2.weight": {
4102
  "type": "Distributed",
4103
  "shape": [
4104
- 768
4105
  ],
4106
  "dtype": "F32",
4107
  "chunks": [
@@ -4110,25 +4027,25 @@
4110
  0
4111
  ],
4112
  "shape": [
4113
- 384
4114
  ],
4115
  "filename_index": 0
4116
  },
4117
  {
4118
  "offsets": [
4119
- 384
4120
  ],
4121
  "shape": [
4122
- 384
4123
  ],
4124
  "filename_index": 1
4125
  }
4126
  ]
4127
  },
4128
- "h.8.mlp.c_fc.bias": {
4129
  "type": "Distributed",
4130
  "shape": [
4131
- 3072
4132
  ],
4133
  "dtype": "F32",
4134
  "chunks": [
@@ -4137,22 +4054,22 @@
4137
  0
4138
  ],
4139
  "shape": [
4140
- 1536
4141
  ],
4142
  "filename_index": 0
4143
  },
4144
  {
4145
  "offsets": [
4146
- 1536
4147
  ],
4148
  "shape": [
4149
- 1536
4150
  ],
4151
  "filename_index": 1
4152
  }
4153
  ]
4154
  },
4155
- "h.9.attn.c_attn.bias": {
4156
  "type": "Distributed",
4157
  "shape": [
4158
  2304
@@ -4179,81 +4096,93 @@
4179
  }
4180
  ]
4181
  },
4182
- "h.2.attn.bias": {
4183
  "type": "Distributed",
4184
  "shape": [
4185
- 1,
4186
- 1,
4187
- 1024,
4188
- 1024
4189
  ],
4190
  "dtype": "F32",
4191
  "chunks": [
4192
  {
4193
  "offsets": [
4194
- 0,
4195
- 0,
4196
  0,
4197
  0
4198
  ],
4199
  "shape": [
4200
- 1,
4201
- 1,
4202
- 1024,
4203
- 512
4204
  ],
4205
  "filename_index": 0
4206
  },
4207
  {
4208
  "offsets": [
4209
- 0,
4210
- 0,
4211
- 0,
4212
- 512
4213
  ],
4214
  "shape": [
4215
- 1,
4216
- 1,
4217
- 1024,
4218
- 512
4219
  ],
4220
  "filename_index": 1
4221
  }
4222
  ]
4223
  },
4224
- "h.4.mlp.c_proj.weight": {
4225
  "type": "Distributed",
4226
  "shape": [
4227
- 3072,
4228
  768
4229
  ],
4230
  "dtype": "F32",
4231
  "chunks": [
4232
  {
4233
  "offsets": [
4234
- 0,
4235
  0
4236
  ],
4237
  "shape": [
4238
- 1536,
4239
- 768
4240
  ],
4241
  "filename_index": 0
4242
  },
4243
  {
4244
  "offsets": [
4245
- 1536,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4246
  0
4247
  ],
4248
  "shape": [
4249
- 1536,
4250
- 768
 
 
 
 
 
 
 
 
4251
  ],
4252
  "filename_index": 1
4253
  }
4254
  ]
4255
  },
4256
- "h.1.attn.bias": {
4257
  "type": "Distributed",
4258
  "shape": [
4259
  1,
@@ -4295,49 +4224,34 @@
4295
  }
4296
  ]
4297
  },
4298
- "h.3.attn.bias": {
4299
  "type": "Distributed",
4300
  "shape": [
4301
- 1,
4302
- 1,
4303
- 1024,
4304
- 1024
4305
  ],
4306
  "dtype": "F32",
4307
  "chunks": [
4308
  {
4309
  "offsets": [
4310
- 0,
4311
- 0,
4312
- 0,
4313
  0
4314
  ],
4315
  "shape": [
4316
- 1,
4317
- 1,
4318
- 1024,
4319
- 512
4320
  ],
4321
  "filename_index": 0
4322
  },
4323
  {
4324
  "offsets": [
4325
- 0,
4326
- 0,
4327
- 0,
4328
- 512
4329
  ],
4330
  "shape": [
4331
- 1,
4332
- 1,
4333
- 1024,
4334
- 512
4335
  ],
4336
  "filename_index": 1
4337
  }
4338
  ]
4339
  },
4340
- "h.0.ln_1.bias": {
4341
  "type": "Distributed",
4342
  "shape": [
4343
  768
@@ -4364,7 +4278,7 @@
4364
  }
4365
  ]
4366
  },
4367
- "h.3.ln_1.bias": {
4368
  "type": "Distributed",
4369
  "shape": [
4370
  768
@@ -4391,42 +4305,37 @@
4391
  }
4392
  ]
4393
  },
4394
- "h.1.attn.c_attn.weight": {
4395
  "type": "Distributed",
4396
  "shape": [
4397
- 768,
4398
- 2304
4399
  ],
4400
  "dtype": "F32",
4401
  "chunks": [
4402
  {
4403
  "offsets": [
4404
- 0,
4405
  0
4406
  ],
4407
  "shape": [
4408
- 768,
4409
- 1152
4410
  ],
4411
  "filename_index": 0
4412
  },
4413
  {
4414
  "offsets": [
4415
- 0,
4416
- 1152
4417
  ],
4418
  "shape": [
4419
- 768,
4420
- 1152
4421
  ],
4422
  "filename_index": 1
4423
  }
4424
  ]
4425
  },
4426
- "h.4.attn.c_attn.bias": {
4427
  "type": "Distributed",
4428
  "shape": [
4429
- 2304
4430
  ],
4431
  "dtype": "F32",
4432
  "chunks": [
@@ -4435,53 +4344,58 @@
4435
  0
4436
  ],
4437
  "shape": [
4438
- 1152
4439
  ],
4440
  "filename_index": 0
4441
  },
4442
  {
4443
  "offsets": [
4444
- 1152
4445
  ],
4446
  "shape": [
4447
- 1152
4448
  ],
4449
  "filename_index": 1
4450
  }
4451
  ]
4452
  },
4453
- "h.6.mlp.c_fc.bias": {
4454
  "type": "Distributed",
4455
  "shape": [
 
4456
  3072
4457
  ],
4458
  "dtype": "F32",
4459
  "chunks": [
4460
  {
4461
  "offsets": [
 
4462
  0
4463
  ],
4464
  "shape": [
 
4465
  1536
4466
  ],
4467
  "filename_index": 0
4468
  },
4469
  {
4470
  "offsets": [
 
4471
  1536
4472
  ],
4473
  "shape": [
 
4474
  1536
4475
  ],
4476
  "filename_index": 1
4477
  }
4478
  ]
4479
  },
4480
- "h.6.mlp.c_proj.weight": {
4481
  "type": "Distributed",
4482
  "shape": [
4483
- 3072,
4484
- 768
4485
  ],
4486
  "dtype": "F32",
4487
  "chunks": [
@@ -4491,57 +4405,121 @@
4491
  0
4492
  ],
4493
  "shape": [
4494
- 1536,
4495
- 768
4496
  ],
4497
  "filename_index": 0
4498
  },
4499
  {
4500
  "offsets": [
4501
- 1536,
4502
- 0
4503
  ],
4504
  "shape": [
4505
- 1536,
4506
- 768
4507
  ],
4508
  "filename_index": 1
4509
  }
4510
  ]
4511
  },
4512
- "h.7.mlp.c_proj.weight": {
4513
  "type": "Distributed",
4514
  "shape": [
4515
- 3072,
4516
  768
4517
  ],
4518
  "dtype": "F32",
4519
  "chunks": [
4520
  {
4521
  "offsets": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4522
  0,
4523
  0
4524
  ],
4525
  "shape": [
4526
- 1536,
4527
- 768
 
 
4528
  ],
4529
  "filename_index": 0
4530
  },
4531
  {
4532
  "offsets": [
4533
- 1536,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4534
  0
4535
  ],
4536
  "shape": [
4537
- 1536,
4538
- 768
 
 
 
 
 
 
 
 
4539
  ],
4540
  "filename_index": 1
4541
  }
4542
  ]
4543
  },
4544
- "h.7.ln_1.weight": {
4545
  "type": "Distributed",
4546
  "shape": [
4547
  768
@@ -4568,11 +4546,11 @@
4568
  }
4569
  ]
4570
  },
4571
- "h.5.attn.c_attn.weight": {
4572
  "type": "Distributed",
4573
  "shape": [
4574
  768,
4575
- 2304
4576
  ],
4577
  "dtype": "F32",
4578
  "chunks": [
@@ -4583,83 +4561,83 @@
4583
  ],
4584
  "shape": [
4585
  768,
4586
- 1152
4587
  ],
4588
  "filename_index": 0
4589
  },
4590
  {
4591
  "offsets": [
4592
  0,
4593
- 1152
4594
  ],
4595
  "shape": [
4596
  768,
4597
- 1152
4598
  ],
4599
  "filename_index": 1
4600
  }
4601
  ]
4602
  },
4603
- "h.9.attn.c_attn.weight": {
4604
  "type": "Distributed",
4605
  "shape": [
4606
- 768,
4607
- 2304
4608
  ],
4609
  "dtype": "F32",
4610
  "chunks": [
4611
  {
4612
  "offsets": [
4613
- 0,
4614
  0
4615
  ],
4616
  "shape": [
4617
- 768,
4618
- 1152
4619
  ],
4620
  "filename_index": 0
4621
  },
4622
  {
4623
  "offsets": [
4624
- 0,
4625
- 1152
4626
  ],
4627
  "shape": [
4628
- 768,
4629
- 1152
4630
  ],
4631
  "filename_index": 1
4632
  }
4633
  ]
4634
  },
4635
- "h.1.ln_2.bias": {
4636
  "type": "Distributed",
4637
  "shape": [
 
4638
  768
4639
  ],
4640
  "dtype": "F32",
4641
  "chunks": [
4642
  {
4643
  "offsets": [
 
4644
  0
4645
  ],
4646
  "shape": [
4647
- 384
 
4648
  ],
4649
  "filename_index": 0
4650
  },
4651
  {
4652
  "offsets": [
4653
- 384
 
4654
  ],
4655
  "shape": [
4656
- 384
 
4657
  ],
4658
  "filename_index": 1
4659
  }
4660
  ]
4661
  },
4662
- "h.11.ln_1.bias": {
4663
  "type": "Distributed",
4664
  "shape": [
4665
  768
@@ -4686,42 +4664,37 @@
4686
  }
4687
  ]
4688
  },
4689
- "h.7.attn.c_attn.weight": {
4690
  "type": "Distributed",
4691
  "shape": [
4692
- 768,
4693
- 2304
4694
  ],
4695
  "dtype": "F32",
4696
  "chunks": [
4697
  {
4698
  "offsets": [
4699
- 0,
4700
  0
4701
  ],
4702
  "shape": [
4703
- 768,
4704
- 1152
4705
  ],
4706
  "filename_index": 0
4707
  },
4708
  {
4709
  "offsets": [
4710
- 0,
4711
- 1152
4712
  ],
4713
  "shape": [
4714
- 768,
4715
- 1152
4716
  ],
4717
  "filename_index": 1
4718
  }
4719
  ]
4720
  },
4721
- "h.1.attn.c_proj.weight": {
4722
  "type": "Distributed",
4723
  "shape": [
4724
- 768,
4725
  768
4726
  ],
4727
  "dtype": "F32",
@@ -4732,23 +4705,50 @@
4732
  0
4733
  ],
4734
  "shape": [
4735
- 384,
4736
  768
4737
  ],
4738
  "filename_index": 0
4739
  },
4740
  {
4741
  "offsets": [
4742
- 384,
4743
  0
4744
  ],
4745
  "shape": [
4746
- 384,
4747
  768
4748
  ],
4749
  "filename_index": 1
4750
  }
4751
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4752
  }
4753
  },
4754
  "filenames": [
 
1
  {
2
  "tensors": {
3
+ "h.3.mlp.c_proj.bias": {
4
  "type": "Distributed",
5
  "shape": [
6
  768
 
27
  }
28
  ]
29
  },
30
+ "h.3.ln_1.weight": {
31
  "type": "Distributed",
32
  "shape": [
33
  768
 
54
  }
55
  ]
56
  },
57
+ "h.4.attn.c_proj.weight": {
58
  "type": "Distributed",
59
  "shape": [
60
+ 768,
61
  768
62
  ],
63
  "dtype": "F32",
64
  "chunks": [
65
  {
66
  "offsets": [
67
+ 0,
68
  0
69
  ],
70
  "shape": [
71
+ 384,
72
+ 768
73
  ],
74
  "filename_index": 0
75
  },
76
  {
77
  "offsets": [
78
+ 384,
79
+ 0
80
  ],
81
  "shape": [
82
+ 384,
83
+ 768
84
  ],
85
  "filename_index": 1
86
  }
87
  ]
88
  },
89
+ "h.1.attn.c_proj.bias": {
90
  "type": "Distributed",
91
  "shape": [
92
  768
 
113
  }
114
  ]
115
  },
116
+ "h.6.attn.bias": {
117
  "type": "Distributed",
118
  "shape": [
119
+ 1,
120
+ 1,
121
+ 1024,
122
+ 1024
123
  ],
124
  "dtype": "F32",
125
  "chunks": [
126
  {
127
  "offsets": [
128
+ 0,
129
+ 0,
130
+ 0,
131
  0
132
  ],
133
  "shape": [
134
+ 1,
135
+ 1,
136
+ 1024,
137
+ 512
138
  ],
139
  "filename_index": 0
140
  },
141
  {
142
  "offsets": [
143
+ 0,
144
+ 0,
145
+ 0,
146
+ 512
147
  ],
148
  "shape": [
149
+ 1,
150
+ 1,
151
+ 1024,
152
+ 512
153
  ],
154
  "filename_index": 1
155
  }
156
  ]
157
  },
158
+ "h.5.ln_1.bias": {
159
  "type": "Distributed",
160
  "shape": [
161
+ 768
162
  ],
163
  "dtype": "F32",
164
  "chunks": [
 
167
  0
168
  ],
169
  "shape": [
170
+ 384
171
  ],
172
  "filename_index": 0
173
  },
174
  {
175
  "offsets": [
176
+ 384
177
  ],
178
  "shape": [
179
+ 384
180
  ],
181
  "filename_index": 1
182
  }
183
  ]
184
  },
185
+ "h.2.mlp.c_proj.bias": {
186
  "type": "Distributed",
187
  "shape": [
188
  768
 
209
  }
210
  ]
211
  },
212
+ "h.0.attn.c_attn.bias": {
213
  "type": "Distributed",
214
  "shape": [
215
+ 2304
 
216
  ],
217
  "dtype": "F32",
218
  "chunks": [
219
  {
220
  "offsets": [
 
221
  0
222
  ],
223
  "shape": [
224
+ 1152
 
225
  ],
226
  "filename_index": 0
227
  },
228
  {
229
  "offsets": [
230
+ 1152
 
231
  ],
232
  "shape": [
233
+ 1152
 
234
  ],
235
  "filename_index": 1
236
  }
237
  ]
238
  },
239
+ "h.11.ln_1.bias": {
240
  "type": "Distributed",
241
  "shape": [
242
  768
 
263
  }
264
  ]
265
  },
266
+ "h.4.attn.c_attn.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  "type": "Distributed",
268
  "shape": [
269
  2304
 
290
  }
291
  ]
292
  },
293
+ "h.6.mlp.c_proj.bias": {
294
  "type": "Distributed",
295
  "shape": [
296
  768
 
317
  }
318
  ]
319
  },
320
+ "h.9.mlp.c_fc.bias": {
321
  "type": "Distributed",
322
  "shape": [
323
+ 3072
324
  ],
325
  "dtype": "F32",
326
  "chunks": [
 
329
  0
330
  ],
331
  "shape": [
332
+ 1536
333
  ],
334
  "filename_index": 0
335
  },
336
  {
337
  "offsets": [
338
+ 1536
339
  ],
340
  "shape": [
341
+ 1536
342
  ],
343
  "filename_index": 1
344
  }
345
  ]
346
  },
347
+ "h.3.mlp.c_proj.weight": {
348
  "type": "Distributed",
349
  "shape": [
350
+ 3072,
351
  768
352
  ],
353
  "dtype": "F32",
354
  "chunks": [
355
  {
356
  "offsets": [
357
+ 0,
358
  0
359
  ],
360
  "shape": [
361
+ 1536,
362
+ 768
363
  ],
364
  "filename_index": 0
365
  },
366
  {
367
  "offsets": [
368
+ 1536,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  0
370
  ],
371
  "shape": [
372
+ 1536,
373
+ 768
 
 
 
 
 
 
 
 
374
  ],
375
  "filename_index": 1
376
  }
377
  ]
378
  },
379
+ "h.7.attn.c_attn.weight": {
380
  "type": "Distributed",
381
  "shape": [
382
  768,
383
+ 2304
384
  ],
385
  "dtype": "F32",
386
  "chunks": [
 
391
  ],
392
  "shape": [
393
  768,
394
+ 1152
395
  ],
396
  "filename_index": 0
397
  },
398
  {
399
  "offsets": [
400
  0,
401
+ 1152
402
  ],
403
  "shape": [
404
  768,
405
+ 1152
406
  ],
407
  "filename_index": 1
408
  }
409
  ]
410
  },
411
+ "h.9.ln_2.weight": {
412
  "type": "Distributed",
413
  "shape": [
 
414
  768
415
  ],
416
  "dtype": "F32",
417
  "chunks": [
418
  {
419
  "offsets": [
 
420
  0
421
  ],
422
  "shape": [
423
+ 384
 
424
  ],
425
  "filename_index": 0
426
  },
427
  {
428
  "offsets": [
429
+ 384
 
430
  ],
431
  "shape": [
432
+ 384
 
433
  ],
434
  "filename_index": 1
435
  }
436
  ]
437
  },
438
+ "h.3.ln_2.weight": {
439
  "type": "Distributed",
440
  "shape": [
441
  768
 
462
  }
463
  ]
464
  },
465
+ "h.5.mlp.c_fc.bias": {
466
  "type": "Distributed",
467
  "shape": [
 
468
  3072
469
  ],
470
  "dtype": "F32",
471
  "chunks": [
472
  {
473
  "offsets": [
 
474
  0
475
  ],
476
  "shape": [
 
477
  1536
478
  ],
479
  "filename_index": 0
480
  },
481
  {
482
  "offsets": [
 
483
  1536
484
  ],
485
  "shape": [
 
486
  1536
487
  ],
488
  "filename_index": 1
489
  }
490
  ]
491
  },
492
+ "h.6.ln_2.bias": {
493
  "type": "Distributed",
494
  "shape": [
495
+ 768
496
  ],
497
  "dtype": "F32",
498
  "chunks": [
 
501
  0
502
  ],
503
  "shape": [
504
+ 384
505
  ],
506
  "filename_index": 0
507
  },
508
  {
509
  "offsets": [
510
+ 384
511
  ],
512
  "shape": [
513
+ 384
514
  ],
515
  "filename_index": 1
516
  }
517
  ]
518
  },
519
+ "h.4.ln_1.weight": {
520
  "type": "Distributed",
521
  "shape": [
522
  768
 
543
  }
544
  ]
545
  },
546
+ "h.3.attn.c_proj.weight": {
547
  "type": "Distributed",
548
  "shape": [
549
  768,
550
+ 768
551
  ],
552
  "dtype": "F32",
553
  "chunks": [
 
557
  0
558
  ],
559
  "shape": [
560
+ 384,
561
+ 768
562
  ],
563
  "filename_index": 0
564
  },
565
  {
566
  "offsets": [
567
+ 384,
568
+ 0
569
  ],
570
  "shape": [
571
+ 384,
572
+ 768
573
  ],
574
  "filename_index": 1
575
  }
576
  ]
577
  },
578
+ "h.1.ln_1.weight": {
579
  "type": "Distributed",
580
  "shape": [
581
  768
 
602
  }
603
  ]
604
  },
605
+ "h.6.ln_1.bias": {
606
  "type": "Distributed",
607
  "shape": [
608
  768
 
629
  }
630
  ]
631
  },
632
+ "h.3.attn.c_attn.bias": {
633
  "type": "Distributed",
634
  "shape": [
635
+ 2304
 
636
  ],
637
  "dtype": "F32",
638
  "chunks": [
639
  {
640
  "offsets": [
 
641
  0
642
  ],
643
  "shape": [
644
+ 1152
 
645
  ],
646
  "filename_index": 0
647
  },
648
  {
649
  "offsets": [
650
+ 1152
 
651
  ],
652
  "shape": [
653
+ 1152
 
654
  ],
655
  "filename_index": 1
656
  }
657
  ]
658
  },
659
+ "ln_f.weight": {
660
  "type": "Distributed",
661
  "shape": [
662
  768
 
683
  }
684
  ]
685
  },
686
+ "h.10.mlp.c_fc.bias": {
687
  "type": "Distributed",
688
  "shape": [
689
+ 3072
690
  ],
691
  "dtype": "F32",
692
  "chunks": [
 
695
  0
696
  ],
697
  "shape": [
698
+ 1536
699
  ],
700
  "filename_index": 0
701
  },
702
  {
703
  "offsets": [
704
+ 1536
705
  ],
706
  "shape": [
707
+ 1536
708
  ],
709
  "filename_index": 1
710
  }
711
  ]
712
  },
713
+ "h.6.mlp.c_fc.bias": {
714
  "type": "Distributed",
715
  "shape": [
716
  3072
 
737
  }
738
  ]
739
  },
740
+ "h.11.mlp.c_fc.bias": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
  "type": "Distributed",
742
  "shape": [
 
743
  3072
744
  ],
745
  "dtype": "F32",
746
  "chunks": [
747
  {
748
  "offsets": [
 
749
  0
750
  ],
751
  "shape": [
 
752
  1536
753
  ],
754
  "filename_index": 0
755
  },
756
  {
757
  "offsets": [
 
758
  1536
759
  ],
760
  "shape": [
 
761
  1536
762
  ],
763
  "filename_index": 1
764
  }
765
  ]
766
  },
767
+ "h.5.attn.c_attn.weight": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
768
  "type": "Distributed",
769
  "shape": [
770
  768,
 
796
  }
797
  ]
798
  },
799
+ "h.1.attn.bias": {
800
  "type": "Distributed",
801
  "shape": [
802
+ 1,
803
+ 1,
804
+ 1024,
805
+ 1024
806
  ],
807
  "dtype": "F32",
808
  "chunks": [
809
  {
810
  "offsets": [
811
+ 0,
812
+ 0,
813
+ 0,
814
  0
815
  ],
816
  "shape": [
817
+ 1,
818
+ 1,
819
+ 1024,
820
+ 512
821
  ],
822
  "filename_index": 0
823
  },
824
  {
825
  "offsets": [
826
+ 0,
827
+ 0,
828
+ 0,
829
+ 512
830
  ],
831
  "shape": [
832
+ 1,
833
+ 1,
834
+ 1024,
835
+ 512
836
  ],
837
  "filename_index": 1
838
  }
839
  ]
840
  },
841
+ "h.3.attn.c_proj.bias": {
842
  "type": "Distributed",
843
  "shape": [
 
844
  768
845
  ],
846
  "dtype": "F32",
847
  "chunks": [
848
  {
849
  "offsets": [
 
850
  0
851
  ],
852
  "shape": [
853
+ 384
 
854
  ],
855
  "filename_index": 0
856
  },
857
  {
858
  "offsets": [
859
+ 384
 
860
  ],
861
  "shape": [
862
+ 384
 
863
  ],
864
  "filename_index": 1
865
  }
866
  ]
867
  },
868
+ "h.9.attn.bias": {
869
  "type": "Distributed",
870
  "shape": [
871
  1,
 
907
  }
908
  ]
909
  },
910
+ "h.4.ln_2.bias": {
911
  "type": "Distributed",
912
  "shape": [
913
  768
 
934
  }
935
  ]
936
  },
937
+ "h.5.mlp.c_proj.bias": {
938
  "type": "Distributed",
939
  "shape": [
940
  768
 
961
  }
962
  ]
963
  },
964
+ "h.7.mlp.c_proj.weight": {
965
  "type": "Distributed",
966
  "shape": [
967
+ 3072,
968
+ 768
 
 
969
  ],
970
  "dtype": "F32",
971
  "chunks": [
972
  {
973
  "offsets": [
 
 
974
  0,
975
  0
976
  ],
977
  "shape": [
978
+ 1536,
979
+ 768
 
 
980
  ],
981
  "filename_index": 0
982
  },
983
  {
984
  "offsets": [
985
+ 1536,
986
+ 0
 
 
987
  ],
988
  "shape": [
989
+ 1536,
990
+ 768
 
 
991
  ],
992
  "filename_index": 1
993
  }
994
  ]
995
  },
996
+ "h.5.attn.c_proj.bias": {
997
  "type": "Distributed",
998
  "shape": [
999
  768
 
1020
  }
1021
  ]
1022
  },
1023
+ "h.0.attn.c_proj.weight": {
1024
  "type": "Distributed",
1025
  "shape": [
1026
+ 768,
1027
  768
1028
  ],
1029
  "dtype": "F32",
1030
  "chunks": [
1031
  {
1032
  "offsets": [
1033
+ 0,
1034
  0
1035
  ],
1036
  "shape": [
1037
+ 384,
1038
+ 768
1039
  ],
1040
  "filename_index": 0
1041
  },
1042
  {
1043
  "offsets": [
1044
+ 384,
1045
+ 0
1046
  ],
1047
  "shape": [
1048
+ 384,
1049
+ 768
1050
  ],
1051
  "filename_index": 1
1052
  }
1053
  ]
1054
  },
1055
+ "h.9.mlp.c_fc.weight": {
1056
  "type": "Distributed",
1057
  "shape": [
1058
  768,
1059
+ 3072
1060
  ],
1061
  "dtype": "F32",
1062
  "chunks": [
 
1066
  0
1067
  ],
1068
  "shape": [
1069
+ 768,
1070
+ 1536
1071
  ],
1072
  "filename_index": 0
1073
  },
1074
  {
1075
  "offsets": [
1076
+ 0,
1077
+ 1536
1078
+ ],
1079
+ "shape": [
1080
+ 768,
1081
+ 1536
1082
+ ],
1083
+ "filename_index": 1
1084
+ }
1085
+ ]
1086
+ },
1087
+ "h.9.ln_1.bias": {
1088
+ "type": "Distributed",
1089
+ "shape": [
1090
+ 768
1091
+ ],
1092
+ "dtype": "F32",
1093
+ "chunks": [
1094
+ {
1095
+ "offsets": [
1096
  0
1097
  ],
1098
  "shape": [
1099
+ 384
1100
+ ],
1101
+ "filename_index": 0
1102
+ },
1103
+ {
1104
+ "offsets": [
1105
+ 384
1106
+ ],
1107
+ "shape": [
1108
+ 384
1109
  ],
1110
  "filename_index": 1
1111
  }
1112
  ]
1113
  },
1114
+ "h.6.attn.c_attn.weight": {
1115
  "type": "Distributed",
1116
  "shape": [
1117
+ 768,
1118
  2304
1119
  ],
1120
  "dtype": "F32",
1121
  "chunks": [
1122
  {
1123
  "offsets": [
1124
+ 0,
1125
  0
1126
  ],
1127
  "shape": [
1128
+ 768,
1129
  1152
1130
  ],
1131
  "filename_index": 0
1132
  },
1133
  {
1134
  "offsets": [
1135
+ 0,
1136
  1152
1137
  ],
1138
  "shape": [
1139
+ 768,
1140
  1152
1141
  ],
1142
  "filename_index": 1
1143
  }
1144
  ]
1145
  },
1146
+ "h.11.mlp.c_proj.bias": {
1147
  "type": "Distributed",
1148
  "shape": [
1149
  768
 
1197
  }
1198
  ]
1199
  },
1200
+ "h.7.attn.c_attn.bias": {
1201
  "type": "Distributed",
1202
  "shape": [
1203
+ 2304
1204
  ],
1205
  "dtype": "F32",
1206
  "chunks": [
 
1209
  0
1210
  ],
1211
  "shape": [
1212
+ 1152
1213
  ],
1214
  "filename_index": 0
1215
  },
1216
  {
1217
  "offsets": [
1218
+ 1152
1219
  ],
1220
  "shape": [
1221
+ 1152
1222
  ],
1223
  "filename_index": 1
1224
  }
1225
  ]
1226
  },
1227
+ "h.4.ln_1.bias": {
1228
  "type": "Distributed",
1229
  "shape": [
 
1230
  768
1231
  ],
1232
  "dtype": "F32",
1233
  "chunks": [
1234
  {
1235
  "offsets": [
 
1236
  0
1237
  ],
1238
  "shape": [
1239
+ 384
 
1240
  ],
1241
  "filename_index": 0
1242
  },
1243
  {
1244
  "offsets": [
1245
+ 384
 
1246
  ],
1247
  "shape": [
1248
+ 384
 
1249
  ],
1250
  "filename_index": 1
1251
  }
1252
  ]
1253
  },
1254
+ "h.9.mlp.c_proj.bias": {
1255
  "type": "Distributed",
1256
  "shape": [
1257
+ 768
 
1258
  ],
1259
  "dtype": "F32",
1260
  "chunks": [
1261
  {
1262
  "offsets": [
 
1263
  0
1264
  ],
1265
  "shape": [
1266
+ 384
 
1267
  ],
1268
  "filename_index": 0
1269
  },
1270
  {
1271
  "offsets": [
1272
+ 384
 
1273
  ],
1274
  "shape": [
1275
+ 384
 
1276
  ],
1277
  "filename_index": 1
1278
  }
1279
  ]
1280
  },
1281
+ "h.4.mlp.c_proj.weight": {
1282
  "type": "Distributed",
1283
  "shape": [
1284
  3072,
 
1310
  }
1311
  ]
1312
  },
1313
+ "h.3.attn.c_attn.weight": {
1314
  "type": "Distributed",
1315
  "shape": [
1316
+ 768,
1317
  2304
1318
  ],
1319
  "dtype": "F32",
1320
  "chunks": [
1321
  {
1322
  "offsets": [
1323
+ 0,
1324
  0
1325
  ],
1326
  "shape": [
1327
+ 768,
1328
  1152
1329
  ],
1330
  "filename_index": 0
1331
  },
1332
  {
1333
  "offsets": [
1334
+ 0,
1335
  1152
1336
  ],
1337
  "shape": [
1338
+ 768,
1339
  1152
1340
  ],
1341
  "filename_index": 1
1342
  }
1343
  ]
1344
  },
1345
+ "h.2.ln_2.bias": {
1346
  "type": "Distributed",
1347
  "shape": [
1348
  768
 
1369
  }
1370
  ]
1371
  },
1372
+ "h.6.attn.c_proj.weight": {
1373
  "type": "Distributed",
1374
  "shape": [
1375
  768,
1376
+ 768
1377
  ],
1378
  "dtype": "F32",
1379
  "chunks": [
 
1383
  0
1384
  ],
1385
  "shape": [
1386
+ 384,
1387
+ 768
1388
  ],
1389
  "filename_index": 0
1390
  },
1391
  {
1392
  "offsets": [
1393
+ 384,
1394
+ 0
1395
  ],
1396
  "shape": [
1397
+ 384,
1398
+ 768
1399
  ],
1400
  "filename_index": 1
1401
  }
1402
  ]
1403
  },
1404
+ "h.8.mlp.c_fc.bias": {
1405
  "type": "Distributed",
1406
  "shape": [
1407
+ 3072
1408
  ],
1409
  "dtype": "F32",
1410
  "chunks": [
 
1413
  0
1414
  ],
1415
  "shape": [
1416
+ 1536
1417
  ],
1418
  "filename_index": 0
1419
  },
1420
  {
1421
  "offsets": [
1422
+ 1536
1423
  ],
1424
  "shape": [
1425
+ 1536
1426
  ],
1427
  "filename_index": 1
1428
  }
1429
  ]
1430
  },
1431
+ "h.2.mlp.c_fc.bias": {
1432
  "type": "Distributed",
1433
  "shape": [
1434
+ 3072
 
1435
  ],
1436
  "dtype": "F32",
1437
  "chunks": [
1438
  {
1439
  "offsets": [
 
1440
  0
1441
  ],
1442
  "shape": [
1443
+ 1536
 
1444
  ],
1445
  "filename_index": 0
1446
  },
1447
  {
1448
  "offsets": [
1449
+ 1536
 
1450
  ],
1451
  "shape": [
1452
+ 1536
 
1453
  ],
1454
  "filename_index": 1
1455
  }
1456
  ]
1457
  },
1458
+ "h.11.attn.c_proj.bias": {
1459
  "type": "Distributed",
1460
  "shape": [
1461
  768
 
1482
  }
1483
  ]
1484
  },
1485
+ "h.11.attn.c_attn.weight": {
1486
  "type": "Distributed",
1487
  "shape": [
1488
+ 768,
1489
+ 2304
1490
  ],
1491
  "dtype": "F32",
1492
  "chunks": [
1493
  {
1494
  "offsets": [
1495
+ 0,
1496
  0
1497
  ],
1498
  "shape": [
1499
+ 768,
1500
+ 1152
1501
  ],
1502
  "filename_index": 0
1503
  },
1504
  {
1505
  "offsets": [
1506
+ 0,
1507
+ 1152
1508
  ],
1509
  "shape": [
1510
+ 768,
1511
+ 1152
1512
  ],
1513
  "filename_index": 1
1514
  }
1515
  ]
1516
  },
1517
+ "h.0.attn.c_attn.weight": {
1518
  "type": "Distributed",
1519
  "shape": [
1520
+ 768,
1521
+ 2304
1522
  ],
1523
  "dtype": "F32",
1524
  "chunks": [
1525
  {
1526
  "offsets": [
1527
+ 0,
1528
  0
1529
  ],
1530
  "shape": [
1531
+ 768,
1532
+ 1152
1533
  ],
1534
  "filename_index": 0
1535
  },
1536
  {
1537
  "offsets": [
1538
+ 0,
1539
+ 1152
1540
  ],
1541
  "shape": [
1542
+ 768,
1543
+ 1152
1544
  ],
1545
  "filename_index": 1
1546
  }
1547
  ]
1548
  },
1549
+ "h.2.attn.bias": {
1550
  "type": "Distributed",
1551
  "shape": [
1552
+ 1,
1553
+ 1,
1554
+ 1024,
1555
+ 1024
1556
  ],
1557
  "dtype": "F32",
1558
  "chunks": [
1559
  {
1560
  "offsets": [
1561
+ 0,
1562
+ 0,
1563
+ 0,
1564
  0
1565
  ],
1566
  "shape": [
1567
+ 1,
1568
+ 1,
1569
+ 1024,
1570
+ 512
1571
  ],
1572
  "filename_index": 0
1573
  },
1574
  {
1575
  "offsets": [
1576
+ 0,
1577
+ 0,
1578
+ 0,
1579
+ 512
1580
  ],
1581
  "shape": [
1582
+ 1,
1583
+ 1,
1584
+ 1024,
1585
+ 512
1586
  ],
1587
  "filename_index": 1
1588
  }
1589
  ]
1590
  },
1591
+ "wte.weight": {
1592
  "type": "Distributed",
1593
  "shape": [
1594
+ 50257,
1595
  768
1596
  ],
1597
  "dtype": "F32",
 
1602
  0
1603
  ],
1604
  "shape": [
1605
+ 50257,
1606
+ 384
1607
  ],
1608
  "filename_index": 0
1609
  },
1610
  {
1611
  "offsets": [
1612
+ 0,
1613
+ 384
1614
  ],
1615
  "shape": [
1616
+ 50257,
1617
+ 384
1618
  ],
1619
  "filename_index": 1
1620
  }
1621
  ]
1622
  },
1623
+ "h.11.ln_2.weight": {
1624
  "type": "Distributed",
1625
  "shape": [
1626
  768
 
1647
  }
1648
  ]
1649
  },
1650
+ "h.6.attn.c_attn.bias": {
1651
  "type": "Distributed",
1652
  "shape": [
1653
+ 2304
1654
  ],
1655
  "dtype": "F32",
1656
  "chunks": [
 
1659
  0
1660
  ],
1661
  "shape": [
1662
+ 1152
1663
  ],
1664
  "filename_index": 0
1665
  },
1666
  {
1667
  "offsets": [
1668
+ 1152
1669
  ],
1670
  "shape": [
1671
+ 1152
1672
  ],
1673
  "filename_index": 1
1674
  }
1675
  ]
1676
  },
1677
+ "h.1.ln_2.weight": {
1678
  "type": "Distributed",
1679
  "shape": [
1680
  768
 
1701
  }
1702
  ]
1703
  },
1704
+ "ln_f.bias": {
1705
  "type": "Distributed",
1706
  "shape": [
 
1707
  768
1708
  ],
1709
  "dtype": "F32",
1710
  "chunks": [
1711
  {
1712
  "offsets": [
 
1713
  0
1714
  ],
1715
  "shape": [
1716
+ 384
 
1717
  ],
1718
  "filename_index": 0
1719
  },
1720
  {
1721
  "offsets": [
1722
+ 384
 
1723
  ],
1724
  "shape": [
1725
+ 384
 
1726
  ],
1727
  "filename_index": 1
1728
  }
1729
  ]
1730
  },
1731
+ "h.10.attn.c_attn.bias": {
1732
  "type": "Distributed",
1733
  "shape": [
1734
+ 2304
1735
  ],
1736
  "dtype": "F32",
1737
  "chunks": [
 
1740
  0
1741
  ],
1742
  "shape": [
1743
+ 1152
1744
  ],
1745
  "filename_index": 0
1746
  },
1747
  {
1748
  "offsets": [
1749
+ 1152
1750
  ],
1751
  "shape": [
1752
+ 1152
1753
  ],
1754
  "filename_index": 1
1755
  }
1756
  ]
1757
  },
1758
+ "h.0.ln_1.bias": {
1759
  "type": "Distributed",
1760
  "shape": [
1761
  768
 
1782
  }
1783
  ]
1784
  },
1785
+ "h.10.mlp.c_proj.bias": {
1786
  "type": "Distributed",
1787
  "shape": [
1788
  768
 
1809
  }
1810
  ]
1811
  },
1812
+ "h.8.attn.c_proj.weight": {
1813
  "type": "Distributed",
1814
  "shape": [
1815
+ 768,
1816
  768
1817
  ],
1818
  "dtype": "F32",
1819
  "chunks": [
1820
  {
1821
  "offsets": [
1822
+ 0,
1823
  0
1824
  ],
1825
  "shape": [
1826
+ 384,
1827
+ 768
1828
  ],
1829
  "filename_index": 0
1830
  },
1831
  {
1832
  "offsets": [
1833
+ 384,
1834
+ 0
1835
  ],
1836
  "shape": [
1837
+ 384,
1838
+ 768
1839
  ],
1840
  "filename_index": 1
1841
  }
1842
  ]
1843
  },
1844
+ "h.10.ln_2.weight": {
1845
  "type": "Distributed",
1846
  "shape": [
1847
  768
 
1868
  }
1869
  ]
1870
  },
1871
+ "h.9.attn.c_proj.weight": {
1872
  "type": "Distributed",
1873
  "shape": [
1874
+ 768,
1875
  768
1876
  ],
1877
  "dtype": "F32",
 
1882
  0
1883
  ],
1884
  "shape": [
1885
+ 384,
1886
  768
1887
  ],
1888
  "filename_index": 0
1889
  },
1890
  {
1891
  "offsets": [
1892
+ 384,
1893
  0
1894
  ],
1895
  "shape": [
1896
+ 384,
1897
  768
1898
  ],
1899
  "filename_index": 1
1900
  }
1901
  ]
1902
  },
1903
+ "h.0.ln_2.bias": {
1904
  "type": "Distributed",
1905
  "shape": [
1906
  768
 
1927
  }
1928
  ]
1929
  },
1930
+ "h.10.ln_2.bias": {
1931
  "type": "Distributed",
1932
  "shape": [
1933
+ 768
1934
  ],
1935
  "dtype": "F32",
1936
  "chunks": [
 
1939
  0
1940
  ],
1941
  "shape": [
1942
+ 384
1943
  ],
1944
  "filename_index": 0
1945
  },
1946
  {
1947
  "offsets": [
1948
+ 384
1949
  ],
1950
  "shape": [
1951
+ 384
1952
  ],
1953
  "filename_index": 1
1954
  }
1955
  ]
1956
  },
1957
+ "h.1.mlp.c_proj.bias": {
1958
  "type": "Distributed",
1959
  "shape": [
1960
  768
 
1981
  }
1982
  ]
1983
  },
1984
+ "h.7.ln_2.bias": {
1985
  "type": "Distributed",
1986
  "shape": [
1987
+ 768
1988
  ],
1989
  "dtype": "F32",
1990
  "chunks": [
 
1993
  0
1994
  ],
1995
  "shape": [
1996
+ 384
1997
  ],
1998
  "filename_index": 0
1999
  },
2000
  {
2001
  "offsets": [
2002
+ 384
2003
  ],
2004
  "shape": [
2005
+ 384
2006
  ],
2007
  "filename_index": 1
2008
  }
2009
  ]
2010
  },
2011
+ "h.7.ln_1.weight": {
2012
  "type": "Distributed",
2013
  "shape": [
2014
+ 768
 
 
 
2015
  ],
2016
  "dtype": "F32",
2017
  "chunks": [
2018
  {
2019
  "offsets": [
 
 
 
2020
  0
2021
  ],
2022
  "shape": [
2023
+ 384
 
 
 
2024
  ],
2025
  "filename_index": 0
2026
  },
2027
  {
2028
  "offsets": [
2029
+ 384
 
 
 
2030
  ],
2031
  "shape": [
2032
+ 384
 
 
 
2033
  ],
2034
  "filename_index": 1
2035
  }
2036
  ]
2037
  },
2038
+ "h.1.mlp.c_proj.weight": {
2039
  "type": "Distributed",
2040
  "shape": [
2041
+ 3072,
2042
  768
2043
  ],
2044
  "dtype": "F32",
2045
  "chunks": [
2046
  {
2047
  "offsets": [
2048
+ 0,
2049
  0
2050
  ],
2051
  "shape": [
2052
+ 1536,
2053
+ 768
2054
  ],
2055
  "filename_index": 0
2056
  },
2057
  {
2058
  "offsets": [
2059
+ 1536,
2060
+ 0
2061
  ],
2062
  "shape": [
2063
+ 1536,
2064
+ 768
2065
  ],
2066
  "filename_index": 1
2067
  }
2068
  ]
2069
  },
2070
+ "h.3.ln_2.bias": {
2071
  "type": "Distributed",
2072
  "shape": [
2073
  768
 
2094
  }
2095
  ]
2096
  },
2097
+ "h.4.mlp.c_fc.bias": {
2098
  "type": "Distributed",
2099
  "shape": [
2100
+ 3072
2101
  ],
2102
  "dtype": "F32",
2103
  "chunks": [
 
2106
  0
2107
  ],
2108
  "shape": [
2109
+ 1536
2110
  ],
2111
  "filename_index": 0
2112
  },
2113
  {
2114
  "offsets": [
2115
+ 1536
2116
  ],
2117
  "shape": [
2118
+ 1536
2119
  ],
2120
  "filename_index": 1
2121
  }
2122
  ]
2123
  },
2124
+ "h.8.ln_2.weight": {
2125
  "type": "Distributed",
2126
  "shape": [
 
2127
  768
2128
  ],
2129
  "dtype": "F32",
2130
  "chunks": [
2131
  {
2132
  "offsets": [
 
2133
  0
2134
  ],
2135
  "shape": [
2136
+ 384
 
2137
  ],
2138
  "filename_index": 0
2139
  },
2140
  {
2141
  "offsets": [
2142
+ 384
 
2143
  ],
2144
  "shape": [
2145
+ 384
 
2146
  ],
2147
  "filename_index": 1
2148
  }
2149
  ]
2150
  },
2151
+ "h.1.attn.c_proj.weight": {
2152
  "type": "Distributed",
2153
  "shape": [
2154
  768,
2155
+ 768
2156
  ],
2157
  "dtype": "F32",
2158
  "chunks": [
 
2162
  0
2163
  ],
2164
  "shape": [
2165
+ 384,
2166
+ 768
2167
  ],
2168
  "filename_index": 0
2169
  },
2170
  {
2171
  "offsets": [
2172
+ 384,
2173
+ 0
2174
  ],
2175
  "shape": [
2176
+ 384,
2177
+ 768
2178
  ],
2179
  "filename_index": 1
2180
  }
2181
  ]
2182
  },
2183
+ "h.6.ln_1.weight": {
2184
  "type": "Distributed",
2185
  "shape": [
2186
  768
 
2207
  }
2208
  ]
2209
  },
2210
+ "h.1.attn.c_attn.bias": {
2211
  "type": "Distributed",
2212
  "shape": [
2213
+ 2304
 
2214
  ],
2215
  "dtype": "F32",
2216
  "chunks": [
2217
  {
2218
  "offsets": [
 
2219
  0
2220
  ],
2221
  "shape": [
2222
+ 1152
 
2223
  ],
2224
  "filename_index": 0
2225
  },
2226
  {
2227
  "offsets": [
2228
+ 1152
 
2229
  ],
2230
  "shape": [
2231
+ 1152
 
2232
  ],
2233
  "filename_index": 1
2234
  }
2235
  ]
2236
  },
2237
+ "h.3.mlp.c_fc.bias": {
2238
  "type": "Distributed",
2239
  "shape": [
2240
+ 3072
2241
  ],
2242
  "dtype": "F32",
2243
  "chunks": [
 
2246
  0
2247
  ],
2248
  "shape": [
2249
+ 1536
2250
  ],
2251
  "filename_index": 0
2252
  },
2253
  {
2254
  "offsets": [
2255
+ 1536
2256
  ],
2257
  "shape": [
2258
+ 1536
2259
  ],
2260
  "filename_index": 1
2261
  }
2262
  ]
2263
  },
2264
+ "h.1.ln_1.bias": {
2265
  "type": "Distributed",
2266
  "shape": [
2267
+ 768
2268
  ],
2269
  "dtype": "F32",
2270
  "chunks": [
 
2273
  0
2274
  ],
2275
  "shape": [
2276
+ 384
2277
  ],
2278
  "filename_index": 0
2279
  },
2280
  {
2281
  "offsets": [
2282
+ 384
2283
  ],
2284
  "shape": [
2285
+ 384
2286
  ],
2287
  "filename_index": 1
2288
  }
2289
  ]
2290
  },
2291
+ "h.9.ln_2.bias": {
2292
  "type": "Distributed",
2293
  "shape": [
2294
+ 768
 
2295
  ],
2296
  "dtype": "F32",
2297
  "chunks": [
2298
  {
2299
  "offsets": [
 
2300
  0
2301
  ],
2302
  "shape": [
2303
+ 384
 
2304
  ],
2305
  "filename_index": 0
2306
  },
2307
  {
2308
  "offsets": [
2309
+ 384
 
2310
  ],
2311
  "shape": [
2312
+ 384
 
2313
  ],
2314
  "filename_index": 1
2315
  }
2316
  ]
2317
  },
2318
+ "h.8.ln_1.weight": {
2319
  "type": "Distributed",
2320
  "shape": [
 
2321
  768
2322
  ],
2323
  "dtype": "F32",
2324
  "chunks": [
2325
  {
2326
  "offsets": [
 
2327
  0
2328
  ],
2329
  "shape": [
 
2330
  384
2331
  ],
2332
  "filename_index": 0
2333
  },
2334
  {
2335
  "offsets": [
 
2336
  384
2337
  ],
2338
  "shape": [
 
2339
  384
2340
  ],
2341
  "filename_index": 1
2342
  }
2343
  ]
2344
  },
2345
+ "h.2.attn.c_proj.weight": {
2346
  "type": "Distributed",
2347
  "shape": [
2348
+ 768,
2349
  768
2350
  ],
2351
  "dtype": "F32",
2352
  "chunks": [
2353
  {
2354
  "offsets": [
2355
+ 0,
2356
  0
2357
  ],
2358
  "shape": [
2359
+ 384,
2360
+ 768
2361
  ],
2362
  "filename_index": 0
2363
  },
2364
  {
2365
  "offsets": [
2366
+ 384,
2367
+ 0
2368
  ],
2369
  "shape": [
2370
+ 384,
2371
+ 768
2372
  ],
2373
  "filename_index": 1
2374
  }
2375
  ]
2376
  },
2377
+ "h.8.attn.bias": {
2378
  "type": "Distributed",
2379
  "shape": [
2380
+ 1,
2381
+ 1,
2382
+ 1024,
2383
+ 1024
2384
  ],
2385
  "dtype": "F32",
2386
  "chunks": [
2387
  {
2388
  "offsets": [
2389
+ 0,
2390
+ 0,
2391
  0,
2392
  0
2393
  ],
2394
  "shape": [
2395
+ 1,
2396
+ 1,
2397
+ 1024,
2398
+ 512
2399
  ],
2400
  "filename_index": 0
2401
  },
2402
  {
2403
  "offsets": [
2404
+ 0,
2405
+ 0,
2406
+ 0,
2407
+ 512
2408
  ],
2409
  "shape": [
2410
+ 1,
2411
+ 1,
2412
+ 1024,
2413
+ 512
2414
  ],
2415
  "filename_index": 1
2416
  }
2417
  ]
2418
  },
2419
+ "h.1.attn.c_attn.weight": {
2420
  "type": "Distributed",
2421
  "shape": [
2422
+ 768,
2423
+ 2304
2424
  ],
2425
  "dtype": "F32",
2426
  "chunks": [
2427
  {
2428
  "offsets": [
2429
+ 0,
2430
  0
2431
  ],
2432
  "shape": [
2433
+ 768,
2434
+ 1152
2435
  ],
2436
  "filename_index": 0
2437
  },
2438
  {
2439
  "offsets": [
2440
+ 0,
2441
+ 1152
2442
  ],
2443
  "shape": [
2444
+ 768,
2445
+ 1152
2446
  ],
2447
  "filename_index": 1
2448
  }
2449
  ]
2450
  },
2451
+ "h.0.ln_1.weight": {
2452
  "type": "Distributed",
2453
  "shape": [
2454
+ 768
 
 
 
2455
  ],
2456
  "dtype": "F32",
2457
  "chunks": [
2458
  {
2459
  "offsets": [
 
 
 
2460
  0
2461
  ],
2462
  "shape": [
2463
+ 384
 
 
 
2464
  ],
2465
  "filename_index": 0
2466
  },
2467
  {
2468
  "offsets": [
2469
+ 384
 
 
 
2470
  ],
2471
  "shape": [
2472
+ 384
 
 
 
2473
  ],
2474
  "filename_index": 1
2475
  }
2476
  ]
2477
  },
2478
+ "h.7.attn.c_proj.bias": {
2479
  "type": "Distributed",
2480
  "shape": [
2481
+ 768
2482
  ],
2483
  "dtype": "F32",
2484
  "chunks": [
 
2487
  0
2488
  ],
2489
  "shape": [
2490
+ 384
2491
  ],
2492
  "filename_index": 0
2493
  },
2494
  {
2495
  "offsets": [
2496
+ 384
2497
  ],
2498
  "shape": [
2499
+ 384
2500
  ],
2501
  "filename_index": 1
2502
  }
2503
  ]
2504
  },
2505
+ "h.10.mlp.c_fc.weight": {
2506
  "type": "Distributed",
2507
  "shape": [
2508
  768,
2509
+ 3072
2510
  ],
2511
  "dtype": "F32",
2512
  "chunks": [
 
2517
  ],
2518
  "shape": [
2519
  768,
2520
+ 1536
2521
  ],
2522
  "filename_index": 0
2523
  },
2524
  {
2525
  "offsets": [
2526
  0,
2527
+ 1536
2528
  ],
2529
  "shape": [
2530
  768,
2531
+ 1536
2532
  ],
2533
  "filename_index": 1
2534
  }
2535
  ]
2536
  },
2537
+ "h.2.mlp.c_proj.weight": {
2538
  "type": "Distributed",
2539
  "shape": [
2540
+ 3072,
2541
  768
2542
  ],
2543
  "dtype": "F32",
2544
  "chunks": [
2545
  {
2546
  "offsets": [
2547
+ 0,
2548
  0
2549
  ],
2550
  "shape": [
2551
+ 1536,
2552
+ 768
2553
  ],
2554
  "filename_index": 0
2555
  },
2556
  {
2557
  "offsets": [
2558
+ 1536,
2559
+ 0
2560
  ],
2561
  "shape": [
2562
+ 1536,
2563
+ 768
2564
  ],
2565
  "filename_index": 1
2566
  }
2567
  ]
2568
  },
2569
+ "h.7.attn.bias": {
2570
  "type": "Distributed",
2571
  "shape": [
2572
  1,
 
2608
  }
2609
  ]
2610
  },
2611
+ "h.2.attn.c_attn.weight": {
2612
  "type": "Distributed",
2613
  "shape": [
2614
  768,
 
2640
  }
2641
  ]
2642
  },
2643
+ "h.7.attn.c_proj.weight": {
2644
  "type": "Distributed",
2645
  "shape": [
2646
+ 768,
2647
  768
2648
  ],
2649
  "dtype": "F32",
2650
  "chunks": [
2651
  {
2652
  "offsets": [
2653
+ 0,
2654
  0
2655
  ],
2656
  "shape": [
2657
+ 384,
2658
+ 768
2659
  ],
2660
  "filename_index": 0
2661
  },
2662
  {
2663
  "offsets": [
2664
+ 384,
2665
+ 0
2666
  ],
2667
  "shape": [
2668
+ 384,
2669
+ 768
2670
  ],
2671
  "filename_index": 1
2672
  }
2673
  ]
2674
  },
2675
+ "h.2.mlp.c_fc.weight": {
2676
  "type": "Distributed",
2677
  "shape": [
2678
+ 768,
2679
+ 3072
2680
  ],
2681
  "dtype": "F32",
2682
  "chunks": [
2683
  {
2684
  "offsets": [
2685
+ 0,
2686
  0
2687
  ],
2688
  "shape": [
2689
+ 768,
2690
+ 1536
2691
  ],
2692
  "filename_index": 0
2693
  },
2694
  {
2695
  "offsets": [
2696
+ 0,
2697
+ 1536
2698
  ],
2699
  "shape": [
2700
+ 768,
2701
+ 1536
2702
  ],
2703
  "filename_index": 1
2704
  }
2705
  ]
2706
  },
2707
+ "h.9.mlp.c_proj.weight": {
2708
  "type": "Distributed",
2709
  "shape": [
2710
+ 3072,
2711
  768
2712
  ],
2713
  "dtype": "F32",
2714
  "chunks": [
2715
  {
2716
  "offsets": [
2717
+ 0,
2718
  0
2719
  ],
2720
  "shape": [
2721
+ 1536,
2722
+ 768
2723
  ],
2724
  "filename_index": 0
2725
  },
2726
  {
2727
  "offsets": [
2728
+ 1536,
2729
+ 0
2730
  ],
2731
  "shape": [
2732
+ 1536,
2733
+ 768
2734
  ],
2735
  "filename_index": 1
2736
  }
2737
  ]
2738
  },
2739
+ "h.5.ln_2.weight": {
2740
  "type": "Distributed",
2741
  "shape": [
 
2742
  768
2743
  ],
2744
  "dtype": "F32",
2745
  "chunks": [
2746
  {
2747
  "offsets": [
 
2748
  0
2749
  ],
2750
  "shape": [
2751
+ 384
 
2752
  ],
2753
  "filename_index": 0
2754
  },
2755
  {
2756
  "offsets": [
2757
+ 384
 
2758
  ],
2759
  "shape": [
2760
+ 384
 
2761
  ],
2762
  "filename_index": 1
2763
  }
2764
  ]
2765
  },
2766
+ "h.10.attn.c_attn.weight": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2767
  "type": "Distributed",
2768
  "shape": [
2769
  768,
2770
+ 2304
2771
  ],
2772
  "dtype": "F32",
2773
  "chunks": [
 
2778
  ],
2779
  "shape": [
2780
  768,
2781
+ 1152
2782
  ],
2783
  "filename_index": 0
2784
  },
2785
  {
2786
  "offsets": [
2787
  0,
2788
+ 1152
2789
  ],
2790
  "shape": [
2791
  768,
2792
+ 1152
2793
  ],
2794
  "filename_index": 1
2795
  }
2796
  ]
2797
  },
2798
+ "h.9.attn.c_proj.bias": {
2799
  "type": "Distributed",
2800
  "shape": [
2801
+ 768
2802
  ],
2803
  "dtype": "F32",
2804
  "chunks": [
 
2807
  0
2808
  ],
2809
  "shape": [
2810
+ 384
2811
  ],
2812
  "filename_index": 0
2813
  },
2814
  {
2815
  "offsets": [
2816
+ 384
2817
  ],
2818
  "shape": [
2819
+ 384
2820
  ],
2821
  "filename_index": 1
2822
  }
2823
  ]
2824
  },
2825
+ "h.11.attn.bias": {
2826
  "type": "Distributed",
2827
  "shape": [
2828
+ 1,
2829
+ 1,
2830
+ 1024,
2831
+ 1024
2832
  ],
2833
  "dtype": "F32",
2834
  "chunks": [
2835
  {
2836
  "offsets": [
2837
+ 0,
2838
+ 0,
2839
  0,
2840
  0
2841
  ],
2842
  "shape": [
2843
+ 1,
2844
+ 1,
2845
+ 1024,
2846
+ 512
2847
  ],
2848
  "filename_index": 0
2849
  },
2850
  {
2851
  "offsets": [
2852
  0,
2853
+ 0,
2854
+ 0,
2855
+ 512
2856
  ],
2857
  "shape": [
2858
+ 1,
2859
+ 1,
2860
+ 1024,
2861
+ 512
2862
  ],
2863
  "filename_index": 1
2864
  }
2865
  ]
2866
  },
2867
+ "h.0.mlp.c_proj.bias": {
2868
  "type": "Distributed",
2869
  "shape": [
2870
+ 768
2871
  ],
2872
  "dtype": "F32",
2873
  "chunks": [
 
2876
  0
2877
  ],
2878
  "shape": [
2879
+ 384
2880
  ],
2881
  "filename_index": 0
2882
  },
2883
  {
2884
  "offsets": [
2885
+ 384
2886
  ],
2887
  "shape": [
2888
+ 384
2889
  ],
2890
  "filename_index": 1
2891
  }
2892
  ]
2893
  },
2894
+ "h.7.mlp.c_proj.bias": {
2895
  "type": "Distributed",
2896
  "shape": [
2897
  768
 
2918
  }
2919
  ]
2920
  },
2921
+ "h.5.ln_2.bias": {
2922
  "type": "Distributed",
2923
  "shape": [
2924
  768
 
2945
  }
2946
  ]
2947
  },
2948
+ "h.1.ln_2.bias": {
2949
  "type": "Distributed",
2950
  "shape": [
2951
  768
 
2972
  }
2973
  ]
2974
  },
2975
+ "h.4.attn.bias": {
2976
  "type": "Distributed",
2977
  "shape": [
2978
+ 1,
2979
+ 1,
2980
+ 1024,
2981
+ 1024
2982
  ],
2983
  "dtype": "F32",
2984
  "chunks": [
2985
  {
2986
  "offsets": [
2987
+ 0,
2988
+ 0,
2989
+ 0,
2990
  0
2991
  ],
2992
  "shape": [
2993
+ 1,
2994
+ 1,
2995
+ 1024,
2996
+ 512
2997
  ],
2998
  "filename_index": 0
2999
  },
3000
  {
3001
  "offsets": [
3002
+ 0,
3003
+ 0,
3004
+ 0,
3005
+ 512
3006
  ],
3007
  "shape": [
3008
+ 1,
3009
+ 1,
3010
+ 1024,
3011
+ 512
3012
  ],
3013
  "filename_index": 1
3014
  }
3015
  ]
3016
  },
3017
+ "h.10.ln_1.weight": {
3018
  "type": "Distributed",
3019
  "shape": [
3020
  768
 
3041
  }
3042
  ]
3043
  },
3044
+ "h.10.mlp.c_proj.weight": {
3045
  "type": "Distributed",
3046
  "shape": [
3047
+ 3072,
3048
  768
3049
  ],
3050
  "dtype": "F32",
 
3055
  0
3056
  ],
3057
  "shape": [
3058
+ 1536,
3059
  768
3060
  ],
3061
  "filename_index": 0
3062
  },
3063
  {
3064
  "offsets": [
3065
+ 1536,
3066
  0
3067
  ],
3068
  "shape": [
3069
+ 1536,
3070
  768
3071
  ],
3072
  "filename_index": 1
3073
  }
3074
  ]
3075
  },
3076
+ "h.4.attn.c_attn.weight": {
3077
  "type": "Distributed",
3078
  "shape": [
3079
  768,
3080
+ 2304
3081
  ],
3082
  "dtype": "F32",
3083
  "chunks": [
 
3087
  0
3088
  ],
3089
  "shape": [
3090
+ 768,
3091
+ 1152
3092
  ],
3093
  "filename_index": 0
3094
  },
3095
  {
3096
  "offsets": [
3097
+ 0,
3098
+ 1152
3099
  ],
3100
  "shape": [
3101
+ 768,
3102
+ 1152
3103
  ],
3104
  "filename_index": 1
3105
  }
3106
  ]
3107
  },
3108
+ "h.11.mlp.c_fc.weight": {
3109
  "type": "Distributed",
3110
  "shape": [
3111
+ 768,
3112
+ 3072
3113
  ],
3114
  "dtype": "F32",
3115
  "chunks": [
3116
  {
3117
  "offsets": [
3118
+ 0,
3119
  0
3120
  ],
3121
  "shape": [
3122
+ 768,
3123
+ 1536
3124
  ],
3125
  "filename_index": 0
3126
  },
3127
  {
3128
  "offsets": [
3129
+ 0,
3130
+ 1536
3131
  ],
3132
  "shape": [
3133
+ 768,
3134
+ 1536
3135
  ],
3136
  "filename_index": 1
3137
  }
3138
  ]
3139
  },
3140
+ "h.9.attn.c_attn.weight": {
3141
  "type": "Distributed",
3142
  "shape": [
3143
+ 768,
3144
  2304
3145
  ],
3146
  "dtype": "F32",
3147
  "chunks": [
3148
  {
3149
  "offsets": [
3150
+ 0,
3151
  0
3152
  ],
3153
  "shape": [
3154
+ 768,
3155
  1152
3156
  ],
3157
  "filename_index": 0
3158
  },
3159
  {
3160
  "offsets": [
3161
+ 0,
3162
  1152
3163
  ],
3164
  "shape": [
3165
+ 768,
3166
  1152
3167
  ],
3168
  "filename_index": 1
3169
  }
3170
  ]
3171
  },
3172
+ "h.8.attn.c_attn.weight": {
3173
  "type": "Distributed",
3174
  "shape": [
3175
+ 768,
3176
+ 2304
3177
  ],
3178
  "dtype": "F32",
3179
  "chunks": [
3180
  {
3181
  "offsets": [
3182
+ 0,
3183
  0
3184
  ],
3185
  "shape": [
3186
+ 768,
3187
+ 1152
3188
  ],
3189
  "filename_index": 0
3190
  },
3191
  {
3192
  "offsets": [
3193
+ 0,
3194
+ 1152
3195
  ],
3196
  "shape": [
3197
+ 768,
3198
+ 1152
3199
  ],
3200
  "filename_index": 1
3201
  }
3202
  ]
3203
  },
3204
+ "h.0.attn.c_proj.bias": {
3205
  "type": "Distributed",
3206
  "shape": [
3207
+ 768
3208
  ],
3209
  "dtype": "F32",
3210
  "chunks": [
 
3213
  0
3214
  ],
3215
  "shape": [
3216
+ 384
3217
  ],
3218
  "filename_index": 0
3219
  },
3220
  {
3221
  "offsets": [
3222
+ 384
3223
  ],
3224
  "shape": [
3225
+ 384
3226
  ],
3227
  "filename_index": 1
3228
  }
3229
  ]
3230
  },
3231
+ "h.8.ln_2.bias": {
3232
  "type": "Distributed",
3233
  "shape": [
3234
+ 768
3235
  ],
3236
  "dtype": "F32",
3237
  "chunks": [
 
3240
  0
3241
  ],
3242
  "shape": [
3243
+ 384
3244
  ],
3245
  "filename_index": 0
3246
  },
3247
  {
3248
  "offsets": [
3249
+ 384
3250
  ],
3251
  "shape": [
3252
+ 384
3253
  ],
3254
  "filename_index": 1
3255
  }
3256
  ]
3257
  },
3258
+ "h.10.attn.c_proj.bias": {
3259
  "type": "Distributed",
3260
  "shape": [
3261
  768
 
3282
  }
3283
  ]
3284
  },
3285
+ "h.5.attn.c_proj.weight": {
3286
  "type": "Distributed",
3287
  "shape": [
3288
+ 768,
3289
  768
3290
  ],
3291
  "dtype": "F32",
3292
  "chunks": [
3293
  {
3294
  "offsets": [
3295
+ 0,
3296
  0
3297
  ],
3298
  "shape": [
3299
+ 384,
3300
+ 768
3301
  ],
3302
  "filename_index": 0
3303
  },
3304
  {
3305
  "offsets": [
3306
+ 384,
3307
+ 0
3308
  ],
3309
  "shape": [
3310
+ 384,
3311
+ 768
3312
  ],
3313
  "filename_index": 1
3314
  }
3315
  ]
3316
  },
3317
+ "h.4.mlp.c_fc.weight": {
3318
  "type": "Distributed",
3319
  "shape": [
3320
+ 768,
3321
+ 3072
3322
  ],
3323
  "dtype": "F32",
3324
  "chunks": [
3325
  {
3326
  "offsets": [
3327
+ 0,
3328
  0
3329
  ],
3330
  "shape": [
3331
+ 768,
3332
+ 1536
3333
  ],
3334
  "filename_index": 0
3335
  },
3336
  {
3337
  "offsets": [
3338
+ 0,
3339
+ 1536
3340
  ],
3341
  "shape": [
3342
+ 768,
3343
+ 1536
3344
  ],
3345
  "filename_index": 1
3346
  }
3347
  ]
3348
  },
3349
+ "wpe.weight": {
3350
  "type": "Distributed",
3351
  "shape": [
 
 
3352
  1024,
3353
+ 768
3354
  ],
3355
  "dtype": "F32",
3356
  "chunks": [
3357
  {
3358
  "offsets": [
 
 
3359
  0,
3360
  0
3361
  ],
3362
  "shape": [
 
 
3363
  1024,
3364
+ 384
3365
  ],
3366
  "filename_index": 0
3367
  },
3368
  {
3369
  "offsets": [
3370
  0,
3371
+ 384
 
 
3372
  ],
3373
  "shape": [
 
 
3374
  1024,
3375
+ 384
3376
  ],
3377
  "filename_index": 1
3378
  }
3379
  ]
3380
  },
3381
+ "h.6.ln_2.weight": {
3382
  "type": "Distributed",
3383
  "shape": [
 
3384
  768
3385
  ],
3386
  "dtype": "F32",
3387
  "chunks": [
3388
  {
3389
  "offsets": [
 
3390
  0
3391
  ],
3392
  "shape": [
3393
+ 384
 
3394
  ],
3395
  "filename_index": 0
3396
  },
3397
  {
3398
  "offsets": [
3399
+ 384
 
3400
  ],
3401
  "shape": [
3402
+ 384
 
3403
  ],
3404
  "filename_index": 1
3405
  }
3406
  ]
3407
  },
3408
+ "h.10.attn.c_proj.weight": {
3409
  "type": "Distributed",
3410
  "shape": [
3411
+ 768,
3412
+ 768
3413
  ],
3414
  "dtype": "F32",
3415
  "chunks": [
3416
  {
3417
  "offsets": [
3418
+ 0,
3419
  0
3420
  ],
3421
  "shape": [
3422
+ 384,
3423
+ 768
3424
  ],
3425
  "filename_index": 0
3426
  },
3427
  {
3428
  "offsets": [
3429
+ 384,
3430
+ 0
3431
  ],
3432
  "shape": [
3433
+ 384,
3434
+ 768
3435
  ],
3436
  "filename_index": 1
3437
  }
3438
  ]
3439
  },
3440
+ "h.3.attn.bias": {
3441
  "type": "Distributed",
3442
  "shape": [
3443
+ 1,
3444
+ 1,
3445
  1024,
3446
+ 1024
3447
  ],
3448
  "dtype": "F32",
3449
  "chunks": [
3450
  {
3451
  "offsets": [
3452
+ 0,
3453
+ 0,
3454
  0,
3455
  0
3456
  ],
3457
  "shape": [
3458
+ 1,
3459
+ 1,
3460
  1024,
3461
+ 512
3462
  ],
3463
  "filename_index": 0
3464
  },
3465
  {
3466
  "offsets": [
3467
  0,
3468
+ 0,
3469
+ 0,
3470
+ 512
3471
  ],
3472
  "shape": [
3473
+ 1,
3474
+ 1,
3475
  1024,
3476
+ 512
3477
  ],
3478
  "filename_index": 1
3479
  }
3480
  ]
3481
  },
3482
+ "h.11.attn.c_attn.bias": {
3483
  "type": "Distributed",
3484
  "shape": [
3485
+ 2304
 
3486
  ],
3487
  "dtype": "F32",
3488
  "chunks": [
3489
  {
3490
  "offsets": [
 
3491
  0
3492
  ],
3493
  "shape": [
3494
+ 1152
 
3495
  ],
3496
  "filename_index": 0
3497
  },
3498
  {
3499
  "offsets": [
3500
+ 1152
 
3501
  ],
3502
  "shape": [
3503
+ 1152
 
3504
  ],
3505
  "filename_index": 1
3506
  }
3507
  ]
3508
  },
3509
+ "h.2.ln_1.bias": {
3510
  "type": "Distributed",
3511
  "shape": [
3512
+ 768
 
3513
  ],
3514
  "dtype": "F32",
3515
  "chunks": [
3516
  {
3517
  "offsets": [
 
3518
  0
3519
  ],
3520
  "shape": [
3521
+ 384
 
3522
  ],
3523
  "filename_index": 0
3524
  },
3525
  {
3526
  "offsets": [
3527
+ 384
 
3528
  ],
3529
  "shape": [
3530
+ 384
 
3531
  ],
3532
  "filename_index": 1
3533
  }
3534
  ]
3535
  },
3536
+ "h.9.ln_1.weight": {
3537
  "type": "Distributed",
3538
  "shape": [
3539
  768
 
3560
  }
3561
  ]
3562
  },
3563
+ "h.8.mlp.c_fc.weight": {
3564
  "type": "Distributed",
3565
  "shape": [
3566
+ 768,
3567
+ 3072
3568
  ],
3569
  "dtype": "F32",
3570
  "chunks": [
 
3574
  0
3575
  ],
3576
  "shape": [
3577
+ 768,
3578
+ 1536
3579
  ],
3580
  "filename_index": 0
3581
  },
3582
  {
3583
  "offsets": [
3584
+ 0,
3585
+ 1536
3586
  ],
3587
  "shape": [
3588
+ 768,
3589
+ 1536
3590
  ],
3591
  "filename_index": 1
3592
  }
3593
  ]
3594
  },
3595
+ "h.2.ln_2.weight": {
3596
  "type": "Distributed",
3597
  "shape": [
3598
+ 768
 
3599
  ],
3600
  "dtype": "F32",
3601
  "chunks": [
3602
  {
3603
  "offsets": [
 
3604
  0
3605
  ],
3606
  "shape": [
3607
+ 384
 
3608
  ],
3609
  "filename_index": 0
3610
  },
3611
  {
3612
  "offsets": [
3613
+ 384
 
3614
  ],
3615
  "shape": [
3616
+ 384
 
3617
  ],
3618
  "filename_index": 1
3619
  }
 
3661
  }
3662
  ]
3663
  },
3664
+ "h.5.mlp.c_fc.weight": {
3665
  "type": "Distributed",
3666
  "shape": [
3667
+ 768,
3668
+ 3072
3669
  ],
3670
  "dtype": "F32",
3671
  "chunks": [
3672
  {
3673
  "offsets": [
3674
+ 0,
3675
  0
3676
  ],
3677
  "shape": [
3678
+ 768,
3679
+ 1536
3680
  ],
3681
  "filename_index": 0
3682
  },
3683
  {
3684
  "offsets": [
3685
+ 0,
3686
+ 1536
3687
  ],
3688
  "shape": [
3689
+ 768,
3690
+ 1536
3691
  ],
3692
  "filename_index": 1
3693
  }
3694
  ]
3695
  },
3696
+ "h.11.mlp.c_proj.weight": {
3697
  "type": "Distributed",
3698
  "shape": [
3699
+ 3072,
3700
+ 768
3701
  ],
3702
  "dtype": "F32",
3703
  "chunks": [
 
3707
  0
3708
  ],
3709
  "shape": [
3710
+ 1536,
3711
+ 768
3712
  ],
3713
  "filename_index": 0
3714
  },
3715
  {
3716
  "offsets": [
3717
+ 1536,
3718
+ 0
3719
  ],
3720
  "shape": [
3721
+ 1536,
3722
+ 768
3723
  ],
3724
  "filename_index": 1
3725
  }
3726
  ]
3727
  },
3728
+ "h.7.ln_1.bias": {
3729
  "type": "Distributed",
3730
  "shape": [
3731
  768
 
3752
  }
3753
  ]
3754
  },
3755
+ "h.8.attn.c_attn.bias": {
3756
  "type": "Distributed",
3757
  "shape": [
3758
+ 2304
3759
  ],
3760
  "dtype": "F32",
3761
  "chunks": [
 
3764
  0
3765
  ],
3766
  "shape": [
3767
+ 1152
3768
  ],
3769
  "filename_index": 0
3770
  },
3771
  {
3772
  "offsets": [
3773
+ 1152
3774
  ],
3775
  "shape": [
3776
+ 1152
3777
  ],
3778
  "filename_index": 1
3779
  }
3780
  ]
3781
  },
3782
+ "h.6.attn.c_proj.bias": {
3783
  "type": "Distributed",
3784
  "shape": [
 
3785
  768
3786
  ],
3787
  "dtype": "F32",
3788
  "chunks": [
3789
  {
3790
  "offsets": [
 
3791
  0
3792
  ],
3793
  "shape": [
3794
+ 384
 
3795
  ],
3796
  "filename_index": 0
3797
  },
3798
  {
3799
  "offsets": [
3800
+ 384
 
3801
  ],
3802
  "shape": [
3803
+ 384
 
3804
  ],
3805
  "filename_index": 1
3806
  }
3807
  ]
3808
  },
3809
+ "h.3.mlp.c_fc.weight": {
3810
  "type": "Distributed",
3811
  "shape": [
3812
+ 768,
3813
  3072
3814
  ],
3815
  "dtype": "F32",
3816
  "chunks": [
3817
  {
3818
  "offsets": [
3819
+ 0,
3820
  0
3821
  ],
3822
  "shape": [
3823
+ 768,
3824
  1536
3825
  ],
3826
  "filename_index": 0
3827
  },
3828
  {
3829
  "offsets": [
3830
+ 0,
3831
  1536
3832
  ],
3833
  "shape": [
3834
+ 768,
3835
  1536
3836
  ],
3837
  "filename_index": 1
3838
  }
3839
  ]
3840
  },
3841
+ "h.4.ln_2.weight": {
3842
  "type": "Distributed",
3843
  "shape": [
3844
+ 768
3845
  ],
3846
  "dtype": "F32",
3847
  "chunks": [
 
3850
  0
3851
  ],
3852
  "shape": [
3853
+ 384
3854
  ],
3855
  "filename_index": 0
3856
  },
3857
  {
3858
  "offsets": [
3859
+ 384
3860
  ],
3861
  "shape": [
3862
+ 384
3863
  ],
3864
  "filename_index": 1
3865
  }
3866
  ]
3867
  },
3868
+ "h.5.attn.c_attn.bias": {
3869
  "type": "Distributed",
3870
  "shape": [
3871
+ 2304
3872
  ],
3873
  "dtype": "F32",
3874
  "chunks": [
 
3877
  0
3878
  ],
3879
  "shape": [
3880
+ 1152
3881
  ],
3882
  "filename_index": 0
3883
  },
3884
  {
3885
  "offsets": [
3886
+ 1152
3887
  ],
3888
  "shape": [
3889
+ 1152
3890
  ],
3891
  "filename_index": 1
3892
  }
3893
  ]
3894
  },
3895
+ "h.8.mlp.c_proj.weight": {
3896
  "type": "Distributed",
3897
  "shape": [
3898
+ 3072,
3899
+ 768
3900
  ],
3901
  "dtype": "F32",
3902
  "chunks": [
 
3906
  0
3907
  ],
3908
  "shape": [
3909
+ 1536,
3910
+ 768
3911
  ],
3912
  "filename_index": 0
3913
  },
3914
  {
3915
  "offsets": [
3916
+ 1536,
3917
+ 0
3918
  ],
3919
  "shape": [
3920
+ 1536,
3921
+ 768
3922
  ],
3923
  "filename_index": 1
3924
  }
3925
  ]
3926
  },
3927
+ "h.5.mlp.c_proj.weight": {
3928
  "type": "Distributed",
3929
  "shape": [
3930
+ 3072,
3931
  768
3932
  ],
3933
  "dtype": "F32",
 
3938
  0
3939
  ],
3940
  "shape": [
3941
+ 1536,
3942
  768
3943
  ],
3944
  "filename_index": 0
3945
  },
3946
  {
3947
  "offsets": [
3948
+ 1536,
3949
  0
3950
  ],
3951
  "shape": [
3952
+ 1536,
3953
  768
3954
  ],
3955
  "filename_index": 1
3956
  }
3957
  ]
3958
  },
3959
+ "h.4.mlp.c_proj.bias": {
3960
  "type": "Distributed",
3961
  "shape": [
3962
+ 768
 
 
 
3963
  ],
3964
  "dtype": "F32",
3965
  "chunks": [
3966
  {
3967
  "offsets": [
 
 
 
3968
  0
3969
  ],
3970
  "shape": [
3971
+ 384
 
 
 
3972
  ],
3973
  "filename_index": 0
3974
  },
3975
  {
3976
  "offsets": [
3977
+ 384
 
 
 
3978
  ],
3979
  "shape": [
3980
+ 384
 
 
 
3981
  ],
3982
  "filename_index": 1
3983
  }
3984
  ]
3985
  },
3986
+ "h.6.mlp.c_fc.weight": {
3987
  "type": "Distributed",
3988
  "shape": [
3989
+ 768,
3990
+ 3072
3991
  ],
3992
  "dtype": "F32",
3993
  "chunks": [
 
3997
  0
3998
  ],
3999
  "shape": [
4000
+ 768,
4001
+ 1536
4002
  ],
4003
  "filename_index": 0
4004
  },
4005
  {
4006
  "offsets": [
4007
+ 0,
4008
+ 1536
4009
+ ],
4010
  "shape": [
4011
+ 768,
4012
+ 1536
4013
  ],
4014
  "filename_index": 1
4015
  }
4016
  ]
4017
  },
4018
+ "h.9.attn.c_attn.bias": {
4019
  "type": "Distributed",
4020
  "shape": [
4021
+ 2304
4022
  ],
4023
  "dtype": "F32",
4024
  "chunks": [
 
4027
  0
4028
  ],
4029
  "shape": [
4030
+ 1152
4031
  ],
4032
  "filename_index": 0
4033
  },
4034
  {
4035
  "offsets": [
4036
+ 1152
4037
  ],
4038
  "shape": [
4039
+ 1152
4040
  ],
4041
  "filename_index": 1
4042
  }
4043
  ]
4044
  },
4045
+ "h.8.attn.c_proj.bias": {
4046
  "type": "Distributed",
4047
  "shape": [
4048
+ 768
4049
  ],
4050
  "dtype": "F32",
4051
  "chunks": [
 
4054
  0
4055
  ],
4056
  "shape": [
4057
+ 384
4058
  ],
4059
  "filename_index": 0
4060
  },
4061
  {
4062
  "offsets": [
4063
+ 384
4064
  ],
4065
  "shape": [
4066
+ 384
4067
  ],
4068
  "filename_index": 1
4069
  }
4070
  ]
4071
  },
4072
+ "h.2.attn.c_attn.bias": {
4073
  "type": "Distributed",
4074
  "shape": [
4075
  2304
 
4096
  }
4097
  ]
4098
  },
4099
+ "h.0.mlp.c_proj.weight": {
4100
  "type": "Distributed",
4101
  "shape": [
4102
+ 3072,
4103
+ 768
 
 
4104
  ],
4105
  "dtype": "F32",
4106
  "chunks": [
4107
  {
4108
  "offsets": [
 
 
4109
  0,
4110
  0
4111
  ],
4112
  "shape": [
4113
+ 1536,
4114
+ 768
 
 
4115
  ],
4116
  "filename_index": 0
4117
  },
4118
  {
4119
  "offsets": [
4120
+ 1536,
4121
+ 0
 
 
4122
  ],
4123
  "shape": [
4124
+ 1536,
4125
+ 768
 
 
4126
  ],
4127
  "filename_index": 1
4128
  }
4129
  ]
4130
  },
4131
+ "h.11.ln_1.weight": {
4132
  "type": "Distributed",
4133
  "shape": [
 
4134
  768
4135
  ],
4136
  "dtype": "F32",
4137
  "chunks": [
4138
  {
4139
  "offsets": [
 
4140
  0
4141
  ],
4142
  "shape": [
4143
+ 384
 
4144
  ],
4145
  "filename_index": 0
4146
  },
4147
  {
4148
  "offsets": [
4149
+ 384
4150
+ ],
4151
+ "shape": [
4152
+ 384
4153
+ ],
4154
+ "filename_index": 1
4155
+ }
4156
+ ]
4157
+ },
4158
+ "h.3.ln_1.bias": {
4159
+ "type": "Distributed",
4160
+ "shape": [
4161
+ 768
4162
+ ],
4163
+ "dtype": "F32",
4164
+ "chunks": [
4165
+ {
4166
+ "offsets": [
4167
  0
4168
  ],
4169
  "shape": [
4170
+ 384
4171
+ ],
4172
+ "filename_index": 0
4173
+ },
4174
+ {
4175
+ "offsets": [
4176
+ 384
4177
+ ],
4178
+ "shape": [
4179
+ 384
4180
  ],
4181
  "filename_index": 1
4182
  }
4183
  ]
4184
  },
4185
+ "h.10.attn.bias": {
4186
  "type": "Distributed",
4187
  "shape": [
4188
  1,
 
4224
  }
4225
  ]
4226
  },
4227
+ "h.10.ln_1.bias": {
4228
  "type": "Distributed",
4229
  "shape": [
4230
+ 768
 
 
 
4231
  ],
4232
  "dtype": "F32",
4233
  "chunks": [
4234
  {
4235
  "offsets": [
 
 
 
4236
  0
4237
  ],
4238
  "shape": [
4239
+ 384
 
 
 
4240
  ],
4241
  "filename_index": 0
4242
  },
4243
  {
4244
  "offsets": [
4245
+ 384
 
 
 
4246
  ],
4247
  "shape": [
4248
+ 384
 
 
 
4249
  ],
4250
  "filename_index": 1
4251
  }
4252
  ]
4253
  },
4254
+ "h.5.ln_1.weight": {
4255
  "type": "Distributed",
4256
  "shape": [
4257
  768
 
4278
  }
4279
  ]
4280
  },
4281
+ "h.0.ln_2.weight": {
4282
  "type": "Distributed",
4283
  "shape": [
4284
  768
 
4305
  }
4306
  ]
4307
  },
4308
+ "h.0.mlp.c_fc.bias": {
4309
  "type": "Distributed",
4310
  "shape": [
4311
+ 3072
 
4312
  ],
4313
  "dtype": "F32",
4314
  "chunks": [
4315
  {
4316
  "offsets": [
 
4317
  0
4318
  ],
4319
  "shape": [
4320
+ 1536
 
4321
  ],
4322
  "filename_index": 0
4323
  },
4324
  {
4325
  "offsets": [
4326
+ 1536
 
4327
  ],
4328
  "shape": [
4329
+ 1536
 
4330
  ],
4331
  "filename_index": 1
4332
  }
4333
  ]
4334
  },
4335
+ "h.11.ln_2.bias": {
4336
  "type": "Distributed",
4337
  "shape": [
4338
+ 768
4339
  ],
4340
  "dtype": "F32",
4341
  "chunks": [
 
4344
  0
4345
  ],
4346
  "shape": [
4347
+ 384
4348
  ],
4349
  "filename_index": 0
4350
  },
4351
  {
4352
  "offsets": [
4353
+ 384
4354
  ],
4355
  "shape": [
4356
+ 384
4357
  ],
4358
  "filename_index": 1
4359
  }
4360
  ]
4361
  },
4362
+ "h.1.mlp.c_fc.weight": {
4363
  "type": "Distributed",
4364
  "shape": [
4365
+ 768,
4366
  3072
4367
  ],
4368
  "dtype": "F32",
4369
  "chunks": [
4370
  {
4371
  "offsets": [
4372
+ 0,
4373
  0
4374
  ],
4375
  "shape": [
4376
+ 768,
4377
  1536
4378
  ],
4379
  "filename_index": 0
4380
  },
4381
  {
4382
  "offsets": [
4383
+ 0,
4384
  1536
4385
  ],
4386
  "shape": [
4387
+ 768,
4388
  1536
4389
  ],
4390
  "filename_index": 1
4391
  }
4392
  ]
4393
  },
4394
+ "h.7.mlp.c_fc.weight": {
4395
  "type": "Distributed",
4396
  "shape": [
4397
+ 768,
4398
+ 3072
4399
  ],
4400
  "dtype": "F32",
4401
  "chunks": [
 
4405
  0
4406
  ],
4407
  "shape": [
4408
+ 768,
4409
+ 1536
4410
  ],
4411
  "filename_index": 0
4412
  },
4413
  {
4414
  "offsets": [
4415
+ 0,
4416
+ 1536
4417
  ],
4418
  "shape": [
4419
+ 768,
4420
+ 1536
4421
  ],
4422
  "filename_index": 1
4423
  }
4424
  ]
4425
  },
4426
+ "h.8.ln_1.bias": {
4427
  "type": "Distributed",
4428
  "shape": [
 
4429
  768
4430
  ],
4431
  "dtype": "F32",
4432
  "chunks": [
4433
  {
4434
  "offsets": [
4435
+ 0
4436
+ ],
4437
+ "shape": [
4438
+ 384
4439
+ ],
4440
+ "filename_index": 0
4441
+ },
4442
+ {
4443
+ "offsets": [
4444
+ 384
4445
+ ],
4446
+ "shape": [
4447
+ 384
4448
+ ],
4449
+ "filename_index": 1
4450
+ }
4451
+ ]
4452
+ },
4453
+ "h.5.attn.bias": {
4454
+ "type": "Distributed",
4455
+ "shape": [
4456
+ 1,
4457
+ 1,
4458
+ 1024,
4459
+ 1024
4460
+ ],
4461
+ "dtype": "F32",
4462
+ "chunks": [
4463
+ {
4464
+ "offsets": [
4465
+ 0,
4466
+ 0,
4467
  0,
4468
  0
4469
  ],
4470
  "shape": [
4471
+ 1,
4472
+ 1,
4473
+ 1024,
4474
+ 512
4475
  ],
4476
  "filename_index": 0
4477
  },
4478
  {
4479
  "offsets": [
4480
+ 0,
4481
+ 0,
4482
+ 0,
4483
+ 512
4484
+ ],
4485
+ "shape": [
4486
+ 1,
4487
+ 1,
4488
+ 1024,
4489
+ 512
4490
+ ],
4491
+ "filename_index": 1
4492
+ }
4493
+ ]
4494
+ },
4495
+ "h.7.ln_2.weight": {
4496
+ "type": "Distributed",
4497
+ "shape": [
4498
+ 768
4499
+ ],
4500
+ "dtype": "F32",
4501
+ "chunks": [
4502
+ {
4503
+ "offsets": [
4504
  0
4505
  ],
4506
  "shape": [
4507
+ 384
4508
+ ],
4509
+ "filename_index": 0
4510
+ },
4511
+ {
4512
+ "offsets": [
4513
+ 384
4514
+ ],
4515
+ "shape": [
4516
+ 384
4517
  ],
4518
  "filename_index": 1
4519
  }
4520
  ]
4521
  },
4522
+ "h.4.attn.c_proj.bias": {
4523
  "type": "Distributed",
4524
  "shape": [
4525
  768
 
4546
  }
4547
  ]
4548
  },
4549
+ "h.0.mlp.c_fc.weight": {
4550
  "type": "Distributed",
4551
  "shape": [
4552
  768,
4553
+ 3072
4554
  ],
4555
  "dtype": "F32",
4556
  "chunks": [
 
4561
  ],
4562
  "shape": [
4563
  768,
4564
+ 1536
4565
  ],
4566
  "filename_index": 0
4567
  },
4568
  {
4569
  "offsets": [
4570
  0,
4571
+ 1536
4572
  ],
4573
  "shape": [
4574
  768,
4575
+ 1536
4576
  ],
4577
  "filename_index": 1
4578
  }
4579
  ]
4580
  },
4581
+ "h.7.mlp.c_fc.bias": {
4582
  "type": "Distributed",
4583
  "shape": [
4584
+ 3072
 
4585
  ],
4586
  "dtype": "F32",
4587
  "chunks": [
4588
  {
4589
  "offsets": [
 
4590
  0
4591
  ],
4592
  "shape": [
4593
+ 1536
 
4594
  ],
4595
  "filename_index": 0
4596
  },
4597
  {
4598
  "offsets": [
4599
+ 1536
 
4600
  ],
4601
  "shape": [
4602
+ 1536
 
4603
  ],
4604
  "filename_index": 1
4605
  }
4606
  ]
4607
  },
4608
+ "h.11.attn.c_proj.weight": {
4609
  "type": "Distributed",
4610
  "shape": [
4611
+ 768,
4612
  768
4613
  ],
4614
  "dtype": "F32",
4615
  "chunks": [
4616
  {
4617
  "offsets": [
4618
+ 0,
4619
  0
4620
  ],
4621
  "shape": [
4622
+ 384,
4623
+ 768
4624
  ],
4625
  "filename_index": 0
4626
  },
4627
  {
4628
  "offsets": [
4629
+ 384,
4630
+ 0
4631
  ],
4632
  "shape": [
4633
+ 384,
4634
+ 768
4635
  ],
4636
  "filename_index": 1
4637
  }
4638
  ]
4639
  },
4640
+ "h.8.mlp.c_proj.bias": {
4641
  "type": "Distributed",
4642
  "shape": [
4643
  768
 
4664
  }
4665
  ]
4666
  },
4667
+ "h.1.mlp.c_fc.bias": {
4668
  "type": "Distributed",
4669
  "shape": [
4670
+ 3072
 
4671
  ],
4672
  "dtype": "F32",
4673
  "chunks": [
4674
  {
4675
  "offsets": [
 
4676
  0
4677
  ],
4678
  "shape": [
4679
+ 1536
 
4680
  ],
4681
  "filename_index": 0
4682
  },
4683
  {
4684
  "offsets": [
4685
+ 1536
 
4686
  ],
4687
  "shape": [
4688
+ 1536
 
4689
  ],
4690
  "filename_index": 1
4691
  }
4692
  ]
4693
  },
4694
+ "h.6.mlp.c_proj.weight": {
4695
  "type": "Distributed",
4696
  "shape": [
4697
+ 3072,
4698
  768
4699
  ],
4700
  "dtype": "F32",
 
4705
  0
4706
  ],
4707
  "shape": [
4708
+ 1536,
4709
  768
4710
  ],
4711
  "filename_index": 0
4712
  },
4713
  {
4714
  "offsets": [
4715
+ 1536,
4716
  0
4717
  ],
4718
  "shape": [
4719
+ 1536,
4720
  768
4721
  ],
4722
  "filename_index": 1
4723
  }
4724
  ]
4725
+ },
4726
+ "h.2.attn.c_proj.bias": {
4727
+ "type": "Distributed",
4728
+ "shape": [
4729
+ 768
4730
+ ],
4731
+ "dtype": "F32",
4732
+ "chunks": [
4733
+ {
4734
+ "offsets": [
4735
+ 0
4736
+ ],
4737
+ "shape": [
4738
+ 384
4739
+ ],
4740
+ "filename_index": 0
4741
+ },
4742
+ {
4743
+ "offsets": [
4744
+ 384
4745
+ ],
4746
+ "shape": [
4747
+ 384
4748
+ ],
4749
+ "filename_index": 1
4750
+ }
4751
+ ]
4752
  }
4753
  },
4754
  "filenames": [