| { | |
| "final_norm.bias": { | |
| "scale": 0.08834397792816162, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "final_norm.weight": { | |
| "scale": 0.7890332937240601, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.attn.c_attn.bias": { | |
| "scale": 0.12309323251247406, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.0.attn.c_attn.weight": { | |
| "scale": 0.1041356548666954, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.0.attn.c_proj.bias": { | |
| "scale": 0.08951065689325333, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.attn.c_proj.weight": { | |
| "scale": 0.1165793165564537, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.ln_1.bias": { | |
| "scale": 0.026223499327898026, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.ln_1.weight": { | |
| "scale": 0.03431953117251396, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.ln_2.bias": { | |
| "scale": 0.20567700266838074, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.ln_2.weight": { | |
| "scale": 0.11458853632211685, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.mlp.c_fc.bias": { | |
| "scale": 0.047463927417993546, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.0.mlp.c_fc.weight": { | |
| "scale": 0.17041946947574615, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.0.mlp.c_proj.bias": { | |
| "scale": 0.29554909467697144, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.0.mlp.c_proj.weight": { | |
| "scale": 0.6621343493461609, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.attn.c_attn.bias": { | |
| "scale": 0.09180662781000137, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.1.attn.c_attn.weight": { | |
| "scale": 0.06224552541971207, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.1.attn.c_proj.bias": { | |
| "scale": 0.2972453534603119, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.attn.c_proj.weight": { | |
| "scale": 0.45653465390205383, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.ln_1.bias": { | |
| "scale": 0.20046189427375793, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.ln_1.weight": { | |
| "scale": 0.07655565440654755, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.ln_2.bias": { | |
| "scale": 0.21620801091194153, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.ln_2.weight": { | |
| "scale": 0.20269657671451569, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.mlp.c_fc.bias": { | |
| "scale": 0.026467734947800636, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.1.mlp.c_fc.weight": { | |
| "scale": 0.12142758071422577, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.1.mlp.c_proj.bias": { | |
| "scale": 0.3446175754070282, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.1.mlp.c_proj.weight": { | |
| "scale": 0.7414490580558777, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.attn.c_attn.bias": { | |
| "scale": 0.044142965227365494, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.10.attn.c_attn.weight": { | |
| "scale": 0.11839079111814499, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.10.attn.c_proj.bias": { | |
| "scale": 0.0456073097884655, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.attn.c_proj.weight": { | |
| "scale": 0.08686741441488266, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.ln_1.bias": { | |
| "scale": 0.14560243487358093, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.ln_1.weight": { | |
| "scale": 0.15239107608795166, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.ln_2.bias": { | |
| "scale": 0.13042710721492767, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.ln_2.weight": { | |
| "scale": 0.25696828961372375, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.mlp.c_fc.bias": { | |
| "scale": 0.029524575918912888, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.10.mlp.c_fc.weight": { | |
| "scale": 0.09666890650987625, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.10.mlp.c_proj.bias": { | |
| "scale": 0.1028929278254509, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.10.mlp.c_proj.weight": { | |
| "scale": 0.3654429316520691, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.attn.c_attn.bias": { | |
| "scale": 0.048490703105926514, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.11.attn.c_attn.weight": { | |
| "scale": 0.11234062910079956, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.11.attn.c_proj.bias": { | |
| "scale": 0.0510806143283844, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.attn.c_proj.weight": { | |
| "scale": 0.08797170966863632, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.ln_1.bias": { | |
| "scale": 0.164643794298172, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.ln_1.weight": { | |
| "scale": 0.16134341061115265, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.ln_2.bias": { | |
| "scale": 0.09844086319208145, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.ln_2.weight": { | |
| "scale": 0.22313998639583588, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.mlp.c_fc.bias": { | |
| "scale": 0.03060619719326496, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.11.mlp.c_fc.weight": { | |
| "scale": 0.06761165708303452, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.11.mlp.c_proj.bias": { | |
| "scale": 0.10643502324819565, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.11.mlp.c_proj.weight": { | |
| "scale": 0.4576462209224701, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.attn.c_attn.bias": { | |
| "scale": 0.04992813244462013, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.12.attn.c_attn.weight": { | |
| "scale": 0.14503903687000275, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.12.attn.c_proj.bias": { | |
| "scale": 0.033475592732429504, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.attn.c_proj.weight": { | |
| "scale": 0.11046390980482101, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.ln_1.bias": { | |
| "scale": 0.18613700568675995, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.ln_1.weight": { | |
| "scale": 0.16866794228553772, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.ln_2.bias": { | |
| "scale": 0.12722595036029816, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.ln_2.weight": { | |
| "scale": 0.23250164091587067, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.mlp.c_fc.bias": { | |
| "scale": 0.014812483452260494, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.12.mlp.c_fc.weight": { | |
| "scale": 0.068848617374897, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.12.mlp.c_proj.bias": { | |
| "scale": 0.12035634368658066, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.12.mlp.c_proj.weight": { | |
| "scale": 0.2743368148803711, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.attn.c_attn.bias": { | |
| "scale": 0.04760030657052994, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.13.attn.c_attn.weight": { | |
| "scale": 0.08714257925748825, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.13.attn.c_proj.bias": { | |
| "scale": 0.06746888160705566, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.attn.c_proj.weight": { | |
| "scale": 0.11844473332166672, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.ln_1.bias": { | |
| "scale": 0.15157721936702728, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.ln_1.weight": { | |
| "scale": 0.16393320262432098, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.ln_2.bias": { | |
| "scale": 0.1306534856557846, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.ln_2.weight": { | |
| "scale": 0.22248651087284088, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.mlp.c_fc.bias": { | |
| "scale": 0.019469883292913437, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.13.mlp.c_fc.weight": { | |
| "scale": 0.06430874019861221, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.13.mlp.c_proj.bias": { | |
| "scale": 0.11026618629693985, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.13.mlp.c_proj.weight": { | |
| "scale": 0.2532596290111542, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.attn.c_attn.bias": { | |
| "scale": 0.04019672051072121, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.14.attn.c_attn.weight": { | |
| "scale": 0.14072343707084656, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.14.attn.c_proj.bias": { | |
| "scale": 0.02364635095000267, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.attn.c_proj.weight": { | |
| "scale": 0.10160692036151886, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.ln_1.bias": { | |
| "scale": 0.159767284989357, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.ln_1.weight": { | |
| "scale": 0.16219459474086761, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.ln_2.bias": { | |
| "scale": 0.13014180958271027, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.ln_2.weight": { | |
| "scale": 0.21348479390144348, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.mlp.c_fc.bias": { | |
| "scale": 0.01531550008803606, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.14.mlp.c_fc.weight": { | |
| "scale": 0.08244454115629196, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.14.mlp.c_proj.bias": { | |
| "scale": 0.1048700362443924, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.14.mlp.c_proj.weight": { | |
| "scale": 0.28695791959762573, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.attn.c_attn.bias": { | |
| "scale": 0.04081624746322632, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.15.attn.c_attn.weight": { | |
| "scale": 0.07819346338510513, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.15.attn.c_proj.bias": { | |
| "scale": 0.08492325991392136, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.attn.c_proj.weight": { | |
| "scale": 0.11697079986333847, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.ln_1.bias": { | |
| "scale": 0.14451484382152557, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.ln_1.weight": { | |
| "scale": 0.16579607129096985, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.ln_2.bias": { | |
| "scale": 0.11252031475305557, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.ln_2.weight": { | |
| "scale": 0.20037208497524261, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.mlp.c_fc.bias": { | |
| "scale": 0.020076142624020576, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.15.mlp.c_fc.weight": { | |
| "scale": 0.0844724252820015, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.15.mlp.c_proj.bias": { | |
| "scale": 0.09757417440414429, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.15.mlp.c_proj.weight": { | |
| "scale": 0.310793936252594, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.attn.c_attn.bias": { | |
| "scale": 0.04743470624089241, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.16.attn.c_attn.weight": { | |
| "scale": 0.12722057104110718, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.16.attn.c_proj.bias": { | |
| "scale": 0.09606064110994339, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.attn.c_proj.weight": { | |
| "scale": 0.07591201364994049, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.ln_1.bias": { | |
| "scale": 0.15709154307842255, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.ln_1.weight": { | |
| "scale": 0.15911130607128143, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.ln_2.bias": { | |
| "scale": 0.12005039304494858, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.ln_2.weight": { | |
| "scale": 0.18429037928581238, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.mlp.c_fc.bias": { | |
| "scale": 0.022201305255293846, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.16.mlp.c_fc.weight": { | |
| "scale": 0.08008849620819092, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.16.mlp.c_proj.bias": { | |
| "scale": 0.09774907678365707, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.16.mlp.c_proj.weight": { | |
| "scale": 0.16212008893489838, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.attn.c_attn.bias": { | |
| "scale": 0.04417693614959717, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.17.attn.c_attn.weight": { | |
| "scale": 0.1320490539073944, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.17.attn.c_proj.bias": { | |
| "scale": 0.06338126957416534, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.attn.c_proj.weight": { | |
| "scale": 0.13893575966358185, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.ln_1.bias": { | |
| "scale": 0.15823052823543549, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.ln_1.weight": { | |
| "scale": 0.16002225875854492, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.ln_2.bias": { | |
| "scale": 0.13184016942977905, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.ln_2.weight": { | |
| "scale": 0.1869189441204071, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.mlp.c_fc.bias": { | |
| "scale": 0.02481200359761715, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.17.mlp.c_fc.weight": { | |
| "scale": 0.0685386210680008, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.17.mlp.c_proj.bias": { | |
| "scale": 0.08835189044475555, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.17.mlp.c_proj.weight": { | |
| "scale": 0.18417593836784363, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.attn.c_attn.bias": { | |
| "scale": 0.037581078708171844, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.18.attn.c_attn.weight": { | |
| "scale": 0.12254094332456589, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.18.attn.c_proj.bias": { | |
| "scale": 0.11249931156635284, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.attn.c_proj.weight": { | |
| "scale": 0.09757298976182938, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.ln_1.bias": { | |
| "scale": 0.1457567662000656, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.ln_1.weight": { | |
| "scale": 0.15891136229038239, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.ln_2.bias": { | |
| "scale": 0.11753548681735992, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.ln_2.weight": { | |
| "scale": 0.17903394997119904, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.mlp.c_fc.bias": { | |
| "scale": 0.021563095971941948, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.18.mlp.c_fc.weight": { | |
| "scale": 0.06829343736171722, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.18.mlp.c_proj.bias": { | |
| "scale": 0.06819847971200943, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.18.mlp.c_proj.weight": { | |
| "scale": 0.12814833223819733, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.attn.c_attn.bias": { | |
| "scale": 0.04691386595368385, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.19.attn.c_attn.weight": { | |
| "scale": 0.11684088408946991, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.19.attn.c_proj.bias": { | |
| "scale": 0.05939425155520439, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.attn.c_proj.weight": { | |
| "scale": 0.12523870170116425, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.ln_1.bias": { | |
| "scale": 0.14781410992145538, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.ln_1.weight": { | |
| "scale": 0.15612022578716278, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.ln_2.bias": { | |
| "scale": 0.10865359008312225, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.ln_2.weight": { | |
| "scale": 0.17884387075901031, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.mlp.c_fc.bias": { | |
| "scale": 0.023445727303624153, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.19.mlp.c_fc.weight": { | |
| "scale": 0.07771860063076019, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.19.mlp.c_proj.bias": { | |
| "scale": 0.05867428332567215, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.19.mlp.c_proj.weight": { | |
| "scale": 0.1523352414369583, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.attn.c_attn.bias": { | |
| "scale": 0.04482164978981018, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.2.attn.c_attn.weight": { | |
| "scale": 0.06159922480583191, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.2.attn.c_proj.bias": { | |
| "scale": 0.3751963675022125, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.attn.c_proj.weight": { | |
| "scale": 0.4305261969566345, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.ln_1.bias": { | |
| "scale": 0.16870178282260895, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.ln_1.weight": { | |
| "scale": 0.09723272174596786, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.ln_2.bias": { | |
| "scale": 0.14638309180736542, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.ln_2.weight": { | |
| "scale": 0.24350528419017792, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.mlp.c_fc.bias": { | |
| "scale": 0.054711032658815384, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.2.mlp.c_fc.weight": { | |
| "scale": 0.19500534236431122, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.2.mlp.c_proj.bias": { | |
| "scale": 0.2771886885166168, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.2.mlp.c_proj.weight": { | |
| "scale": 0.7297950983047485, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.attn.c_attn.bias": { | |
| "scale": 0.03594465181231499, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.20.attn.c_attn.weight": { | |
| "scale": 0.12434620410203934, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.20.attn.c_proj.bias": { | |
| "scale": 0.09055911749601364, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.attn.c_proj.weight": { | |
| "scale": 0.10791827738285065, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.ln_1.bias": { | |
| "scale": 0.13368119299411774, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.ln_1.weight": { | |
| "scale": 0.15670302510261536, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.ln_2.bias": { | |
| "scale": 0.11155271530151367, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.ln_2.weight": { | |
| "scale": 0.16725917160511017, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.mlp.c_fc.bias": { | |
| "scale": 0.015253099612891674, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.20.mlp.c_fc.weight": { | |
| "scale": 0.06497155874967575, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.20.mlp.c_proj.bias": { | |
| "scale": 0.04290299862623215, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.20.mlp.c_proj.weight": { | |
| "scale": 0.2691304385662079, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.attn.c_attn.bias": { | |
| "scale": 0.04429187998175621, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.21.attn.c_attn.weight": { | |
| "scale": 0.10078362375497818, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.21.attn.c_proj.bias": { | |
| "scale": 0.07908321917057037, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.attn.c_proj.weight": { | |
| "scale": 0.1681288182735443, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.ln_1.bias": { | |
| "scale": 0.12408334761857986, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.ln_1.weight": { | |
| "scale": 0.1594894379377365, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.ln_2.bias": { | |
| "scale": 0.10880132019519806, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.ln_2.weight": { | |
| "scale": 0.1579473465681076, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.mlp.c_fc.bias": { | |
| "scale": 0.02153877541422844, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.21.mlp.c_fc.weight": { | |
| "scale": 0.07017097622156143, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.21.mlp.c_proj.bias": { | |
| "scale": 0.025096621364355087, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.21.mlp.c_proj.weight": { | |
| "scale": 0.20477719604969025, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.attn.c_attn.bias": { | |
| "scale": 0.04005954787135124, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.22.attn.c_attn.weight": { | |
| "scale": 0.07027842104434967, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.22.attn.c_proj.bias": { | |
| "scale": 0.06767906248569489, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.attn.c_proj.weight": { | |
| "scale": 0.09001224488019943, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.ln_1.bias": { | |
| "scale": 0.11637736856937408, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.ln_1.weight": { | |
| "scale": 0.15651044249534607, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.ln_2.bias": { | |
| "scale": 0.11879222095012665, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.ln_2.weight": { | |
| "scale": 0.1595410406589508, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.mlp.c_fc.bias": { | |
| "scale": 0.01718810759484768, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.22.mlp.c_fc.weight": { | |
| "scale": 0.07537717372179031, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.22.mlp.c_proj.bias": { | |
| "scale": 0.024833859875798225, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.22.mlp.c_proj.weight": { | |
| "scale": 0.17492838203907013, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.attn.c_attn.bias": { | |
| "scale": 0.0368594266474247, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.23.attn.c_attn.weight": { | |
| "scale": 0.0888095423579216, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.23.attn.c_proj.bias": { | |
| "scale": 0.02792450785636902, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.attn.c_proj.weight": { | |
| "scale": 0.10428722202777863, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.ln_1.bias": { | |
| "scale": 0.10264807939529419, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.ln_1.weight": { | |
| "scale": 0.14564886689186096, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.ln_2.bias": { | |
| "scale": 0.11791384220123291, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.ln_2.weight": { | |
| "scale": 0.15296445786952972, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.mlp.c_fc.bias": { | |
| "scale": 0.018314138054847717, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.23.mlp.c_fc.weight": { | |
| "scale": 0.0671987384557724, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.23.mlp.c_proj.bias": { | |
| "scale": 0.030889278277754784, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.23.mlp.c_proj.weight": { | |
| "scale": 0.13600651919841766, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.attn.c_attn.bias": { | |
| "scale": 0.04183034226298332, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.24.attn.c_attn.weight": { | |
| "scale": 0.05768841132521629, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.24.attn.c_proj.bias": { | |
| "scale": 0.02998465485870838, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.attn.c_proj.weight": { | |
| "scale": 0.09570259600877762, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.ln_1.bias": { | |
| "scale": 0.1029733270406723, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.ln_1.weight": { | |
| "scale": 0.1645420342683792, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.ln_2.bias": { | |
| "scale": 0.11531977355480194, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.ln_2.weight": { | |
| "scale": 0.16749481856822968, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.mlp.c_fc.bias": { | |
| "scale": 0.02433849684894085, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.24.mlp.c_fc.weight": { | |
| "scale": 0.056723203510046005, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.24.mlp.c_proj.bias": { | |
| "scale": 0.03132357448339462, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.24.mlp.c_proj.weight": { | |
| "scale": 0.08369418233633041, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.attn.c_attn.bias": { | |
| "scale": 0.043894506990909576, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.25.attn.c_attn.weight": { | |
| "scale": 0.05882854387164116, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.25.attn.c_proj.bias": { | |
| "scale": 0.03485613688826561, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.attn.c_proj.weight": { | |
| "scale": 0.08835429698228836, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.ln_1.bias": { | |
| "scale": 0.09291279315948486, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.ln_1.weight": { | |
| "scale": 0.17914152145385742, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.ln_2.bias": { | |
| "scale": 0.11765044182538986, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.ln_2.weight": { | |
| "scale": 0.1694125235080719, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.mlp.c_fc.bias": { | |
| "scale": 0.02206212468445301, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.25.mlp.c_fc.weight": { | |
| "scale": 0.06992777436971664, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.25.mlp.c_proj.bias": { | |
| "scale": 0.04068372771143913, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.25.mlp.c_proj.weight": { | |
| "scale": 0.09146194905042648, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.attn.c_attn.bias": { | |
| "scale": 0.08172182738780975, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.26.attn.c_attn.weight": { | |
| "scale": 0.06202762946486473, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.26.attn.c_proj.bias": { | |
| "scale": 0.03825494274497032, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.attn.c_proj.weight": { | |
| "scale": 0.10804062336683273, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.ln_1.bias": { | |
| "scale": 0.08116239309310913, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.ln_1.weight": { | |
| "scale": 0.17098096013069153, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.ln_2.bias": { | |
| "scale": 0.11734277009963989, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.ln_2.weight": { | |
| "scale": 0.1784631311893463, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.mlp.c_fc.bias": { | |
| "scale": 0.031606484204530716, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.26.mlp.c_fc.weight": { | |
| "scale": 0.05273488909006119, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.26.mlp.c_proj.bias": { | |
| "scale": 0.05072355270385742, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.26.mlp.c_proj.weight": { | |
| "scale": 0.14328084886074066, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.attn.c_attn.bias": { | |
| "scale": 0.05690082535147667, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.27.attn.c_attn.weight": { | |
| "scale": 0.07180392742156982, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.27.attn.c_proj.bias": { | |
| "scale": 0.05289801210165024, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.attn.c_proj.weight": { | |
| "scale": 0.0980907455086708, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.ln_1.bias": { | |
| "scale": 0.08269622176885605, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.ln_1.weight": { | |
| "scale": 0.17179779708385468, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.ln_2.bias": { | |
| "scale": 0.11928660422563553, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.ln_2.weight": { | |
| "scale": 0.18073512613773346, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.mlp.c_fc.bias": { | |
| "scale": 0.030272051692008972, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.27.mlp.c_fc.weight": { | |
| "scale": 0.05421888828277588, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.27.mlp.c_proj.bias": { | |
| "scale": 0.04355442896485329, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.27.mlp.c_proj.weight": { | |
| "scale": 0.15102733671665192, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.attn.c_attn.bias": { | |
| "scale": 0.04516652598977089, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.28.attn.c_attn.weight": { | |
| "scale": 0.05591177940368652, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.28.attn.c_proj.bias": { | |
| "scale": 0.07696392387151718, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.attn.c_proj.weight": { | |
| "scale": 0.1706492155790329, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.ln_1.bias": { | |
| "scale": 0.08867383003234863, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.ln_1.weight": { | |
| "scale": 0.18088868260383606, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.ln_2.bias": { | |
| "scale": 0.10548854619264603, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.ln_2.weight": { | |
| "scale": 0.19724524021148682, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.mlp.c_fc.bias": { | |
| "scale": 0.03599608689546585, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.28.mlp.c_fc.weight": { | |
| "scale": 0.15464694797992706, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.28.mlp.c_proj.bias": { | |
| "scale": 0.10096704214811325, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.28.mlp.c_proj.weight": { | |
| "scale": 0.5808261632919312, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.attn.c_attn.bias": { | |
| "scale": 0.061315275728702545, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.29.attn.c_attn.weight": { | |
| "scale": 0.072987399995327, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.29.attn.c_proj.bias": { | |
| "scale": 0.0334136076271534, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.attn.c_proj.weight": { | |
| "scale": 0.33243221044540405, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.ln_1.bias": { | |
| "scale": 0.0834951177239418, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.ln_1.weight": { | |
| "scale": 0.17551641166210175, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.ln_2.bias": { | |
| "scale": 0.060361869633197784, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.ln_2.weight": { | |
| "scale": 0.20771968364715576, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.mlp.c_fc.bias": { | |
| "scale": 0.04004308953881264, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.29.mlp.c_fc.weight": { | |
| "scale": 0.257427453994751, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.29.mlp.c_proj.bias": { | |
| "scale": 0.08023141324520111, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.29.mlp.c_proj.weight": { | |
| "scale": 1.5732485055923462, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.attn.c_attn.bias": { | |
| "scale": 0.036733418703079224, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.3.attn.c_attn.weight": { | |
| "scale": 0.09035025537014008, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.3.attn.c_proj.bias": { | |
| "scale": 0.38110747933387756, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.attn.c_proj.weight": { | |
| "scale": 0.5139561891555786, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.ln_1.bias": { | |
| "scale": 0.14589600265026093, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.ln_1.weight": { | |
| "scale": 0.11667633056640625, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.ln_2.bias": { | |
| "scale": 0.11972484737634659, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.ln_2.weight": { | |
| "scale": 0.2382904291152954, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.mlp.c_fc.bias": { | |
| "scale": 0.04690921679139137, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.3.mlp.c_fc.weight": { | |
| "scale": 0.16555476188659668, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.3.mlp.c_proj.bias": { | |
| "scale": 0.2362823784351349, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.3.mlp.c_proj.weight": { | |
| "scale": 0.9494528770446777, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.attn.c_attn.bias": { | |
| "scale": 0.03971828892827034, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.4.attn.c_attn.weight": { | |
| "scale": 0.0667421743273735, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.4.attn.c_proj.bias": { | |
| "scale": 0.3155006468296051, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.attn.c_proj.weight": { | |
| "scale": 0.31384560465812683, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.ln_1.bias": { | |
| "scale": 0.13793586194515228, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.ln_1.weight": { | |
| "scale": 0.1345834881067276, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.ln_2.bias": { | |
| "scale": 0.08576243370771408, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.ln_2.weight": { | |
| "scale": 0.23912283778190613, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.mlp.c_fc.bias": { | |
| "scale": 0.016857421025633812, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.4.mlp.c_fc.weight": { | |
| "scale": 0.0949544683098793, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.4.mlp.c_proj.bias": { | |
| "scale": 0.14176210761070251, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.4.mlp.c_proj.weight": { | |
| "scale": 1.0221376419067383, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.attn.c_attn.bias": { | |
| "scale": 0.03044235333800316, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.5.attn.c_attn.weight": { | |
| "scale": 0.0556764118373394, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.5.attn.c_proj.bias": { | |
| "scale": 0.17702117562294006, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.attn.c_proj.weight": { | |
| "scale": 0.13661186397075653, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.ln_1.bias": { | |
| "scale": 0.10966886579990387, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.ln_1.weight": { | |
| "scale": 0.17159949243068695, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.ln_2.bias": { | |
| "scale": 0.06587665528059006, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.ln_2.weight": { | |
| "scale": 0.2513478100299835, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.mlp.c_fc.bias": { | |
| "scale": 0.021238749846816063, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.5.mlp.c_fc.weight": { | |
| "scale": 0.08227789402008057, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.5.mlp.c_proj.bias": { | |
| "scale": 0.09128402918577194, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.5.mlp.c_proj.weight": { | |
| "scale": 0.8960160613059998, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.attn.c_attn.bias": { | |
| "scale": 0.04128195717930794, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.6.attn.c_attn.weight": { | |
| "scale": 0.09308914095163345, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.6.attn.c_proj.bias": { | |
| "scale": 0.09955250471830368, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.attn.c_proj.weight": { | |
| "scale": 0.09972423315048218, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.ln_1.bias": { | |
| "scale": 0.10598546266555786, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.ln_1.weight": { | |
| "scale": 0.20339453220367432, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.ln_2.bias": { | |
| "scale": 0.083857461810112, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.ln_2.weight": { | |
| "scale": 0.25439128279685974, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.mlp.c_fc.bias": { | |
| "scale": 0.026426810771226883, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.6.mlp.c_fc.weight": { | |
| "scale": 0.1172819659113884, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.6.mlp.c_proj.bias": { | |
| "scale": 0.08432779461145401, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.6.mlp.c_proj.weight": { | |
| "scale": 0.8800371289253235, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.attn.c_attn.bias": { | |
| "scale": 0.04523950815200806, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.7.attn.c_attn.weight": { | |
| "scale": 0.09843175113201141, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.7.attn.c_proj.bias": { | |
| "scale": 0.09096702188253403, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.attn.c_proj.weight": { | |
| "scale": 0.09420859813690186, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.ln_1.bias": { | |
| "scale": 0.152985617518425, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.ln_1.weight": { | |
| "scale": 0.15078113973140717, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.ln_2.bias": { | |
| "scale": 0.1413334757089615, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.ln_2.weight": { | |
| "scale": 0.2576432526111603, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.mlp.c_fc.bias": { | |
| "scale": 0.033283643424510956, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.7.mlp.c_fc.weight": { | |
| "scale": 0.07020364701747894, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.7.mlp.c_proj.bias": { | |
| "scale": 0.10713233798742294, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.7.mlp.c_proj.weight": { | |
| "scale": 0.42597928643226624, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.attn.c_attn.bias": { | |
| "scale": 0.03788977861404419, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.8.attn.c_attn.weight": { | |
| "scale": 0.08913753926753998, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.8.attn.c_proj.bias": { | |
| "scale": 0.08845029026269913, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.attn.c_proj.weight": { | |
| "scale": 0.09520888328552246, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.ln_1.bias": { | |
| "scale": 0.12517669796943665, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.ln_1.weight": { | |
| "scale": 0.1648232787847519, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.ln_2.bias": { | |
| "scale": 0.14098228514194489, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.ln_2.weight": { | |
| "scale": 0.24974539875984192, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.mlp.c_fc.bias": { | |
| "scale": 0.026503393426537514, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.8.mlp.c_fc.weight": { | |
| "scale": 0.06782539933919907, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.8.mlp.c_proj.bias": { | |
| "scale": 0.09713318198919296, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.8.mlp.c_proj.weight": { | |
| "scale": 0.43732672929763794, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.attn.c_attn.bias": { | |
| "scale": 0.04425312206149101, | |
| "shape": [ | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.9.attn.c_attn.weight": { | |
| "scale": 0.1181648001074791, | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ] | |
| }, | |
| "gpt.h.9.attn.c_proj.bias": { | |
| "scale": 0.07080474495887756, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.attn.c_proj.weight": { | |
| "scale": 0.10802309960126877, | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.ln_1.bias": { | |
| "scale": 0.15359099209308624, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.ln_1.weight": { | |
| "scale": 0.15670429170131683, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.ln_2.bias": { | |
| "scale": 0.1377403736114502, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.ln_2.weight": { | |
| "scale": 0.2576030492782593, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.mlp.c_fc.bias": { | |
| "scale": 0.031440090388059616, | |
| "shape": [ | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.9.mlp.c_fc.weight": { | |
| "scale": 0.07645577937364578, | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ] | |
| }, | |
| "gpt.h.9.mlp.c_proj.bias": { | |
| "scale": 0.10798899829387665, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.h.9.mlp.c_proj.weight": { | |
| "scale": 0.4369252622127533, | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.ln_f.bias": { | |
| "scale": 0.20075297355651855, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.ln_f.weight": { | |
| "scale": 0.41444164514541626, | |
| "shape": [ | |
| 1024 | |
| ] | |
| }, | |
| "gpt.wpe.emb.weight": { | |
| "scale": 0.0773041620850563, | |
| "shape": [ | |
| 608, | |
| 1024 | |
| ] | |
| }, | |
| "gpt.wte.weight": { | |
| "scale": 0.08020960539579391, | |
| "shape": [ | |
| 1026, | |
| 1024 | |
| ] | |
| }, | |
| "mel_head.bias": { | |
| "scale": 0.028449567034840584, | |
| "shape": [ | |
| 1026 | |
| ] | |
| }, | |
| "mel_head.weight": { | |
| "scale": 0.07584048807621002, | |
| "shape": [ | |
| 1026, | |
| 1024 | |
| ] | |
| } | |
| } |