{ "final_norm.bias": { "scale": 0.08834397792816162, "shape": [ 1024 ] }, "final_norm.weight": { "scale": 0.7890332937240601, "shape": [ 1024 ] }, "gpt.h.0.attn.c_attn.bias": { "scale": 0.12309323251247406, "shape": [ 3072 ] }, "gpt.h.0.attn.c_attn.weight": { "scale": 0.1041356548666954, "shape": [ 1024, 3072 ] }, "gpt.h.0.attn.c_proj.bias": { "scale": 0.08951065689325333, "shape": [ 1024 ] }, "gpt.h.0.attn.c_proj.weight": { "scale": 0.1165793165564537, "shape": [ 1024, 1024 ] }, "gpt.h.0.ln_1.bias": { "scale": 0.026223499327898026, "shape": [ 1024 ] }, "gpt.h.0.ln_1.weight": { "scale": 0.03431953117251396, "shape": [ 1024 ] }, "gpt.h.0.ln_2.bias": { "scale": 0.20567700266838074, "shape": [ 1024 ] }, "gpt.h.0.ln_2.weight": { "scale": 0.11458853632211685, "shape": [ 1024 ] }, "gpt.h.0.mlp.c_fc.bias": { "scale": 0.047463927417993546, "shape": [ 4096 ] }, "gpt.h.0.mlp.c_fc.weight": { "scale": 0.17041946947574615, "shape": [ 1024, 4096 ] }, "gpt.h.0.mlp.c_proj.bias": { "scale": 0.29554909467697144, "shape": [ 1024 ] }, "gpt.h.0.mlp.c_proj.weight": { "scale": 0.6621343493461609, "shape": [ 4096, 1024 ] }, "gpt.h.1.attn.c_attn.bias": { "scale": 0.09180662781000137, "shape": [ 3072 ] }, "gpt.h.1.attn.c_attn.weight": { "scale": 0.06224552541971207, "shape": [ 1024, 3072 ] }, "gpt.h.1.attn.c_proj.bias": { "scale": 0.2972453534603119, "shape": [ 1024 ] }, "gpt.h.1.attn.c_proj.weight": { "scale": 0.45653465390205383, "shape": [ 1024, 1024 ] }, "gpt.h.1.ln_1.bias": { "scale": 0.20046189427375793, "shape": [ 1024 ] }, "gpt.h.1.ln_1.weight": { "scale": 0.07655565440654755, "shape": [ 1024 ] }, "gpt.h.1.ln_2.bias": { "scale": 0.21620801091194153, "shape": [ 1024 ] }, "gpt.h.1.ln_2.weight": { "scale": 0.20269657671451569, "shape": [ 1024 ] }, "gpt.h.1.mlp.c_fc.bias": { "scale": 0.026467734947800636, "shape": [ 4096 ] }, "gpt.h.1.mlp.c_fc.weight": { "scale": 0.12142758071422577, "shape": [ 1024, 4096 ] }, "gpt.h.1.mlp.c_proj.bias": { "scale": 0.3446175754070282, "shape": [ 1024 ] }, "gpt.h.1.mlp.c_proj.weight": { "scale": 0.7414490580558777, "shape": [ 4096, 1024 ] }, "gpt.h.10.attn.c_attn.bias": { "scale": 0.044142965227365494, "shape": [ 3072 ] }, "gpt.h.10.attn.c_attn.weight": { "scale": 0.11839079111814499, "shape": [ 1024, 3072 ] }, "gpt.h.10.attn.c_proj.bias": { "scale": 0.0456073097884655, "shape": [ 1024 ] }, "gpt.h.10.attn.c_proj.weight": { "scale": 0.08686741441488266, "shape": [ 1024, 1024 ] }, "gpt.h.10.ln_1.bias": { "scale": 0.14560243487358093, "shape": [ 1024 ] }, "gpt.h.10.ln_1.weight": { "scale": 0.15239107608795166, "shape": [ 1024 ] }, "gpt.h.10.ln_2.bias": { "scale": 0.13042710721492767, "shape": [ 1024 ] }, "gpt.h.10.ln_2.weight": { "scale": 0.25696828961372375, "shape": [ 1024 ] }, "gpt.h.10.mlp.c_fc.bias": { "scale": 0.029524575918912888, "shape": [ 4096 ] }, "gpt.h.10.mlp.c_fc.weight": { "scale": 0.09666890650987625, "shape": [ 1024, 4096 ] }, "gpt.h.10.mlp.c_proj.bias": { "scale": 0.1028929278254509, "shape": [ 1024 ] }, "gpt.h.10.mlp.c_proj.weight": { "scale": 0.3654429316520691, "shape": [ 4096, 1024 ] }, "gpt.h.11.attn.c_attn.bias": { "scale": 0.048490703105926514, "shape": [ 3072 ] }, "gpt.h.11.attn.c_attn.weight": { "scale": 0.11234062910079956, "shape": [ 1024, 3072 ] }, "gpt.h.11.attn.c_proj.bias": { "scale": 0.0510806143283844, "shape": [ 1024 ] }, "gpt.h.11.attn.c_proj.weight": { "scale": 0.08797170966863632, "shape": [ 1024, 1024 ] }, "gpt.h.11.ln_1.bias": { "scale": 0.164643794298172, "shape": [ 1024 ] }, "gpt.h.11.ln_1.weight": { "scale": 0.16134341061115265, "shape": [ 1024 ] }, "gpt.h.11.ln_2.bias": { "scale": 0.09844086319208145, "shape": [ 1024 ] }, "gpt.h.11.ln_2.weight": { "scale": 0.22313998639583588, "shape": [ 1024 ] }, "gpt.h.11.mlp.c_fc.bias": { "scale": 0.03060619719326496, "shape": [ 4096 ] }, "gpt.h.11.mlp.c_fc.weight": { "scale": 0.06761165708303452, "shape": [ 1024, 4096 ] }, "gpt.h.11.mlp.c_proj.bias": { "scale": 0.10643502324819565, "shape": [ 1024 ] }, "gpt.h.11.mlp.c_proj.weight": { "scale": 0.4576462209224701, "shape": [ 4096, 1024 ] }, "gpt.h.12.attn.c_attn.bias": { "scale": 0.04992813244462013, "shape": [ 3072 ] }, "gpt.h.12.attn.c_attn.weight": { "scale": 0.14503903687000275, "shape": [ 1024, 3072 ] }, "gpt.h.12.attn.c_proj.bias": { "scale": 0.033475592732429504, "shape": [ 1024 ] }, "gpt.h.12.attn.c_proj.weight": { "scale": 0.11046390980482101, "shape": [ 1024, 1024 ] }, "gpt.h.12.ln_1.bias": { "scale": 0.18613700568675995, "shape": [ 1024 ] }, "gpt.h.12.ln_1.weight": { "scale": 0.16866794228553772, "shape": [ 1024 ] }, "gpt.h.12.ln_2.bias": { "scale": 0.12722595036029816, "shape": [ 1024 ] }, "gpt.h.12.ln_2.weight": { "scale": 0.23250164091587067, "shape": [ 1024 ] }, "gpt.h.12.mlp.c_fc.bias": { "scale": 0.014812483452260494, "shape": [ 4096 ] }, "gpt.h.12.mlp.c_fc.weight": { "scale": 0.068848617374897, "shape": [ 1024, 4096 ] }, "gpt.h.12.mlp.c_proj.bias": { "scale": 0.12035634368658066, "shape": [ 1024 ] }, "gpt.h.12.mlp.c_proj.weight": { "scale": 0.2743368148803711, "shape": [ 4096, 1024 ] }, "gpt.h.13.attn.c_attn.bias": { "scale": 0.04760030657052994, "shape": [ 3072 ] }, "gpt.h.13.attn.c_attn.weight": { "scale": 0.08714257925748825, "shape": [ 1024, 3072 ] }, "gpt.h.13.attn.c_proj.bias": { "scale": 0.06746888160705566, "shape": [ 1024 ] }, "gpt.h.13.attn.c_proj.weight": { "scale": 0.11844473332166672, "shape": [ 1024, 1024 ] }, "gpt.h.13.ln_1.bias": { "scale": 0.15157721936702728, "shape": [ 1024 ] }, "gpt.h.13.ln_1.weight": { "scale": 0.16393320262432098, "shape": [ 1024 ] }, "gpt.h.13.ln_2.bias": { "scale": 0.1306534856557846, "shape": [ 1024 ] }, "gpt.h.13.ln_2.weight": { "scale": 0.22248651087284088, "shape": [ 1024 ] }, "gpt.h.13.mlp.c_fc.bias": { "scale": 0.019469883292913437, "shape": [ 4096 ] }, "gpt.h.13.mlp.c_fc.weight": { "scale": 0.06430874019861221, "shape": [ 1024, 4096 ] }, "gpt.h.13.mlp.c_proj.bias": { "scale": 0.11026618629693985, "shape": [ 1024 ] }, "gpt.h.13.mlp.c_proj.weight": { "scale": 0.2532596290111542, "shape": [ 4096, 1024 ] }, "gpt.h.14.attn.c_attn.bias": { "scale": 0.04019672051072121, "shape": [ 3072 ] }, "gpt.h.14.attn.c_attn.weight": { "scale": 0.14072343707084656, "shape": [ 1024, 3072 ] }, "gpt.h.14.attn.c_proj.bias": { "scale": 0.02364635095000267, "shape": [ 1024 ] }, "gpt.h.14.attn.c_proj.weight": { "scale": 0.10160692036151886, "shape": [ 1024, 1024 ] }, "gpt.h.14.ln_1.bias": { "scale": 0.159767284989357, "shape": [ 1024 ] }, "gpt.h.14.ln_1.weight": { "scale": 0.16219459474086761, "shape": [ 1024 ] }, "gpt.h.14.ln_2.bias": { "scale": 0.13014180958271027, "shape": [ 1024 ] }, "gpt.h.14.ln_2.weight": { "scale": 0.21348479390144348, "shape": [ 1024 ] }, "gpt.h.14.mlp.c_fc.bias": { "scale": 0.01531550008803606, "shape": [ 4096 ] }, "gpt.h.14.mlp.c_fc.weight": { "scale": 0.08244454115629196, "shape": [ 1024, 4096 ] }, "gpt.h.14.mlp.c_proj.bias": { "scale": 0.1048700362443924, "shape": [ 1024 ] }, "gpt.h.14.mlp.c_proj.weight": { "scale": 0.28695791959762573, "shape": [ 4096, 1024 ] }, "gpt.h.15.attn.c_attn.bias": { "scale": 0.04081624746322632, "shape": [ 3072 ] }, "gpt.h.15.attn.c_attn.weight": { "scale": 0.07819346338510513, "shape": [ 1024, 3072 ] }, "gpt.h.15.attn.c_proj.bias": { "scale": 0.08492325991392136, "shape": [ 1024 ] }, "gpt.h.15.attn.c_proj.weight": { "scale": 0.11697079986333847, "shape": [ 1024, 1024 ] }, "gpt.h.15.ln_1.bias": { "scale": 0.14451484382152557, "shape": [ 1024 ] }, "gpt.h.15.ln_1.weight": { "scale": 0.16579607129096985, "shape": [ 1024 ] }, "gpt.h.15.ln_2.bias": { "scale": 0.11252031475305557, "shape": [ 1024 ] }, "gpt.h.15.ln_2.weight": { "scale": 0.20037208497524261, "shape": [ 1024 ] }, "gpt.h.15.mlp.c_fc.bias": { "scale": 0.020076142624020576, "shape": [ 4096 ] }, "gpt.h.15.mlp.c_fc.weight": { "scale": 0.0844724252820015, "shape": [ 1024, 4096 ] }, "gpt.h.15.mlp.c_proj.bias": { "scale": 0.09757417440414429, "shape": [ 1024 ] }, "gpt.h.15.mlp.c_proj.weight": { "scale": 0.310793936252594, "shape": [ 4096, 1024 ] }, "gpt.h.16.attn.c_attn.bias": { "scale": 0.04743470624089241, "shape": [ 3072 ] }, "gpt.h.16.attn.c_attn.weight": { "scale": 0.12722057104110718, "shape": [ 1024, 3072 ] }, "gpt.h.16.attn.c_proj.bias": { "scale": 0.09606064110994339, "shape": [ 1024 ] }, "gpt.h.16.attn.c_proj.weight": { "scale": 0.07591201364994049, "shape": [ 1024, 1024 ] }, "gpt.h.16.ln_1.bias": { "scale": 0.15709154307842255, "shape": [ 1024 ] }, "gpt.h.16.ln_1.weight": { "scale": 0.15911130607128143, "shape": [ 1024 ] }, "gpt.h.16.ln_2.bias": { "scale": 0.12005039304494858, "shape": [ 1024 ] }, "gpt.h.16.ln_2.weight": { "scale": 0.18429037928581238, "shape": [ 1024 ] }, "gpt.h.16.mlp.c_fc.bias": { "scale": 0.022201305255293846, "shape": [ 4096 ] }, "gpt.h.16.mlp.c_fc.weight": { "scale": 0.08008849620819092, "shape": [ 1024, 4096 ] }, "gpt.h.16.mlp.c_proj.bias": { "scale": 0.09774907678365707, "shape": [ 1024 ] }, "gpt.h.16.mlp.c_proj.weight": { "scale": 0.16212008893489838, "shape": [ 4096, 1024 ] }, "gpt.h.17.attn.c_attn.bias": { "scale": 0.04417693614959717, "shape": [ 3072 ] }, "gpt.h.17.attn.c_attn.weight": { "scale": 0.1320490539073944, "shape": [ 1024, 3072 ] }, "gpt.h.17.attn.c_proj.bias": { "scale": 0.06338126957416534, "shape": [ 1024 ] }, "gpt.h.17.attn.c_proj.weight": { "scale": 0.13893575966358185, "shape": [ 1024, 1024 ] }, "gpt.h.17.ln_1.bias": { "scale": 0.15823052823543549, "shape": [ 1024 ] }, "gpt.h.17.ln_1.weight": { "scale": 0.16002225875854492, "shape": [ 1024 ] }, "gpt.h.17.ln_2.bias": { "scale": 0.13184016942977905, "shape": [ 1024 ] }, "gpt.h.17.ln_2.weight": { "scale": 0.1869189441204071, "shape": [ 1024 ] }, "gpt.h.17.mlp.c_fc.bias": { "scale": 0.02481200359761715, "shape": [ 4096 ] }, "gpt.h.17.mlp.c_fc.weight": { "scale": 0.0685386210680008, "shape": [ 1024, 4096 ] }, "gpt.h.17.mlp.c_proj.bias": { "scale": 0.08835189044475555, "shape": [ 1024 ] }, "gpt.h.17.mlp.c_proj.weight": { "scale": 0.18417593836784363, "shape": [ 4096, 1024 ] }, "gpt.h.18.attn.c_attn.bias": { "scale": 0.037581078708171844, "shape": [ 3072 ] }, "gpt.h.18.attn.c_attn.weight": { "scale": 0.12254094332456589, "shape": [ 1024, 3072 ] }, "gpt.h.18.attn.c_proj.bias": { "scale": 0.11249931156635284, "shape": [ 1024 ] }, "gpt.h.18.attn.c_proj.weight": { "scale": 0.09757298976182938, "shape": [ 1024, 1024 ] }, "gpt.h.18.ln_1.bias": { "scale": 0.1457567662000656, "shape": [ 1024 ] }, "gpt.h.18.ln_1.weight": { "scale": 0.15891136229038239, "shape": [ 1024 ] }, "gpt.h.18.ln_2.bias": { "scale": 0.11753548681735992, "shape": [ 1024 ] }, "gpt.h.18.ln_2.weight": { "scale": 0.17903394997119904, "shape": [ 1024 ] }, "gpt.h.18.mlp.c_fc.bias": { "scale": 0.021563095971941948, "shape": [ 4096 ] }, "gpt.h.18.mlp.c_fc.weight": { "scale": 0.06829343736171722, "shape": [ 1024, 4096 ] }, "gpt.h.18.mlp.c_proj.bias": { "scale": 0.06819847971200943, "shape": [ 1024 ] }, "gpt.h.18.mlp.c_proj.weight": { "scale": 0.12814833223819733, "shape": [ 4096, 1024 ] }, "gpt.h.19.attn.c_attn.bias": { "scale": 0.04691386595368385, "shape": [ 3072 ] }, "gpt.h.19.attn.c_attn.weight": { "scale": 0.11684088408946991, "shape": [ 1024, 3072 ] }, "gpt.h.19.attn.c_proj.bias": { "scale": 0.05939425155520439, "shape": [ 1024 ] }, "gpt.h.19.attn.c_proj.weight": { "scale": 0.12523870170116425, "shape": [ 1024, 1024 ] }, "gpt.h.19.ln_1.bias": { "scale": 0.14781410992145538, "shape": [ 1024 ] }, "gpt.h.19.ln_1.weight": { "scale": 0.15612022578716278, "shape": [ 1024 ] }, "gpt.h.19.ln_2.bias": { "scale": 0.10865359008312225, "shape": [ 1024 ] }, "gpt.h.19.ln_2.weight": { "scale": 0.17884387075901031, "shape": [ 1024 ] }, "gpt.h.19.mlp.c_fc.bias": { "scale": 0.023445727303624153, "shape": [ 4096 ] }, "gpt.h.19.mlp.c_fc.weight": { "scale": 0.07771860063076019, "shape": [ 1024, 4096 ] }, "gpt.h.19.mlp.c_proj.bias": { "scale": 0.05867428332567215, "shape": [ 1024 ] }, "gpt.h.19.mlp.c_proj.weight": { "scale": 0.1523352414369583, "shape": [ 4096, 1024 ] }, "gpt.h.2.attn.c_attn.bias": { "scale": 0.04482164978981018, "shape": [ 3072 ] }, "gpt.h.2.attn.c_attn.weight": { "scale": 0.06159922480583191, "shape": [ 1024, 3072 ] }, "gpt.h.2.attn.c_proj.bias": { "scale": 0.3751963675022125, "shape": [ 1024 ] }, "gpt.h.2.attn.c_proj.weight": { "scale": 0.4305261969566345, "shape": [ 1024, 1024 ] }, "gpt.h.2.ln_1.bias": { "scale": 0.16870178282260895, "shape": [ 1024 ] }, "gpt.h.2.ln_1.weight": { "scale": 0.09723272174596786, "shape": [ 1024 ] }, "gpt.h.2.ln_2.bias": { "scale": 0.14638309180736542, "shape": [ 1024 ] }, "gpt.h.2.ln_2.weight": { "scale": 0.24350528419017792, "shape": [ 1024 ] }, "gpt.h.2.mlp.c_fc.bias": { "scale": 0.054711032658815384, "shape": [ 4096 ] }, "gpt.h.2.mlp.c_fc.weight": { "scale": 0.19500534236431122, "shape": [ 1024, 4096 ] }, "gpt.h.2.mlp.c_proj.bias": { "scale": 0.2771886885166168, "shape": [ 1024 ] }, "gpt.h.2.mlp.c_proj.weight": { "scale": 0.7297950983047485, "shape": [ 4096, 1024 ] }, "gpt.h.20.attn.c_attn.bias": { "scale": 0.03594465181231499, "shape": [ 3072 ] }, "gpt.h.20.attn.c_attn.weight": { "scale": 0.12434620410203934, "shape": [ 1024, 3072 ] }, "gpt.h.20.attn.c_proj.bias": { "scale": 0.09055911749601364, "shape": [ 1024 ] }, "gpt.h.20.attn.c_proj.weight": { "scale": 0.10791827738285065, "shape": [ 1024, 1024 ] }, "gpt.h.20.ln_1.bias": { "scale": 0.13368119299411774, "shape": [ 1024 ] }, "gpt.h.20.ln_1.weight": { "scale": 0.15670302510261536, "shape": [ 1024 ] }, "gpt.h.20.ln_2.bias": { "scale": 0.11155271530151367, "shape": [ 1024 ] }, "gpt.h.20.ln_2.weight": { "scale": 0.16725917160511017, "shape": [ 1024 ] }, "gpt.h.20.mlp.c_fc.bias": { "scale": 0.015253099612891674, "shape": [ 4096 ] }, "gpt.h.20.mlp.c_fc.weight": { "scale": 0.06497155874967575, "shape": [ 1024, 4096 ] }, "gpt.h.20.mlp.c_proj.bias": { "scale": 0.04290299862623215, "shape": [ 1024 ] }, "gpt.h.20.mlp.c_proj.weight": { "scale": 0.2691304385662079, "shape": [ 4096, 1024 ] }, "gpt.h.21.attn.c_attn.bias": { "scale": 0.04429187998175621, "shape": [ 3072 ] }, "gpt.h.21.attn.c_attn.weight": { "scale": 0.10078362375497818, "shape": [ 1024, 3072 ] }, "gpt.h.21.attn.c_proj.bias": { "scale": 0.07908321917057037, "shape": [ 1024 ] }, "gpt.h.21.attn.c_proj.weight": { "scale": 0.1681288182735443, "shape": [ 1024, 1024 ] }, "gpt.h.21.ln_1.bias": { "scale": 0.12408334761857986, "shape": [ 1024 ] }, "gpt.h.21.ln_1.weight": { "scale": 0.1594894379377365, "shape": [ 1024 ] }, "gpt.h.21.ln_2.bias": { "scale": 0.10880132019519806, "shape": [ 1024 ] }, "gpt.h.21.ln_2.weight": { "scale": 0.1579473465681076, "shape": [ 1024 ] }, "gpt.h.21.mlp.c_fc.bias": { "scale": 0.02153877541422844, "shape": [ 4096 ] }, "gpt.h.21.mlp.c_fc.weight": { "scale": 0.07017097622156143, "shape": [ 1024, 4096 ] }, "gpt.h.21.mlp.c_proj.bias": { "scale": 0.025096621364355087, "shape": [ 1024 ] }, "gpt.h.21.mlp.c_proj.weight": { "scale": 0.20477719604969025, "shape": [ 4096, 1024 ] }, "gpt.h.22.attn.c_attn.bias": { "scale": 0.04005954787135124, "shape": [ 3072 ] }, "gpt.h.22.attn.c_attn.weight": { "scale": 0.07027842104434967, "shape": [ 1024, 3072 ] }, "gpt.h.22.attn.c_proj.bias": { "scale": 0.06767906248569489, "shape": [ 1024 ] }, "gpt.h.22.attn.c_proj.weight": { "scale": 0.09001224488019943, "shape": [ 1024, 1024 ] }, "gpt.h.22.ln_1.bias": { "scale": 0.11637736856937408, "shape": [ 1024 ] }, "gpt.h.22.ln_1.weight": { "scale": 0.15651044249534607, "shape": [ 1024 ] }, "gpt.h.22.ln_2.bias": { "scale": 0.11879222095012665, "shape": [ 1024 ] }, "gpt.h.22.ln_2.weight": { "scale": 0.1595410406589508, "shape": [ 1024 ] }, "gpt.h.22.mlp.c_fc.bias": { "scale": 0.01718810759484768, "shape": [ 4096 ] }, "gpt.h.22.mlp.c_fc.weight": { "scale": 0.07537717372179031, "shape": [ 1024, 4096 ] }, "gpt.h.22.mlp.c_proj.bias": { "scale": 0.024833859875798225, "shape": [ 1024 ] }, "gpt.h.22.mlp.c_proj.weight": { "scale": 0.17492838203907013, "shape": [ 4096, 1024 ] }, "gpt.h.23.attn.c_attn.bias": { "scale": 0.0368594266474247, "shape": [ 3072 ] }, "gpt.h.23.attn.c_attn.weight": { "scale": 0.0888095423579216, "shape": [ 1024, 3072 ] }, "gpt.h.23.attn.c_proj.bias": { "scale": 0.02792450785636902, "shape": [ 1024 ] }, "gpt.h.23.attn.c_proj.weight": { "scale": 0.10428722202777863, "shape": [ 1024, 1024 ] }, "gpt.h.23.ln_1.bias": { "scale": 0.10264807939529419, "shape": [ 1024 ] }, "gpt.h.23.ln_1.weight": { "scale": 0.14564886689186096, "shape": [ 1024 ] }, "gpt.h.23.ln_2.bias": { "scale": 0.11791384220123291, "shape": [ 1024 ] }, "gpt.h.23.ln_2.weight": { "scale": 0.15296445786952972, "shape": [ 1024 ] }, "gpt.h.23.mlp.c_fc.bias": { "scale": 0.018314138054847717, "shape": [ 4096 ] }, "gpt.h.23.mlp.c_fc.weight": { "scale": 0.0671987384557724, "shape": [ 1024, 4096 ] }, "gpt.h.23.mlp.c_proj.bias": { "scale": 0.030889278277754784, "shape": [ 1024 ] }, "gpt.h.23.mlp.c_proj.weight": { "scale": 0.13600651919841766, "shape": [ 4096, 1024 ] }, "gpt.h.24.attn.c_attn.bias": { "scale": 0.04183034226298332, "shape": [ 3072 ] }, "gpt.h.24.attn.c_attn.weight": { "scale": 0.05768841132521629, "shape": [ 1024, 3072 ] }, "gpt.h.24.attn.c_proj.bias": { "scale": 0.02998465485870838, "shape": [ 1024 ] }, "gpt.h.24.attn.c_proj.weight": { "scale": 0.09570259600877762, "shape": [ 1024, 1024 ] }, "gpt.h.24.ln_1.bias": { "scale": 0.1029733270406723, "shape": [ 1024 ] }, "gpt.h.24.ln_1.weight": { "scale": 0.1645420342683792, "shape": [ 1024 ] }, "gpt.h.24.ln_2.bias": { "scale": 0.11531977355480194, "shape": [ 1024 ] }, "gpt.h.24.ln_2.weight": { "scale": 0.16749481856822968, "shape": [ 1024 ] }, "gpt.h.24.mlp.c_fc.bias": { "scale": 0.02433849684894085, "shape": [ 4096 ] }, "gpt.h.24.mlp.c_fc.weight": { "scale": 0.056723203510046005, "shape": [ 1024, 4096 ] }, "gpt.h.24.mlp.c_proj.bias": { "scale": 0.03132357448339462, "shape": [ 1024 ] }, "gpt.h.24.mlp.c_proj.weight": { "scale": 0.08369418233633041, "shape": [ 4096, 1024 ] }, "gpt.h.25.attn.c_attn.bias": { "scale": 0.043894506990909576, "shape": [ 3072 ] }, "gpt.h.25.attn.c_attn.weight": { "scale": 0.05882854387164116, "shape": [ 1024, 3072 ] }, "gpt.h.25.attn.c_proj.bias": { "scale": 0.03485613688826561, "shape": [ 1024 ] }, "gpt.h.25.attn.c_proj.weight": { "scale": 0.08835429698228836, "shape": [ 1024, 1024 ] }, "gpt.h.25.ln_1.bias": { "scale": 0.09291279315948486, "shape": [ 1024 ] }, "gpt.h.25.ln_1.weight": { "scale": 0.17914152145385742, "shape": [ 1024 ] }, "gpt.h.25.ln_2.bias": { "scale": 0.11765044182538986, "shape": [ 1024 ] }, "gpt.h.25.ln_2.weight": { "scale": 0.1694125235080719, "shape": [ 1024 ] }, "gpt.h.25.mlp.c_fc.bias": { "scale": 0.02206212468445301, "shape": [ 4096 ] }, "gpt.h.25.mlp.c_fc.weight": { "scale": 0.06992777436971664, "shape": [ 1024, 4096 ] }, "gpt.h.25.mlp.c_proj.bias": { "scale": 0.04068372771143913, "shape": [ 1024 ] }, "gpt.h.25.mlp.c_proj.weight": { "scale": 0.09146194905042648, "shape": [ 4096, 1024 ] }, "gpt.h.26.attn.c_attn.bias": { "scale": 0.08172182738780975, "shape": [ 3072 ] }, "gpt.h.26.attn.c_attn.weight": { "scale": 0.06202762946486473, "shape": [ 1024, 3072 ] }, "gpt.h.26.attn.c_proj.bias": { "scale": 0.03825494274497032, "shape": [ 1024 ] }, "gpt.h.26.attn.c_proj.weight": { "scale": 0.10804062336683273, "shape": [ 1024, 1024 ] }, "gpt.h.26.ln_1.bias": { "scale": 0.08116239309310913, "shape": [ 1024 ] }, "gpt.h.26.ln_1.weight": { "scale": 0.17098096013069153, "shape": [ 1024 ] }, "gpt.h.26.ln_2.bias": { "scale": 0.11734277009963989, "shape": [ 1024 ] }, "gpt.h.26.ln_2.weight": { "scale": 0.1784631311893463, "shape": [ 1024 ] }, "gpt.h.26.mlp.c_fc.bias": { "scale": 0.031606484204530716, "shape": [ 4096 ] }, "gpt.h.26.mlp.c_fc.weight": { "scale": 0.05273488909006119, "shape": [ 1024, 4096 ] }, "gpt.h.26.mlp.c_proj.bias": { "scale": 0.05072355270385742, "shape": [ 1024 ] }, "gpt.h.26.mlp.c_proj.weight": { "scale": 0.14328084886074066, "shape": [ 4096, 1024 ] }, "gpt.h.27.attn.c_attn.bias": { "scale": 0.05690082535147667, "shape": [ 3072 ] }, "gpt.h.27.attn.c_attn.weight": { "scale": 0.07180392742156982, "shape": [ 1024, 3072 ] }, "gpt.h.27.attn.c_proj.bias": { "scale": 0.05289801210165024, "shape": [ 1024 ] }, "gpt.h.27.attn.c_proj.weight": { "scale": 0.0980907455086708, "shape": [ 1024, 1024 ] }, "gpt.h.27.ln_1.bias": { "scale": 0.08269622176885605, "shape": [ 1024 ] }, "gpt.h.27.ln_1.weight": { "scale": 0.17179779708385468, "shape": [ 1024 ] }, "gpt.h.27.ln_2.bias": { "scale": 0.11928660422563553, "shape": [ 1024 ] }, "gpt.h.27.ln_2.weight": { "scale": 0.18073512613773346, "shape": [ 1024 ] }, "gpt.h.27.mlp.c_fc.bias": { "scale": 0.030272051692008972, "shape": [ 4096 ] }, "gpt.h.27.mlp.c_fc.weight": { "scale": 0.05421888828277588, "shape": [ 1024, 4096 ] }, "gpt.h.27.mlp.c_proj.bias": { "scale": 0.04355442896485329, "shape": [ 1024 ] }, "gpt.h.27.mlp.c_proj.weight": { "scale": 0.15102733671665192, "shape": [ 4096, 1024 ] }, "gpt.h.28.attn.c_attn.bias": { "scale": 0.04516652598977089, "shape": [ 3072 ] }, "gpt.h.28.attn.c_attn.weight": { "scale": 0.05591177940368652, "shape": [ 1024, 3072 ] }, "gpt.h.28.attn.c_proj.bias": { "scale": 0.07696392387151718, "shape": [ 1024 ] }, "gpt.h.28.attn.c_proj.weight": { "scale": 0.1706492155790329, "shape": [ 1024, 1024 ] }, "gpt.h.28.ln_1.bias": { "scale": 0.08867383003234863, "shape": [ 1024 ] }, "gpt.h.28.ln_1.weight": { "scale": 0.18088868260383606, "shape": [ 1024 ] }, "gpt.h.28.ln_2.bias": { "scale": 0.10548854619264603, "shape": [ 1024 ] }, "gpt.h.28.ln_2.weight": { "scale": 0.19724524021148682, "shape": [ 1024 ] }, "gpt.h.28.mlp.c_fc.bias": { "scale": 0.03599608689546585, "shape": [ 4096 ] }, "gpt.h.28.mlp.c_fc.weight": { "scale": 0.15464694797992706, "shape": [ 1024, 4096 ] }, "gpt.h.28.mlp.c_proj.bias": { "scale": 0.10096704214811325, "shape": [ 1024 ] }, "gpt.h.28.mlp.c_proj.weight": { "scale": 0.5808261632919312, "shape": [ 4096, 1024 ] }, "gpt.h.29.attn.c_attn.bias": { "scale": 0.061315275728702545, "shape": [ 3072 ] }, "gpt.h.29.attn.c_attn.weight": { "scale": 0.072987399995327, "shape": [ 1024, 3072 ] }, "gpt.h.29.attn.c_proj.bias": { "scale": 0.0334136076271534, "shape": [ 1024 ] }, "gpt.h.29.attn.c_proj.weight": { "scale": 0.33243221044540405, "shape": [ 1024, 1024 ] }, "gpt.h.29.ln_1.bias": { "scale": 0.0834951177239418, "shape": [ 1024 ] }, "gpt.h.29.ln_1.weight": { "scale": 0.17551641166210175, "shape": [ 1024 ] }, "gpt.h.29.ln_2.bias": { "scale": 0.060361869633197784, "shape": [ 1024 ] }, "gpt.h.29.ln_2.weight": { "scale": 0.20771968364715576, "shape": [ 1024 ] }, "gpt.h.29.mlp.c_fc.bias": { "scale": 0.04004308953881264, "shape": [ 4096 ] }, "gpt.h.29.mlp.c_fc.weight": { "scale": 0.257427453994751, "shape": [ 1024, 4096 ] }, "gpt.h.29.mlp.c_proj.bias": { "scale": 0.08023141324520111, "shape": [ 1024 ] }, "gpt.h.29.mlp.c_proj.weight": { "scale": 1.5732485055923462, "shape": [ 4096, 1024 ] }, "gpt.h.3.attn.c_attn.bias": { "scale": 0.036733418703079224, "shape": [ 3072 ] }, "gpt.h.3.attn.c_attn.weight": { "scale": 0.09035025537014008, "shape": [ 1024, 3072 ] }, "gpt.h.3.attn.c_proj.bias": { "scale": 0.38110747933387756, "shape": [ 1024 ] }, "gpt.h.3.attn.c_proj.weight": { "scale": 0.5139561891555786, "shape": [ 1024, 1024 ] }, "gpt.h.3.ln_1.bias": { "scale": 0.14589600265026093, "shape": [ 1024 ] }, "gpt.h.3.ln_1.weight": { "scale": 0.11667633056640625, "shape": [ 1024 ] }, "gpt.h.3.ln_2.bias": { "scale": 0.11972484737634659, "shape": [ 1024 ] }, "gpt.h.3.ln_2.weight": { "scale": 0.2382904291152954, "shape": [ 1024 ] }, "gpt.h.3.mlp.c_fc.bias": { "scale": 0.04690921679139137, "shape": [ 4096 ] }, "gpt.h.3.mlp.c_fc.weight": { "scale": 0.16555476188659668, "shape": [ 1024, 4096 ] }, "gpt.h.3.mlp.c_proj.bias": { "scale": 0.2362823784351349, "shape": [ 1024 ] }, "gpt.h.3.mlp.c_proj.weight": { "scale": 0.9494528770446777, "shape": [ 4096, 1024 ] }, "gpt.h.4.attn.c_attn.bias": { "scale": 0.03971828892827034, "shape": [ 3072 ] }, "gpt.h.4.attn.c_attn.weight": { "scale": 0.0667421743273735, "shape": [ 1024, 3072 ] }, "gpt.h.4.attn.c_proj.bias": { "scale": 0.3155006468296051, "shape": [ 1024 ] }, "gpt.h.4.attn.c_proj.weight": { "scale": 0.31384560465812683, "shape": [ 1024, 1024 ] }, "gpt.h.4.ln_1.bias": { "scale": 0.13793586194515228, "shape": [ 1024 ] }, "gpt.h.4.ln_1.weight": { "scale": 0.1345834881067276, "shape": [ 1024 ] }, "gpt.h.4.ln_2.bias": { "scale": 0.08576243370771408, "shape": [ 1024 ] }, "gpt.h.4.ln_2.weight": { "scale": 0.23912283778190613, "shape": [ 1024 ] }, "gpt.h.4.mlp.c_fc.bias": { "scale": 0.016857421025633812, "shape": [ 4096 ] }, "gpt.h.4.mlp.c_fc.weight": { "scale": 0.0949544683098793, "shape": [ 1024, 4096 ] }, "gpt.h.4.mlp.c_proj.bias": { "scale": 0.14176210761070251, "shape": [ 1024 ] }, "gpt.h.4.mlp.c_proj.weight": { "scale": 1.0221376419067383, "shape": [ 4096, 1024 ] }, "gpt.h.5.attn.c_attn.bias": { "scale": 0.03044235333800316, "shape": [ 3072 ] }, "gpt.h.5.attn.c_attn.weight": { "scale": 0.0556764118373394, "shape": [ 1024, 3072 ] }, "gpt.h.5.attn.c_proj.bias": { "scale": 0.17702117562294006, "shape": [ 1024 ] }, "gpt.h.5.attn.c_proj.weight": { "scale": 0.13661186397075653, "shape": [ 1024, 1024 ] }, "gpt.h.5.ln_1.bias": { "scale": 0.10966886579990387, "shape": [ 1024 ] }, "gpt.h.5.ln_1.weight": { "scale": 0.17159949243068695, "shape": [ 1024 ] }, "gpt.h.5.ln_2.bias": { "scale": 0.06587665528059006, "shape": [ 1024 ] }, "gpt.h.5.ln_2.weight": { "scale": 0.2513478100299835, "shape": [ 1024 ] }, "gpt.h.5.mlp.c_fc.bias": { "scale": 0.021238749846816063, "shape": [ 4096 ] }, "gpt.h.5.mlp.c_fc.weight": { "scale": 0.08227789402008057, "shape": [ 1024, 4096 ] }, "gpt.h.5.mlp.c_proj.bias": { "scale": 0.09128402918577194, "shape": [ 1024 ] }, "gpt.h.5.mlp.c_proj.weight": { "scale": 0.8960160613059998, "shape": [ 4096, 1024 ] }, "gpt.h.6.attn.c_attn.bias": { "scale": 0.04128195717930794, "shape": [ 3072 ] }, "gpt.h.6.attn.c_attn.weight": { "scale": 0.09308914095163345, "shape": [ 1024, 3072 ] }, "gpt.h.6.attn.c_proj.bias": { "scale": 0.09955250471830368, "shape": [ 1024 ] }, "gpt.h.6.attn.c_proj.weight": { "scale": 0.09972423315048218, "shape": [ 1024, 1024 ] }, "gpt.h.6.ln_1.bias": { "scale": 0.10598546266555786, "shape": [ 1024 ] }, "gpt.h.6.ln_1.weight": { "scale": 0.20339453220367432, "shape": [ 1024 ] }, "gpt.h.6.ln_2.bias": { "scale": 0.083857461810112, "shape": [ 1024 ] }, "gpt.h.6.ln_2.weight": { "scale": 0.25439128279685974, "shape": [ 1024 ] }, "gpt.h.6.mlp.c_fc.bias": { "scale": 0.026426810771226883, "shape": [ 4096 ] }, "gpt.h.6.mlp.c_fc.weight": { "scale": 0.1172819659113884, "shape": [ 1024, 4096 ] }, "gpt.h.6.mlp.c_proj.bias": { "scale": 0.08432779461145401, "shape": [ 1024 ] }, "gpt.h.6.mlp.c_proj.weight": { "scale": 0.8800371289253235, "shape": [ 4096, 1024 ] }, "gpt.h.7.attn.c_attn.bias": { "scale": 0.04523950815200806, "shape": [ 3072 ] }, "gpt.h.7.attn.c_attn.weight": { "scale": 0.09843175113201141, "shape": [ 1024, 3072 ] }, "gpt.h.7.attn.c_proj.bias": { "scale": 0.09096702188253403, "shape": [ 1024 ] }, "gpt.h.7.attn.c_proj.weight": { "scale": 0.09420859813690186, "shape": [ 1024, 1024 ] }, "gpt.h.7.ln_1.bias": { "scale": 0.152985617518425, "shape": [ 1024 ] }, "gpt.h.7.ln_1.weight": { "scale": 0.15078113973140717, "shape": [ 1024 ] }, "gpt.h.7.ln_2.bias": { "scale": 0.1413334757089615, "shape": [ 1024 ] }, "gpt.h.7.ln_2.weight": { "scale": 0.2576432526111603, "shape": [ 1024 ] }, "gpt.h.7.mlp.c_fc.bias": { "scale": 0.033283643424510956, "shape": [ 4096 ] }, "gpt.h.7.mlp.c_fc.weight": { "scale": 0.07020364701747894, "shape": [ 1024, 4096 ] }, "gpt.h.7.mlp.c_proj.bias": { "scale": 0.10713233798742294, "shape": [ 1024 ] }, "gpt.h.7.mlp.c_proj.weight": { "scale": 0.42597928643226624, "shape": [ 4096, 1024 ] }, "gpt.h.8.attn.c_attn.bias": { "scale": 0.03788977861404419, "shape": [ 3072 ] }, "gpt.h.8.attn.c_attn.weight": { "scale": 0.08913753926753998, "shape": [ 1024, 3072 ] }, "gpt.h.8.attn.c_proj.bias": { "scale": 0.08845029026269913, "shape": [ 1024 ] }, "gpt.h.8.attn.c_proj.weight": { "scale": 0.09520888328552246, "shape": [ 1024, 1024 ] }, "gpt.h.8.ln_1.bias": { "scale": 0.12517669796943665, "shape": [ 1024 ] }, "gpt.h.8.ln_1.weight": { "scale": 0.1648232787847519, "shape": [ 1024 ] }, "gpt.h.8.ln_2.bias": { "scale": 0.14098228514194489, "shape": [ 1024 ] }, "gpt.h.8.ln_2.weight": { "scale": 0.24974539875984192, "shape": [ 1024 ] }, "gpt.h.8.mlp.c_fc.bias": { "scale": 0.026503393426537514, "shape": [ 4096 ] }, "gpt.h.8.mlp.c_fc.weight": { "scale": 0.06782539933919907, "shape": [ 1024, 4096 ] }, "gpt.h.8.mlp.c_proj.bias": { "scale": 0.09713318198919296, "shape": [ 1024 ] }, "gpt.h.8.mlp.c_proj.weight": { "scale": 0.43732672929763794, "shape": [ 4096, 1024 ] }, "gpt.h.9.attn.c_attn.bias": { "scale": 0.04425312206149101, "shape": [ 3072 ] }, "gpt.h.9.attn.c_attn.weight": { "scale": 0.1181648001074791, "shape": [ 1024, 3072 ] }, "gpt.h.9.attn.c_proj.bias": { "scale": 0.07080474495887756, "shape": [ 1024 ] }, "gpt.h.9.attn.c_proj.weight": { "scale": 0.10802309960126877, "shape": [ 1024, 1024 ] }, "gpt.h.9.ln_1.bias": { "scale": 0.15359099209308624, "shape": [ 1024 ] }, "gpt.h.9.ln_1.weight": { "scale": 0.15670429170131683, "shape": [ 1024 ] }, "gpt.h.9.ln_2.bias": { "scale": 0.1377403736114502, "shape": [ 1024 ] }, "gpt.h.9.ln_2.weight": { "scale": 0.2576030492782593, "shape": [ 1024 ] }, "gpt.h.9.mlp.c_fc.bias": { "scale": 0.031440090388059616, "shape": [ 4096 ] }, "gpt.h.9.mlp.c_fc.weight": { "scale": 0.07645577937364578, "shape": [ 1024, 4096 ] }, "gpt.h.9.mlp.c_proj.bias": { "scale": 0.10798899829387665, "shape": [ 1024 ] }, "gpt.h.9.mlp.c_proj.weight": { "scale": 0.4369252622127533, "shape": [ 4096, 1024 ] }, "gpt.ln_f.bias": { "scale": 0.20075297355651855, "shape": [ 1024 ] }, "gpt.ln_f.weight": { "scale": 0.41444164514541626, "shape": [ 1024 ] }, "gpt.wpe.emb.weight": { "scale": 0.0773041620850563, "shape": [ 608, 1024 ] }, "gpt.wte.weight": { "scale": 0.08020960539579391, "shape": [ 1026, 1024 ] }, "mel_head.bias": { "scale": 0.028449567034840584, "shape": [ 1026 ] }, "mel_head.weight": { "scale": 0.07584048807621002, "shape": [ 1026, 1024 ] } }