{ "model.layers.26.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010766946059993482, "l1_avg": 0.00931929416126675, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008345548412762582, "l1_avg": 0.0006767219165340066, "l0_avg": 1.0 }, "original": { "l2_avg": 0.003155256077041962, "l1_avg": 0.0022022065189149643, "l0_avg": 0.9999898274739584 }, "merged": { "l2_avg": 0.0031555024837898295, "l1_avg": 0.0022027111715740627, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.123568149932461e-05, "l1_avg": 3.999663620359368e-05, "l0_avg": 0.9999972873263889 }, "num_elements": 1474560, "num_changed": 1474556, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9481, 29372, 7367, 0, 0, 0, 0, 32, 9735, 28682, 7455, 0, 0, 0, 0 ], "lora_B": [ 113, 8139, 0, 0, 0, 0, 0, 0, 112, 8020, 0, 0, 0, 0, 0, 0 ], "original": [ 4211, 621965, 109785, 1876, 2, 0, 0, 0, 4062, 621345, 109382, 1930, 2, 0, 0, 0 ], "merged": [ 4099, 621055, 110781, 1899, 2, 0, 0, 0, 4163, 620178, 110424, 1957, 2, 0, 0, 0 ] } }, "model.layers.26.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00904296657185818, "l1_avg": 0.007825018838047981, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000839955347100236, "l1_avg": 0.0006810474726888869, "l0_avg": 1.0 }, "original": { "l2_avg": 0.5739066136674896, "l1_avg": 0.4265228271484375, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 0.5738916863032905, "l1_avg": 0.4265228271484375, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3251016474070836e-05, "l1_avg": 3.397064542190896e-05, "l0_avg": 0.9996445549858941 }, "num_elements": 11796480, "num_changed": 11792287, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 64, 16271, 47836, 1100, 0, 0, 0, 0, 71, 16336, 48245, 1149, 0, 0, 0, 0 ], "lora_B": [ 652, 45513, 0, 0, 0, 0, 0, 0, 625, 45370, 0, 0, 0, 0, 0, 0 ], "original": [ 160, 39149, 117348, 466748, 4835430, 413361, 25312, 0, 150, 39530, 118057, 467304, 4833691, 415223, 25017, 0 ], "merged": [ 148, 38965, 116991, 465273, 4833039, 417399, 25675, 0, 148, 39361, 117706, 465913, 4831303, 419192, 25367, 0 ] } }, "model.layers.26.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010805541013970962, "l1_avg": 0.009349275297588772, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008316474593994693, "l1_avg": 0.0006737917428836226, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018660558041330703, "l1_avg": 0.01316273742251926, "l0_avg": 0.9999978807237413 }, "merged": { "l2_avg": 0.018656948373648615, "l1_avg": 0.013162826167212592, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.194742611834147e-05, "l1_avg": 4.056890288160907e-05, "l0_avg": 0.9999892340766059 }, "num_elements": 11796480, "num_changed": 11796353, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 34, 9723, 29077, 7438, 0, 0, 0, 0, 33, 9578, 28856, 7421, 0, 0, 0, 0 ], "lora_B": [ 904, 64804, 0, 0, 0, 0, 0, 0, 974, 64390, 0, 0, 0, 0, 0, 0 ], "original": [ 5694, 1379743, 2738757, 1717055, 52163, 0, 0, 0, 5481, 1379254, 2742659, 1722895, 52779, 0, 0, 0 ], "merged": [ 5539, 1373990, 2736017, 1724854, 52830, 0, 0, 0, 5454, 1373678, 2740052, 1730605, 53461, 0, 0, 0 ] } }, "model.layers.26.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01077978277168715, "l1_avg": 0.009322817458046808, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008329921984113753, "l1_avg": 0.0006745918653905392, "l0_avg": 1.0 }, "original": { "l2_avg": 0.09150312186697204, "l1_avg": 0.0706755585140652, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.09149651227592806, "l1_avg": 0.07067557440863716, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.15914864621935e-05, "l1_avg": 4.0135918081634575e-05, "l0_avg": 0.9999525282118056 }, "num_elements": 1474560, "num_changed": 1474490, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 42, 9649, 28737, 7437, 0, 0, 0, 0, 32, 9691, 29166, 7406, 0, 0, 0, 0 ], "lora_B": [ 111, 8020, 0, 0, 0, 0, 0, 0, 112, 8141, 0, 0, 0, 0, 0, 0 ], "original": [ 109, 28045, 83366, 286419, 339710, 3, 0, 0, 91, 28126, 83120, 286220, 339351, 0, 0, 0 ], "merged": [ 111, 27943, 83098, 285780, 340724, 3, 0, 0, 119, 27960, 82844, 285599, 340379, 0, 0, 0 ] } }, "model.layers.27.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010790132247777645, "l1_avg": 0.009337523248460558, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008659385493956506, "l1_avg": 0.0007031520362943411, "l0_avg": 1.0 }, "original": { "l2_avg": 0.016746883240619712, "l1_avg": 0.007028773095872667, "l0_avg": 0.9999844021267361 }, "merged": { "l2_avg": 0.016746565955118076, "l1_avg": 0.007029466496573554, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.347201907625691e-05, "l1_avg": 4.1479596661196816e-05, "l0_avg": 0.9999945746527777 }, "num_elements": 1474560, "num_changed": 1474552, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 45, 9613, 28967, 7443, 0, 0, 0, 0, 27, 9667, 28929, 7469, 0, 0, 0, 0 ], "lora_B": [ 103, 8052, 0, 0, 0, 0, 0, 0, 122, 8107, 0, 0, 0, 0, 0, 0 ], "original": [ 4864, 433282, 223885, 66807, 8907, 2, 0, 0, 4965, 433009, 223375, 66743, 8721, 0, 0, 0 ], "merged": [ 4549, 432610, 224458, 67096, 8973, 2, 0, 0, 4700, 432492, 223857, 67039, 8784, 0, 0, 0 ] } }, "model.layers.27.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009039407797337041, "l1_avg": 0.007823619991540909, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008432656401432955, "l1_avg": 0.0006827777458561791, "l0_avg": 1.0 }, "original": { "l2_avg": 1.032636173068293, "l1_avg": 0.75189208984375, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 1.03262856722082, "l1_avg": 0.75189208984375, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.334834734520061e-05, "l1_avg": 3.416586098157697e-05, "l0_avg": 0.9993773566351997 }, "num_elements": 11796480, "num_changed": 11789135, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 69, 16165, 48104, 1083, 0, 0, 0, 0, 59, 16395, 48101, 1096, 0, 0, 0, 0 ], "lora_B": [ 638, 45435, 0, 0, 0, 0, 0, 0, 631, 45456, 0, 0, 0, 0, 0, 0 ], "original": [ 88, 23473, 70541, 281150, 3960564, 1226605, 335317, 0, 89, 23658, 70494, 280977, 3962119, 1225679, 335726, 0 ], "merged": [ 95, 23363, 70331, 280275, 3954174, 1230846, 338678, 0, 101, 23525, 70255, 280184, 3955617, 1230027, 339009, 0 ] } }, "model.layers.27.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010792848808545367, "l1_avg": 0.009342168437110053, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008573143962342228, "l1_avg": 0.0006956140277907252, "l0_avg": 1.0 }, "original": { "l2_avg": 0.014658488383523691, "l1_avg": 0.010131521357430352, "l0_avg": 0.9999967787000869 }, "merged": { "l2_avg": 0.014656044916169661, "l1_avg": 0.010131658448113336, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.418228331472899e-05, "l1_avg": 4.121837102704578e-05, "l0_avg": 0.9999916076660156 }, "num_elements": 11796480, "num_changed": 11796381, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 40, 9608, 29306, 7448, 0, 0, 0, 0, 31, 9528, 28886, 7313, 0, 0, 0, 0 ], "lora_B": [ 880, 64718, 0, 0, 0, 0, 0, 0, 891, 64583, 0, 0, 0, 0, 0, 0 ], "original": [ 9549, 1965362, 2666681, 1240615, 18617, 0, 0, 0, 9424, 1961997, 2666379, 1239053, 18803, 0, 0, 0 ], "merged": [ 9608, 1958707, 2666571, 1247292, 18878, 0, 0, 0, 9473, 1955004, 2666264, 1245616, 19067, 0, 0, 0 ] } }, "model.layers.27.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010795555944991255, "l1_avg": 0.00933817360136244, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007879803306423128, "l1_avg": 0.0006345532601699233, "l0_avg": 1.0 }, "original": { "l2_avg": 0.08900514253691306, "l1_avg": 0.0657051510281033, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.0889988785043362, "l1_avg": 0.06570514572991265, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.023143288236367e-05, "l1_avg": 3.822926535374588e-05, "l0_avg": 0.9999491373697916 }, "num_elements": 1474560, "num_changed": 1474485, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9572, 28966, 7349, 0, 0, 0, 0, 41, 9699, 29072, 7429, 0, 0, 0, 0 ], "lora_B": [ 133, 8072, 0, 0, 0, 0, 0, 0, 89, 8090, 0, 0, 0, 0, 0, 0 ], "original": [ 141, 32502, 95392, 306684, 301929, 37, 0, 0, 126, 32557, 95108, 306508, 303537, 39, 0, 0 ], "merged": [ 148, 32335, 95073, 306127, 302942, 39, 0, 0, 121, 32440, 94844, 305864, 304587, 40, 0, 0 ] } }, "model.layers.25.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.002000422851398021, "l1_avg": 0.0017060213618808322, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008185398028650394, "l1_avg": 0.0006615045169989268, "l0_avg": 1.0 }, "original": { "l2_avg": 4.0613539418571944, "l1_avg": 3.7896387924382715, "l0_avg": 0.8775889719268422 }, "merged": { "l2_avg": 4.06205742333506, "l1_avg": 3.789996624228395, "l0_avg": 0.8640339688901548 }, "diff": { "l2_avg": 0.25089770907746645, "l1_avg": 0.051717039508584105, "l0_avg": 0.048337044421537426 }, "num_elements": 1061683200, "num_changed": 51318628, "precision": "mxfp4", "fp4_dist_before": [ 64986169, 87070506, 108547529, 58971136, 87587357, 62672034, 44973823, 16098348, 64975563, 87049572, 108549531, 58950984, 87570784, 62663456, 44941781, 16074627 ], "fp4_dist_after": [ 72177511, 122683360, 108803937, 71748466, 74388234, 50804407, 26339533, 3955674, 72175340, 122661159, 108801809, 71723045, 74365687, 50785194, 26319747, 3950097 ], "bf16_dists": null }, "model.layers.25.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010765967141866659, "l1_avg": 0.009310300482643976, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008544194232000362, "l1_avg": 0.0007012253834141625, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01918210850821601, "l1_avg": 0.018842505937741127, "l0_avg": 0.8818231403680495 }, "merged": { "l2_avg": 0.019182174735599095, "l1_avg": 0.018842494634934412, "l0_avg": 0.8676299036285023 }, "diff": { "l2_avg": 0.0013812993963559469, "l1_avg": 0.00028921801366923767, "l0_avg": 0.05055580374635296 }, "num_elements": 2123366400, "num_changed": 107348495, "precision": "mxfp4", "fp4_dist_before": [ 125452403, 171888598, 209541789, 122138342, 178129394, 132043861, 91110448, 30364319, 125480370, 172011104, 209803968, 122392484, 178522837, 132444987, 91508449, 30533047 ], "fp4_dist_after": [ 140549992, 239195221, 211975588, 145569229, 152682393, 108355715, 55266237, 7109173, 140520223, 239399944, 212325817, 145887649, 153061661, 108768136, 55540117, 7159305 ], "bf16_dists": null }, "model.layers.26.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020022993232592175, "l1_avg": 0.0017070838146739536, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008172544137203389, "l1_avg": 0.0006606092469559775, "l0_avg": 1.0 }, "original": { "l2_avg": 4.757513841532697, "l1_avg": 4.247316261574074, "l0_avg": 0.8775068937701943 }, "merged": { "l2_avg": 4.758053800731113, "l1_avg": 4.247750289351852, "l0_avg": 0.8644967180416908 }, "diff": { "l2_avg": 0.2786144806898216, "l1_avg": 0.05556074731143904, "l0_avg": 0.046482247246636285 }, "num_elements": 1061683200, "num_changed": 49349421, "precision": "mxfp4", "fp4_dist_before": [ 65028997, 87021464, 108739228, 58952611, 87401871, 62428763, 44909620, 16452015, 65019876, 86999475, 108695549, 58920541, 87370359, 62416780, 44889493, 16436558 ], "fp4_dist_after": [ 71926272, 122657776, 108889726, 72396216, 74655798, 50721249, 25785775, 3893262, 71935286, 122617574, 108850639, 72363775, 74625405, 50706771, 25771844, 3885832 ], "bf16_dists": null }, "model.layers.26.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010747190205917924, "l1_avg": 0.00928418238957723, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008567289710686904, "l1_avg": 0.0007040916217697992, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019176065921783447, "l1_avg": 0.01892027113172743, "l0_avg": 0.8833653862093702 }, "merged": { "l2_avg": 0.019176728195614286, "l1_avg": 0.01892025982892072, "l0_avg": 0.8692425028483073 }, "diff": { "l2_avg": 0.0013815913763311175, "l1_avg": 0.0002891710363788369, "l0_avg": 0.0505540692364728 }, "num_elements": 2123366400, "num_changed": 107344812, "precision": "mxfp4", "fp4_dist_before": [ 123826687, 170161019, 209599497, 121972296, 179119840, 133025361, 92278414, 30868991, 123831333, 170265757, 209825308, 122149054, 179490477, 133329598, 92611662, 31011106 ], "fp4_dist_after": [ 138806993, 238395375, 212474237, 146304459, 153550059, 108729623, 55421354, 7160482, 138839083, 238565892, 212789138, 146544358, 153902638, 109037194, 55642129, 7203386 ], "bf16_dists": null }, "model.layers.27.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010778423272037312, "l1_avg": 0.009323409530851576, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008572019542205724, "l1_avg": 0.000702155547009574, "l0_avg": 0.9921875 }, "original": { "l2_avg": 0.01982214715745714, "l1_avg": 0.019469127890504437, "l0_avg": 0.8831083707456235 }, "merged": { "l2_avg": 0.01982258823182848, "l1_avg": 0.019469114703896603, "l0_avg": 0.8690021086327824 }, "diff": { "l2_avg": 0.0013948198821809557, "l1_avg": 0.00029413670669367284, "l0_avg": 0.05057332027105638 }, "num_elements": 2123366400, "num_changed": 107385689, "precision": "mxfp4", "fp4_dist_before": [ 124097559, 168859436, 209959306, 120700426, 179172337, 133165364, 93289506, 31434952, 124106199, 168983237, 210242044, 120931986, 179590844, 133561858, 93665745, 31605601 ], "fp4_dist_after": [ 140056864, 238021974, 212895279, 145782491, 153432926, 108321228, 55677265, 7479395, 138099657, 238255078, 213218327, 146093172, 153861213, 108712865, 55929003, 7529663 ], "bf16_dists": null }, "model.layers.28.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010810012069319511, "l1_avg": 0.009359150462680392, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008048431482166052, "l1_avg": 0.0006504426710307598, "l0_avg": 1.0 }, "original": { "l2_avg": 0.003463627937286467, "l1_avg": 0.002391345136695438, "l0_avg": 0.9999898274739584 }, "merged": { "l2_avg": 0.00346374063646836, "l1_avg": 0.0023916984597841897, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.9752574868402915e-05, "l1_avg": 3.860517301493221e-05, "l0_avg": 0.9999993218315972 }, "num_elements": 1474560, "num_changed": 1474559, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9567, 29219, 7555, 0, 0, 0, 0, 41, 9575, 28695, 7475, 0, 0, 0, 0 ], "lora_B": [ 139, 8093, 0, 0, 0, 0, 0, 0, 120, 8032, 0, 0, 0, 0, 0, 0 ], "original": [ 3620, 601769, 128862, 2632, 13, 0, 0, 0, 3662, 603012, 128418, 2556, 16, 0, 0, 0 ], "merged": [ 3568, 600572, 129964, 2655, 13, 0, 0, 0, 3823, 601850, 129523, 2575, 17, 0, 0, 0 ] } }, "model.layers.28.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009057530942196726, "l1_avg": 0.007840946316719055, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008484044479615634, "l1_avg": 0.0006878553993172116, "l0_avg": 1.0 }, "original": { "l2_avg": 0.7318254741763778, "l1_avg": 0.534382332695855, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.731817655080845, "l1_avg": 0.534382332695855, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3981197580516975e-05, "l1_avg": 3.459338719646136e-05, "l0_avg": 0.9995767381456163 }, "num_elements": 11796480, "num_changed": 11791487, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 72, 16224, 47868, 1225, 0, 0, 0, 0, 73, 16198, 48184, 1228, 0, 0, 0, 0 ], "lora_B": [ 664, 45310, 0, 0, 0, 0, 0, 0, 637, 45549, 0, 0, 0, 0, 0, 0 ], "original": [ 123, 31709, 95264, 379902, 4573104, 740027, 79244, 0, 136, 31707, 94886, 379570, 4572277, 739316, 79215, 0 ], "merged": [ 127, 31569, 94964, 378692, 4568209, 745376, 80425, 0, 104, 31603, 94626, 378339, 4567589, 744412, 80445, 0 ] } }, "model.layers.28.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010775784503150443, "l1_avg": 0.009319879611333211, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008292231101113944, "l1_avg": 0.0006714356131851673, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018121926761810386, "l1_avg": 0.012966130839453803, "l0_avg": 0.999998050265842 }, "merged": { "l2_avg": 0.018118183814090805, "l1_avg": 0.012966214285956488, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1620560297225415e-05, "l1_avg": 4.030338540259335e-05, "l0_avg": 0.999991692437066 }, "num_elements": 11796480, "num_changed": 11796382, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 58, 9741, 28992, 7396, 0, 0, 0, 0, 36, 9545, 29084, 7308, 0, 0, 0, 0 ], "lora_B": [ 937, 64470, 0, 0, 0, 0, 0, 0, 948, 64717, 0, 0, 0, 0, 0, 0 ], "original": [ 5815, 1425983, 2699823, 1731421, 41329, 0, 0, 0, 5993, 1422916, 2695433, 1726794, 40973, 0, 0, 0 ], "merged": [ 5938, 1420159, 2697217, 1739040, 41921, 0, 0, 0, 6102, 1417109, 2692992, 1734415, 41587, 0, 0, 0 ] } }, "model.layers.28.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010789667314567326, "l1_avg": 0.009341112772623698, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008449797751381993, "l1_avg": 0.0006853116792626679, "l0_avg": 1.0 }, "original": { "l2_avg": 0.09382251658121907, "l1_avg": 0.07140800158182779, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.09381513419578497, "l1_avg": 0.07140800688001844, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2714177207163084e-05, "l1_avg": 4.110819556646877e-05, "l0_avg": 0.9999484592013889 }, "num_elements": 1474560, "num_changed": 1474484, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 41, 9670, 29090, 7383, 0, 0, 0, 0, 33, 9604, 28869, 7470, 0, 0, 0, 0 ], "lora_B": [ 118, 8102, 0, 0, 0, 0, 0, 0, 110, 8054, 0, 0, 0, 0, 0, 0 ], "original": [ 118, 28482, 83261, 286892, 339137, 27, 0, 0, 90, 28075, 83079, 286356, 339020, 23, 0, 0 ], "merged": [ 87, 28345, 82994, 286266, 340184, 27, 0, 0, 98, 27972, 82788, 285773, 340003, 23, 0, 0 ] } }, "model.layers.29.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010805650964392321, "l1_avg": 0.009358567661709255, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007964848773553967, "l1_avg": 0.0006428650231100619, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01831603753760458, "l1_avg": 0.006918079985512628, "l0_avg": 0.9999864366319444 }, "merged": { "l2_avg": 0.018315886748455287, "l1_avg": 0.006918765438927545, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.901686727079641e-05, "l1_avg": 3.7851626984775066e-05, "l0_avg": 0.9999891493055556 }, "num_elements": 1474560, "num_changed": 1474544, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9496, 28970, 7574, 0, 0, 0, 0, 36, 9580, 29112, 7360, 0, 0, 0, 0 ], "lora_B": [ 145, 8000, 0, 0, 0, 0, 0, 0, 108, 8131, 0, 0, 0, 0, 0, 0 ], "original": [ 5186, 473777, 182667, 63572, 11312, 1, 0, 0, 5292, 474531, 183534, 63263, 11422, 3, 0, 0 ], "merged": [ 5238, 472838, 183206, 63754, 11392, 1, 0, 0, 5304, 473774, 184116, 63440, 11494, 3, 0, 0 ] } }, "model.layers.29.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009019692291857055, "l1_avg": 0.007794913370162249, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008406885789726741, "l1_avg": 0.0006801109347078535, "l0_avg": 1.0 }, "original": { "l2_avg": 1.2352355490154205, "l1_avg": 0.8746747334798177, "l0_avg": 1.0 }, "merged": { "l2_avg": 1.2352298624004876, "l1_avg": 0.874674818250868, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.339004508088877e-05, "l1_avg": 3.388719633221626e-05, "l0_avg": 0.9992626614040798 }, "num_elements": 11796480, "num_changed": 11787782, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 64, 16437, 48005, 1091, 0, 0, 0, 0, 66, 16613, 47640, 1156, 0, 0, 0, 0 ], "lora_B": [ 651, 45243, 0, 0, 0, 0, 0, 0, 616, 45650, 0, 0, 0, 0, 0, 0 ], "original": [ 97, 22653, 68116, 272118, 3676438, 1268935, 590540, 0, 100, 22693, 68582, 271901, 3674729, 1268940, 590638, 0 ], "merged": [ 83, 22570, 67896, 271300, 3670770, 1271307, 594972, 0, 99, 22593, 68370, 271035, 3668848, 1271635, 595002, 0 ] } }, "model.layers.29.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010789284058812876, "l1_avg": 0.009333509869045681, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008381325587598926, "l1_avg": 0.0006798229878768325, "l0_avg": 1.0 }, "original": { "l2_avg": 0.015157132319795322, "l1_avg": 0.00999078220791287, "l0_avg": 0.9999954223632812 }, "merged": { "l2_avg": 0.015154674413770565, "l1_avg": 0.009990967644585503, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2978133752035916e-05, "l1_avg": 4.034021662341224e-05, "l0_avg": 0.9999938117133247 }, "num_elements": 11796480, "num_changed": 11796407, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 25, 9639, 28862, 7511, 0, 0, 0, 0, 38, 9631, 29223, 7231, 0, 0, 0, 0 ], "lora_B": [ 881, 64912, 0, 0, 0, 0, 0, 0, 881, 64398, 0, 0, 0, 0, 0, 0 ], "original": [ 12035, 2169583, 2490683, 1201503, 28635, 0, 0, 0, 11896, 2164897, 2489137, 1199559, 28552, 0, 0, 0 ], "merged": [ 11828, 2162766, 2490894, 1207573, 29013, 0, 0, 0, 11887, 2158718, 2489228, 1205607, 28966, 0, 0, 0 ] } }, "model.layers.29.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.0108124890952407, "l1_avg": 0.009356691439946493, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000817171938251704, "l1_avg": 0.0006587974494323134, "l0_avg": 1.0 }, "original": { "l2_avg": 0.10857068807724782, "l1_avg": 0.07766112751430936, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.10856436749873997, "l1_avg": 0.07766112751430936, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.305007819866542e-05, "l1_avg": 3.9018271490931514e-05, "l0_avg": 0.9999416775173611 }, "num_elements": 1474560, "num_changed": 1474474, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 28, 9515, 29150, 7352, 0, 0, 0, 0, 32, 9650, 28902, 7531, 0, 0, 0, 0 ], "lora_B": [ 119, 7908, 0, 0, 0, 0, 0, 0, 115, 8242, 0, 0, 0, 0, 0, 0 ], "original": [ 117, 28901, 85592, 281281, 341604, 119, 1, 0, 102, 28864, 84741, 281774, 341344, 119, 1, 0 ], "merged": [ 116, 28794, 85333, 280672, 342572, 123, 1, 0, 116, 28734, 84491, 281184, 342301, 122, 1, 0 ] } }, "model.layers.3.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010762721607732837, "l1_avg": 0.00931224160724216, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008383524254895747, "l1_avg": 0.0006790679763071239, "l0_avg": 1.0 }, "original": { "l2_avg": 0.023332343308555187, "l1_avg": 0.014238279395633274, "l0_avg": 0.9999979654947917 }, "merged": { "l2_avg": 0.023331531246157434, "l1_avg": 0.01423833900027805, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.137493873918931e-05, "l1_avg": 3.9990479126572606e-05, "l0_avg": 0.9999884711371527 }, "num_elements": 1474560, "num_changed": 1474543, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 28, 9498, 29179, 7351, 0, 0, 0, 0, 35, 9761, 28963, 7345, 0, 0, 0, 0 ], "lora_B": [ 104, 8129, 0, 0, 0, 0, 0, 0, 116, 8035, 0, 0, 0, 0, 0, 0 ], "original": [ 981, 215679, 304510, 196664, 18972, 1, 0, 0, 923, 215384, 305210, 197262, 18973, 1, 0, 0 ], "merged": [ 947, 214876, 304528, 197255, 19167, 1, 0, 0, 934, 214547, 305300, 197857, 19147, 1, 0, 0 ] } }, "model.layers.3.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009036711716121361, "l1_avg": 0.007817039266228676, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000819172312297911, "l1_avg": 0.0006622512307431962, "l0_avg": 1.0 }, "original": { "l2_avg": 0.048183163692889434, "l1_avg": 0.031027240223354763, "l0_avg": 0.9999994066026475 }, "merged": { "l2_avg": 0.04817969841191463, "l1_avg": 0.03102724552154541, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.2257585600862566e-05, "l1_avg": 3.3145387553506426e-05, "l0_avg": 0.9999760097927517 }, "num_elements": 11796480, "num_changed": 11796197, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 78, 16428, 47762, 999, 0, 0, 0, 0, 48, 16296, 48439, 1022, 0, 0, 0, 0 ], "lora_B": [ 695, 45406, 0, 0, 0, 0, 0, 0, 667, 45392, 0, 0, 0, 0, 0, 0 ], "original": [ 2329, 613699, 1673843, 2942640, 667463, 149, 30, 0, 2423, 613368, 1672028, 2942314, 666035, 134, 25, 0 ], "merged": [ 2299, 611041, 1669575, 2944829, 672235, 151, 30, 0, 2381, 610647, 1667684, 2944608, 670838, 137, 25, 0 ] } }, "model.layers.3.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010788861535050796, "l1_avg": 0.009336900711059571, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008400954329839986, "l1_avg": 0.0006819110130891204, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01594474959069028, "l1_avg": 0.011490448315938314, "l0_avg": 0.9999967787000869 }, "merged": { "l2_avg": 0.015941711916502427, "l1_avg": 0.011490541034274632, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3044018864319804e-05, "l1_avg": 4.0564737800094816e-05, "l0_avg": 0.9999910142686632 }, "num_elements": 11796480, "num_changed": 11796374, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 42, 9665, 28924, 7415, 0, 0, 0, 0, 41, 9531, 29143, 7399, 0, 0, 0, 0 ], "lora_B": [ 917, 64589, 0, 0, 0, 0, 0, 0, 876, 64690, 0, 0, 0, 0, 0, 0 ], "original": [ 6754, 1590300, 2798632, 1482117, 20891, 0, 0, 0, 6579, 1589714, 2799856, 1480332, 21305, 0, 0, 0 ], "merged": [ 6895, 1583818, 2797086, 1489572, 21294, 0, 0, 0, 6786, 1583110, 2798408, 1487852, 21659, 0, 0, 0 ] } }, "model.layers.3.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010814645694219643, "l1_avg": 0.009359280930625068, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008026993018575013, "l1_avg": 0.0006486807833425701, "l0_avg": 1.0 }, "original": { "l2_avg": 0.03911333251689973, "l1_avg": 0.029167109065585665, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.03911073140407444, "l1_avg": 0.02916712760925293, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1104118334704575e-05, "l1_avg": 3.854390751156542e-05, "l0_avg": 0.9999701605902778 }, "num_elements": 1474560, "num_changed": 1474516, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 49, 9508, 28901, 7493, 0, 0, 0, 0, 28, 9639, 29154, 7388, 0, 0, 0, 0 ], "lora_B": [ 122, 8004, 0, 0, 0, 0, 0, 0, 138, 8120, 0, 0, 0, 0, 0, 0 ], "original": [ 318, 76638, 201504, 381644, 76251, 0, 0, 0, 312, 76945, 201345, 382535, 77068, 0, 0, 0 ], "merged": [ 337, 76372, 200953, 381803, 76926, 0, 0, 0, 303, 76589, 200882, 382620, 77775, 0, 0, 0 ] } }, "model.layers.27.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001994298078341751, "l1_avg": 0.0017022747132513258, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008189234022145576, "l1_avg": 0.0006612297147512436, "l0_avg": 1.0 }, "original": { "l2_avg": 5.172785623487348, "l1_avg": 4.545165895061729, "l0_avg": 0.8781153408097632 }, "merged": { "l2_avg": 5.173085334410093, "l1_avg": 4.545679976851852, "l0_avg": 0.8656067497347608 }, "diff": { "l2_avg": 0.29276644083760006, "l1_avg": 0.05736385392554012, "l0_avg": 0.044707727314513404 }, "num_elements": 1061683200, "num_changed": 47465443, "precision": "mxfp4", "fp4_dist_before": [ 64706798, 87186013, 108517089, 59260905, 87512903, 62327191, 44866163, 16578408, 64696097, 87180312, 108493612, 59230465, 87462664, 62273593, 44833087, 16557900 ], "fp4_dist_after": [ 71332183, 122239817, 108718199, 72968979, 75354398, 50988398, 25534157, 3802944, 71350873, 122248812, 108663706, 72913163, 75317937, 50944891, 25510837, 3793906 ], "bf16_dists": null }, "model.layers.28.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001993763569858057, "l1_avg": 0.0017016258504655626, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008157822070293471, "l1_avg": 0.0006595191028383043, "l0_avg": 1.0 }, "original": { "l2_avg": 5.726502272563353, "l1_avg": 4.972302758487654, "l0_avg": 0.8773102428294994 }, "merged": { "l2_avg": 5.726521933599885, "l1_avg": 4.972933063271605, "l0_avg": 0.8648221267888575 }, "diff": { "l2_avg": 0.32972769096317506, "l1_avg": 0.06470050576292438, "l0_avg": 0.044336093855492864 }, "num_elements": 1061683200, "num_changed": 47070886, "precision": "mxfp4", "fp4_dist_before": [ 65128265, 88960602, 108384819, 60095389, 87078507, 61653249, 43729242, 15874083, 65129389, 88952177, 108371159, 60083245, 87052110, 61634634, 43696774, 15859556 ], "fp4_dist_after": [ 71756097, 122552874, 108351263, 72549756, 75181145, 51103011, 25668786, 3739655, 71759980, 122534728, 108334402, 72525602, 75153341, 51087975, 25647044, 3737541 ], "bf16_dists": null }, "model.layers.28.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010759081006596212, "l1_avg": 0.009307051367229886, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008570590186728056, "l1_avg": 0.0007053390973144108, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019779033131069608, "l1_avg": 0.019491040265118633, "l0_avg": 0.883017717997233 }, "merged": { "l2_avg": 0.019779890113406712, "l1_avg": 0.019491123152367864, "l0_avg": 0.8688193478996371 }, "diff": { "l2_avg": 0.00139568911658393, "l1_avg": 0.0002951524875782154, "l0_avg": 0.05096444259455175 }, "num_elements": 2123366400, "num_changed": 108216185, "precision": "mxfp4", "fp4_dist_before": [ 124191516, 168342949, 210202277, 120134704, 179113975, 132985826, 93653653, 31779337, 124204731, 168468828, 210557840, 120465025, 179630505, 133512754, 94140771, 31981709 ], "fp4_dist_after": [ 139269413, 238791539, 213113192, 145963096, 153109069, 107697673, 55146496, 7319523, 139275176, 239056692, 213553655, 146417661, 153626235, 108183605, 55465811, 7377564 ], "bf16_dists": null }, "model.layers.29.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001985215043786637, "l1_avg": 0.0016966135965453253, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008216605296012736, "l1_avg": 0.000664299229780833, "l0_avg": 1.0 }, "original": { "l2_avg": 5.79639821650962, "l1_avg": 5.1767655285493825, "l0_avg": 0.8782469111313055 }, "merged": { "l2_avg": 5.7964341818203495, "l1_avg": 5.177458526234568, "l0_avg": 0.8660369750599802 }, "diff": { "l2_avg": 0.34188042940078883, "l1_avg": 0.06670924810715663, "l0_avg": 0.04340526062765239 }, "num_elements": 1061683200, "num_changed": 46082636, "precision": "mxfp4", "fp4_dist_before": [ 64634999, 89040611, 107921163, 60762929, 87292001, 62033260, 43482525, 15723135, 64628210, 89022495, 107928168, 60742517, 87280323, 62019874, 43458219, 15712771 ], "fp4_dist_after": [ 71112688, 121849240, 107969669, 73000140, 75740210, 51826889, 25723586, 3664658, 71113605, 121827191, 107957658, 72978712, 75729853, 51812987, 25711496, 3664618 ], "bf16_dists": null }, "model.layers.29.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010774385997568268, "l1_avg": 0.009332007831997342, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008565526577144397, "l1_avg": 0.000705041570795907, "l0_avg": 1.0 }, "original": { "l2_avg": 0.020131920443640816, "l1_avg": 0.01991726533866223, "l0_avg": 0.8832663660873602 }, "merged": { "l2_avg": 0.020132837030622693, "l1_avg": 0.01991721824363426, "l0_avg": 0.8689539572633342 }, "diff": { "l2_avg": 0.0014092466897434658, "l1_avg": 0.00030103580451306, "l0_avg": 0.051408352322048614 }, "num_elements": 2123366400, "num_changed": 109158768, "precision": "mxfp4", "fp4_dist_before": [ 123916106, 167698515, 210197846, 119997346, 179525433, 133521802, 94086447, 31782504, 123952170, 167797708, 210473232, 120193408, 179922792, 133892641, 94458096, 31950354 ], "fp4_dist_after": [ 139109587, 238500857, 213334947, 145892329, 153189747, 107837056, 55468925, 7392946, 139149177, 238699505, 213663580, 146172541, 153578237, 108205757, 55726351, 7444858 ], "bf16_dists": null }, "model.layers.20.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01077971287463357, "l1_avg": 0.009326177173190647, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008267820812761784, "l1_avg": 0.000668535940349102, "l0_avg": 1.0 }, "original": { "l2_avg": 0.011110650291801675, "l1_avg": 0.005703594949510362, "l0_avg": 0.9999966091579862 }, "merged": { "l2_avg": 0.011110371488947515, "l1_avg": 0.0057037949562072756, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.097647276740807e-05, "l1_avg": 4.0100152707762186e-05, "l0_avg": 0.9999966091579862 }, "num_elements": 1474560, "num_changed": 1474555, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9521, 28940, 7359, 0, 0, 0, 0, 39, 9722, 28941, 7607, 0, 0, 0, 0 ], "lora_B": [ 123, 8121, 0, 0, 0, 0, 0, 0, 100, 8040, 0, 0, 0, 0, 0, 0 ], "original": [ 2012, 412409, 275271, 43705, 2861, 1, 0, 0, 2143, 412187, 277108, 43976, 2885, 2, 0, 0 ], "merged": [ 2092, 411129, 276303, 43968, 2891, 1, 0, 0, 2109, 410899, 277983, 44277, 2906, 2, 0, 0 ] } }, "model.layers.20.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00906222768162766, "l1_avg": 0.007846260443329811, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008315652464203699, "l1_avg": 0.0006733985410796271, "l0_avg": 1.0 }, "original": { "l2_avg": 0.5230913780395154, "l1_avg": 0.3821019066704644, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 0.5230795783135294, "l1_avg": 0.3821019066704644, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3284961233619706e-05, "l1_avg": 3.390424470934603e-05, "l0_avg": 0.9996790568033854 }, "num_elements": 11796480, "num_changed": 11792694, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 65, 16318, 47997, 1100, 0, 0, 0, 0, 64, 16145, 48192, 1191, 0, 0, 0, 0 ], "lora_B": [ 647, 45590, 0, 0, 0, 0, 0, 0, 636, 45287, 0, 0, 0, 0, 0, 0 ], "original": [ 242, 52376, 153542, 566086, 4798057, 304046, 24474, 0, 193, 52204, 153085, 564232, 4798179, 305034, 24730, 0 ], "merged": [ 218, 52178, 153117, 564305, 4796917, 307288, 24791, 0, 207, 51972, 152617, 562588, 4797046, 308139, 25097, 0 ] } }, "model.layers.20.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010780598760885664, "l1_avg": 0.009319801463021173, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008505089149706659, "l1_avg": 0.0006889156647957861, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01121986563755112, "l1_avg": 0.007476954989963107, "l0_avg": 0.9999957614474826 }, "merged": { "l2_avg": 0.011218405110473598, "l1_avg": 0.007477120558420817, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.284496217811559e-05, "l1_avg": 4.134531805498733e-05, "l0_avg": 0.9999957614474826 }, "num_elements": 11796480, "num_changed": 11796430, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 49, 9603, 29057, 7463, 0, 0, 0, 0, 43, 9665, 28790, 7490, 0, 0, 0, 0 ], "lora_B": [ 898, 64567, 0, 0, 0, 0, 0, 0, 887, 64720, 0, 0, 0, 0, 0, 0 ], "original": [ 13088, 2512481, 2655695, 709569, 8409, 0, 0, 0, 13149, 2512983, 2650928, 711654, 8524, 0, 0, 0 ], "merged": [ 13002, 2504461, 2658508, 714586, 8538, 0, 0, 0, 13011, 2505238, 2653849, 716643, 8644, 0, 0, 0 ] } }, "model.layers.20.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010777037152593785, "l1_avg": 0.00931810008154975, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008292080019600689, "l1_avg": 0.0006730236927978694, "l0_avg": 1.0 }, "original": { "l2_avg": 0.09784263069592887, "l1_avg": 0.06403142081366645, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.0978370326487614, "l1_avg": 0.06403147379557292, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.134074489442179e-05, "l1_avg": 4.0287397698395784e-05, "l0_avg": 0.9999464246961806 }, "num_elements": 1474560, "num_changed": 1474481, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9739, 28890, 7422, 0, 0, 0, 0, 40, 9715, 28959, 7363, 0, 0, 0, 0 ], "lora_B": [ 123, 8076, 0, 0, 0, 0, 0, 0, 112, 8073, 0, 0, 0, 0, 0, 0 ], "original": [ 111, 33628, 98582, 316751, 287355, 382, 78, 0, 135, 33636, 98574, 317556, 287288, 404, 80, 0 ], "merged": [ 128, 33497, 98257, 316104, 288465, 381, 80, 0, 112, 33500, 98263, 316987, 288299, 407, 80, 0 ] } }, "model.layers.30.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010786892637148316, "l1_avg": 0.009331688616010877, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008125023450702429, "l1_avg": 0.0006593838334083557, "l0_avg": 1.0 }, "original": { "l2_avg": 0.004045844869388282, "l1_avg": 0.002253444989522298, "l0_avg": 0.9999864366319444 }, "merged": { "l2_avg": 0.004046011758420702, "l1_avg": 0.002253915038373735, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.034365041760061e-05, "l1_avg": 3.94337655355533e-05, "l0_avg": 0.9999993218315972 }, "num_elements": 1474560, "num_changed": 1474559, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 42, 9630, 28783, 7528, 0, 0, 0, 0, 41, 9748, 29004, 7384, 0, 0, 0, 0 ], "lora_B": [ 114, 8139, 0, 0, 0, 0, 0, 0, 106, 8025, 0, 0, 0, 0, 0, 0 ], "original": [ 4182, 619344, 111010, 2325, 301, 0, 0, 0, 4272, 620041, 110395, 2354, 336, 0, 0, 0 ], "merged": [ 4203, 618211, 112051, 2343, 302, 0, 0, 0, 4294, 618971, 111460, 2387, 338, 0, 0, 0 ] } }, "model.layers.30.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009065461135161854, "l1_avg": 0.007843557745218277, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008481676618755652, "l1_avg": 0.0006886688371499379, "l0_avg": 1.0 }, "original": { "l2_avg": 0.7615882926608433, "l1_avg": 0.5662435743543837, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.7615809711441169, "l1_avg": 0.5662435743543837, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.465642670275478e-05, "l1_avg": 3.481312758392758e-05, "l0_avg": 0.9995451185438368 }, "num_elements": 11796480, "num_changed": 11791114, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 52, 16243, 47670, 1367, 0, 0, 0, 0, 63, 16345, 47960, 1372, 0, 0, 0, 0 ], "lora_B": [ 619, 45341, 0, 0, 0, 0, 0, 0, 641, 45559, 0, 0, 0, 0, 0, 0 ], "original": [ 126, 30380, 91149, 361722, 4467756, 845888, 100812, 0, 122, 30123, 91089, 361452, 4468881, 847226, 99754, 0 ], "merged": [ 123, 30265, 90869, 360563, 4463020, 850843, 102145, 0, 107, 29993, 90823, 360370, 4464045, 852152, 101162, 0 ] } }, "model.layers.30.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010787176152163392, "l1_avg": 0.00933884514702691, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008493948222992861, "l1_avg": 0.0006910503725521266, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018427869977207443, "l1_avg": 0.01305855909983317, "l0_avg": 0.9999983893500434 }, "merged": { "l2_avg": 0.018424195890840565, "l1_avg": 0.013058642546335856, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2994325194090665e-05, "l1_avg": 4.116384726431635e-05, "l0_avg": 0.9999916076660156 }, "num_elements": 11796480, "num_changed": 11796381, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 43, 9640, 29001, 7402, 0, 0, 0, 0, 39, 9523, 29065, 7447, 0, 0, 0, 0 ], "lora_B": [ 878, 64526, 0, 0, 0, 0, 0, 0, 874, 64794, 0, 0, 0, 0, 0, 0 ], "original": [ 6498, 1372591, 2739907, 1732286, 43552, 3, 0, 0, 6502, 1373944, 2743554, 1733809, 43834, 0, 0, 0 ], "merged": [ 6557, 1367013, 2736787, 1740303, 44132, 3, 0, 0, 6443, 1368345, 2740777, 1741730, 44390, 0, 0, 0 ] } }, "model.layers.30.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010784727399207696, "l1_avg": 0.009321362442440457, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008291946724057198, "l1_avg": 0.0006713047623634338, "l0_avg": 1.0 }, "original": { "l2_avg": 0.09833387032991739, "l1_avg": 0.07573429743448894, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.0983268335029504, "l1_avg": 0.0757343504163954, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.230280801350026e-05, "l1_avg": 4.028390006472667e-05, "l0_avg": 0.9999430338541667 }, "num_elements": 1474560, "num_changed": 1474476, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 39, 9635, 28934, 7477, 0, 0, 0, 0, 45, 9666, 28957, 7407, 0, 0, 0, 0 ], "lora_B": [ 107, 8075, 0, 0, 0, 0, 0, 0, 118, 8084, 0, 0, 0, 0, 0, 0 ], "original": [ 98, 25885, 77865, 272074, 360059, 16, 0, 0, 112, 26457, 77897, 271885, 362191, 21, 0, 0 ], "merged": [ 99, 25748, 77606, 271381, 361153, 17, 0, 0, 94, 26371, 77595, 271263, 363212, 21, 0, 0 ] } }, "model.layers.2.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020336592830809685, "l1_avg": 0.0017260968685150146, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007377161807391934, "l1_avg": 0.0005894208119975196, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0547906370707183, "l1_avg": 0.0423538057303723, "l0_avg": 0.8805751225977768 }, "merged": { "l2_avg": 0.054802610522081935, "l1_avg": 0.0423537454487365, "l0_avg": 0.8669240296917197 }, "diff": { "l2_avg": 0.003357817879140355, "l1_avg": 0.000593499489772467, "l0_avg": 0.048910564846462674 }, "num_elements": 1061683200, "num_changed": 51927525, "precision": "mxfp4", "fp4_dist_before": [ 63400327, 85929975, 107611388, 59849439, 88652513, 63895968, 45453637, 16212964, 63391059, 85896238, 107551357, 59790305, 88587386, 63852511, 45409461, 16198672 ], "fp4_dist_after": [ 70640121, 121947554, 108433682, 73258609, 75502524, 51610722, 25995661, 3610050, 70644401, 121890038, 108367635, 73195535, 75429269, 51576925, 25974473, 3606001 ], "bf16_dists": null }, "model.layers.2.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010730993349361647, "l1_avg": 0.009282155831654867, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008486811892453234, "l1_avg": 0.0006908608393536674, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01956046952141656, "l1_avg": 0.020537746099778164, "l0_avg": 0.859332054985894 }, "merged": { "l2_avg": 0.01956085761388143, "l1_avg": 0.020537763053988235, "l0_avg": 0.8436974537225417 }, "diff": { "l2_avg": 0.0014389187097549438, "l1_avg": 0.00033076757266197674, "l0_avg": 0.054252329696843654 }, "num_elements": 2123366400, "num_changed": 115197574, "precision": "mxfp4", "fp4_dist_before": [ 149294487, 173179958, 210585589, 113656353, 169965209, 124578801, 89430947, 30885264, 149395101, 173294988, 210673591, 113686468, 169977556, 124535509, 89374778, 30851801 ], "fp4_dist_after": [ 165941805, 245002256, 209735200, 136447345, 142174259, 100414763, 54045074, 7865664, 165945770, 245166917, 209800288, 136454836, 142153323, 100359026, 54006781, 7853093 ], "bf16_dists": null }, "model.layers.3.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020378198415846927, "l1_avg": 0.0017286078797446357, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007521364730104881, "l1_avg": 0.0006033498379919264, "l0_avg": 1.0 }, "original": { "l2_avg": 0.08045138051388905, "l1_avg": 0.06251861949025848, "l0_avg": 0.8774585017451534 }, "merged": { "l2_avg": 0.08042313275942041, "l1_avg": 0.06251869860990548, "l0_avg": 0.8636997590241609 }, "diff": { "l2_avg": 0.004954643489743982, "l1_avg": 0.0009055054629290545, "l0_avg": 0.04879861525547357 }, "num_elements": 1061683200, "num_changed": 51808670, "precision": "mxfp4", "fp4_dist_before": [ 65053858, 88517275, 108843169, 60143562, 87307694, 61851233, 43580391, 15673236, 65046392, 88501443, 108807195, 60097490, 87271034, 61798364, 43530528, 15660336 ], "fp4_dist_after": [ 72360874, 124002722, 108880539, 72441379, 73977531, 50357829, 25387518, 3564788, 72346802, 123966002, 108838797, 72398499, 73940722, 50312047, 25347553, 3559598 ], "bf16_dists": null }, "model.layers.3.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010717844162996032, "l1_avg": 0.0092696832285987, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008514652909861647, "l1_avg": 0.0006938665277428098, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018427687221103245, "l1_avg": 0.019744160970052084, "l0_avg": 0.868210115785952 }, "merged": { "l2_avg": 0.018427757422129314, "l1_avg": 0.019744127061631943, "l0_avg": 0.8533203845553928 }, "diff": { "l2_avg": 0.0013792149722576142, "l1_avg": 0.00031005147062701944, "l0_avg": 0.051989156934950084 }, "num_elements": 2123366400, "num_changed": 110392029, "precision": "mxfp4", "fp4_dist_before": [ 139894867, 175379146, 209383143, 119148172, 172823256, 126999818, 88121303, 29630329, 139943345, 175483061, 209571674, 119219266, 172932810, 127058432, 88137684, 29640094 ], "fp4_dist_after": [ 155703227, 242553237, 209760807, 140419137, 146667872, 104457790, 54495541, 7326523, 155751340, 242732613, 209905630, 140505883, 146766487, 104502240, 54493368, 7324705 ], "bf16_dists": null }, "model.layers.20.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010748046530158812, "l1_avg": 0.009298345777723524, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008585734875570073, "l1_avg": 0.000702471203274197, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019724296198950872, "l1_avg": 0.018105713644145448, "l0_avg": 0.877213892524625 }, "merged": { "l2_avg": 0.019730424880981444, "l1_avg": 0.018105766390576775, "l0_avg": 0.8634405461064092 }, "diff": { "l2_avg": 0.0014511032236946954, "l1_avg": 0.00027247052133819204, "l0_avg": 0.048608094203619306 }, "num_elements": 2123366400, "num_changed": 103212794, "precision": "mxfp4", "fp4_dist_before": [ 130342756, 176544011, 212704908, 120894334, 174776168, 126826241, 88136575, 30629046, 130377139, 176659404, 212900162, 121106992, 175094527, 127175353, 88438499, 30760285 ], "fp4_dist_after": [ 144971005, 244079397, 213499750, 144692256, 149944394, 104374978, 52357300, 6947721, 144994751, 244267758, 213740034, 144971914, 150279442, 104713133, 52553484, 6979083 ], "bf16_dists": null }, "model.layers.21.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010771066844713989, "l1_avg": 0.009318417972988553, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008377618505619466, "l1_avg": 0.000681149133015424, "l0_avg": 1.0 }, "original": { "l2_avg": 0.053839793516870994, "l1_avg": 0.021034638086954754, "l0_avg": 0.9999911838107639 }, "merged": { "l2_avg": 0.05383888249909402, "l1_avg": 0.021035121546851263, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.178050669828769e-05, "l1_avg": 4.049870185554028e-05, "l0_avg": 0.9999864366319444 }, "num_elements": 1474560, "num_changed": 1474540, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 26, 9608, 28985, 7373, 0, 0, 0, 0, 37, 9619, 29018, 7494, 0, 0, 0, 0 ], "lora_B": [ 118, 8074, 0, 0, 0, 0, 0, 0, 107, 8085, 0, 0, 0, 0, 0, 0 ], "original": [ 4597, 332337, 188076, 147520, 64495, 114, 14, 0, 4606, 332897, 187265, 147717, 64798, 116, 8, 0 ], "merged": [ 4324, 332002, 188215, 147698, 64752, 115, 14, 0, 4353, 332598, 187419, 147904, 65041, 117, 8, 0 ] } }, "model.layers.21.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009042230319098257, "l1_avg": 0.007820227183401585, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008354607309471092, "l1_avg": 0.0006770810733238857, "l0_avg": 1.0 }, "original": { "l2_avg": 0.907006907306668, "l1_avg": 0.6183514065212674, "l0_avg": 0.9999998304578993 }, "merged": { "l2_avg": 0.907003139924275, "l1_avg": 0.6183514912923177, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.306456325495372e-05, "l1_avg": 3.391810848067204e-05, "l0_avg": 0.999492899576823 }, "num_elements": 11796480, "num_changed": 11790498, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 68, 16492, 47955, 1231, 0, 0, 0, 0, 53, 16202, 47871, 1200, 0, 0, 0, 0 ], "lora_B": [ 618, 45259, 0, 0, 0, 0, 0, 0, 636, 45647, 0, 0, 0, 0, 0, 0 ], "original": [ 116, 33012, 98476, 373586, 4324610, 881646, 189445, 0, 135, 33223, 98037, 374194, 4318786, 882278, 188936, 0 ], "merged": [ 126, 32865, 98187, 372534, 4319480, 886412, 191306, 0, 119, 33104, 97697, 373022, 4313581, 887273, 190774, 0 ] } }, "model.layers.21.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010794490211264225, "l1_avg": 0.009332219759623209, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008729116936335395, "l1_avg": 0.0007092058076523244, "l0_avg": 1.0 }, "original": { "l2_avg": 0.012157117517205886, "l1_avg": 0.0073843830161624486, "l0_avg": 0.9999925401475694 }, "merged": { "l2_avg": 0.012155778052829087, "l1_avg": 0.007384726074006823, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.493750649071962e-05, "l1_avg": 4.286409045259158e-05, "l0_avg": 0.9999950832790798 }, "num_elements": 11796480, "num_changed": 11796422, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 27, 9570, 28917, 7392, 0, 0, 0, 0, 34, 9745, 28999, 7476, 0, 0, 0, 0 ], "lora_B": [ 826, 64910, 0, 0, 0, 0, 0, 0, 924, 64412, 0, 0, 0, 0, 0, 0 ], "original": [ 20358, 2971271, 2097971, 795437, 15230, 0, 0, 0, 20110, 2971702, 2097240, 792121, 15040, 0, 0, 0 ], "merged": [ 20224, 2964496, 2099739, 799996, 15457, 0, 0, 0, 20348, 2964718, 2099548, 796691, 15263, 0, 0, 0 ] } }, "model.layers.21.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010778957358166804, "l1_avg": 0.009329879283905029, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007955392356961966, "l1_avg": 0.0006371060153469443, "l0_avg": 1.0 }, "original": { "l2_avg": 0.09734348091448373, "l1_avg": 0.06765524546305339, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.09733749961156181, "l1_avg": 0.06765520307752822, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.0041197792283485e-05, "l1_avg": 3.773680008533928e-05, "l0_avg": 0.9999464246961806 }, "num_elements": 1474560, "num_changed": 1474481, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9711, 28852, 7311, 0, 0, 0, 0, 41, 9451, 29337, 7422, 0, 0, 0, 0 ], "lora_B": [ 134, 8195, 0, 0, 0, 0, 0, 0, 128, 7927, 0, 0, 0, 0, 0, 0 ], "original": [ 115, 31781, 93401, 303335, 307870, 275, 14, 0, 126, 32351, 93442, 303677, 307889, 274, 10, 0 ], "merged": [ 114, 31692, 93110, 302724, 308894, 277, 14, 0, 118, 32202, 93157, 303102, 308869, 277, 10, 0 ] } }, "model.layers.22.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010796605971515235, "l1_avg": 0.00934712224536472, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008611490484327078, "l1_avg": 0.0006989951943978667, "l0_avg": 1.0 }, "original": { "l2_avg": 0.006499489048114691, "l1_avg": 0.0034466332859463163, "l0_avg": 0.9999905056423611 }, "merged": { "l2_avg": 0.006499540096524608, "l1_avg": 0.003447045882542928, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.354420471840474e-05, "l1_avg": 4.179206832001607e-05, "l0_avg": 0.9999986436631945 }, "num_elements": 1474560, "num_changed": 1474558, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 34, 9596, 28838, 7394, 0, 0, 0, 0, 46, 9579, 29196, 7477, 0, 0, 0, 0 ], "lora_B": [ 116, 7960, 0, 0, 0, 0, 0, 0, 84, 8224, 0, 0, 0, 0, 0, 0 ], "original": [ 3067, 529820, 189162, 13611, 736, 0, 0, 0, 3006, 530849, 189730, 13869, 710, 0, 0, 0 ], "merged": [ 3010, 528490, 190318, 13711, 745, 0, 0, 0, 3069, 529624, 190904, 13975, 714, 0, 0, 0 ] } }, "model.layers.22.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009036404175836133, "l1_avg": 0.007812882773578167, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008302949263735968, "l1_avg": 0.0006729288233651055, "l0_avg": 1.0 }, "original": { "l2_avg": 0.5406267304709854, "l1_avg": 0.39375084771050345, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 0.5406145042488795, "l1_avg": 0.39375084771050345, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.293868477162665e-05, "l1_avg": 3.369286294198699e-05, "l0_avg": 0.9996807522243923 }, "num_elements": 11796480, "num_changed": 11792714, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 65, 16230, 47781, 1053, 0, 0, 0, 0, 68, 16628, 48099, 1148, 0, 0, 0, 0 ], "lora_B": [ 646, 45319, 0, 0, 0, 0, 0, 0, 621, 45574, 0, 0, 0, 0, 0, 0 ], "original": [ 201, 44710, 135681, 534032, 4822405, 336604, 24942, 0, 186, 45113, 135629, 533562, 4822286, 336100, 25029, 0 ], "merged": [ 186, 44495, 135304, 532236, 4821111, 339958, 25283, 0, 167, 44915, 135254, 531849, 4820836, 339505, 25381, 0 ] } }, "model.layers.22.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010772965060202738, "l1_avg": 0.009326036771138508, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008539909690566246, "l1_avg": 0.0006916338461451232, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01462056243818822, "l1_avg": 0.010086721844143338, "l0_avg": 0.9999968634711371 }, "merged": { "l2_avg": 0.014618301120218764, "l1_avg": 0.010086852974361843, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.258036312451557e-05, "l1_avg": 4.1472606567872893e-05, "l0_avg": 0.999993642171224 }, "num_elements": 11796480, "num_changed": 11796405, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 41, 9578, 29108, 7441, 0, 0, 0, 0, 39, 9619, 28991, 7343, 0, 0, 0, 0 ], "lora_B": [ 913, 64553, 0, 0, 0, 0, 0, 0, 878, 64728, 0, 0, 0, 0, 0, 0 ], "original": [ 8233, 1852939, 2821475, 1195790, 20179, 0, 0, 0, 8235, 1853092, 2820515, 1195512, 20510, 0, 0, 0 ], "merged": [ 8165, 1845665, 2821575, 1202787, 20402, 0, 0, 0, 8185, 1846185, 2820228, 1202537, 20751, 0, 0, 0 ] } }, "model.layers.22.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010760989103236281, "l1_avg": 0.009312045574188233, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008442169055342674, "l1_avg": 0.0006820269045419991, "l0_avg": 1.0 }, "original": { "l2_avg": 0.10051231488686335, "l1_avg": 0.06882318390740289, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.10050656605054657, "l1_avg": 0.06882323688930936, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2326466988096264e-05, "l1_avg": 4.08066643608941e-05, "l0_avg": 0.9999498155381944 }, "num_elements": 1474560, "num_changed": 1474486, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 26, 9578, 29035, 7362, 0, 0, 0, 0, 45, 9552, 29213, 7349, 0, 0, 0, 0 ], "lora_B": [ 117, 8115, 0, 0, 0, 0, 0, 0, 134, 8018, 0, 0, 0, 0, 0, 0 ], "original": [ 130, 30715, 90491, 298976, 318024, 351, 60, 0, 105, 30676, 90073, 297842, 316697, 359, 61, 0 ], "merged": [ 103, 30589, 90221, 298316, 319105, 353, 60, 0, 112, 30533, 89751, 297230, 317765, 359, 63, 0 ] } }, "model.layers.23.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010784600956223134, "l1_avg": 0.009333602587381999, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008398827048949897, "l1_avg": 0.0006806112360209227, "l0_avg": 1.0 }, "original": { "l2_avg": 0.026107510792300906, "l1_avg": 0.01140317784415351, "l0_avg": 0.9999911838107639 }, "merged": { "l2_avg": 0.026106950045151976, "l1_avg": 0.011403585804833307, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.179717719465111e-05, "l1_avg": 4.061065717703766e-05, "l0_avg": 0.9999925401475694 }, "num_elements": 1474560, "num_changed": 1474549, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9634, 29250, 7395, 0, 0, 0, 0, 35, 9666, 28743, 7402, 0, 0, 0, 0 ], "lora_B": [ 128, 8080, 0, 0, 0, 0, 0, 0, 122, 8054, 0, 0, 0, 0, 0, 0 ], "original": [ 2877, 347257, 245107, 122146, 20247, 6, 0, 0, 2817, 346485, 244668, 122719, 20222, 9, 0, 0 ], "merged": [ 2777, 346517, 245453, 122595, 20354, 6, 0, 0, 2765, 345669, 244903, 123151, 20361, 9, 0, 0 ] } }, "model.layers.23.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009039553994217814, "l1_avg": 0.007819723337888718, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008301222453100517, "l1_avg": 0.0006720126503043705, "l0_avg": 1.0 }, "original": { "l2_avg": 0.8038526364967319, "l1_avg": 0.5722463819715712, "l0_avg": 0.9999998304578993 }, "merged": { "l2_avg": 0.8038480872047855, "l1_avg": 0.572246339586046, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.299852629366878e-05, "l1_avg": 3.350639664050606e-05, "l0_avg": 0.9995093451605903 }, "num_elements": 11796480, "num_changed": 11790692, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 53, 16320, 48006, 1257, 0, 0, 0, 0, 50, 16456, 47804, 1126, 0, 0, 0, 0 ], "lora_B": [ 606, 44978, 0, 0, 0, 0, 0, 0, 691, 45885, 0, 0, 0, 0, 0, 0 ], "original": [ 114, 31195, 94169, 373063, 4451301, 817187, 129026, 0, 126, 31279, 94322, 374639, 4453934, 817832, 128293, 0 ], "merged": [ 125, 31056, 93886, 371872, 4446201, 822236, 130674, 0, 128, 31114, 94039, 373464, 4448918, 822853, 129914, 0 ] } }, "model.layers.23.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010790148740340849, "l1_avg": 0.009329810407426623, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008604829008163181, "l1_avg": 0.0006986594526097178, "l0_avg": 1.0 }, "original": { "l2_avg": 0.012875610207322336, "l1_avg": 0.008607690864139133, "l0_avg": 0.9999952528211805 }, "merged": { "l2_avg": 0.012873886452170765, "l1_avg": 0.008607943852742514, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3859222395724944e-05, "l1_avg": 4.169020232641035e-05, "l0_avg": 0.999993896484375 }, "num_elements": 11796480, "num_changed": 11796408, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9675, 28911, 7504, 0, 0, 0, 0, 44, 9649, 28926, 7420, 0, 0, 0, 0 ], "lora_B": [ 891, 64692, 0, 0, 0, 0, 0, 0, 915, 64574, 0, 0, 0, 0, 0, 0 ], "original": [ 14462, 2318624, 2599252, 953126, 13190, 0, 0, 0, 14435, 2316936, 2598670, 954376, 13409, 0, 0, 0 ], "merged": [ 14441, 2311379, 2600418, 958955, 13372, 0, 0, 0, 14439, 2309683, 2599977, 960229, 13587, 0, 0, 0 ] } }, "model.layers.23.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010808987174320415, "l1_avg": 0.009344257248772515, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008043375564739108, "l1_avg": 0.0006496068090200424, "l0_avg": 1.0 }, "original": { "l2_avg": 0.084807888869072, "l1_avg": 0.06196324560377333, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.08480207720394303, "l1_avg": 0.0619632879892985, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.166462926202726e-05, "l1_avg": 3.959390386525128e-05, "l0_avg": 0.99996337890625 }, "num_elements": 1474560, "num_changed": 1474506, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9490, 29095, 7389, 0, 0, 0, 0, 48, 9608, 29079, 7416, 0, 0, 0, 0 ], "lora_B": [ 116, 8078, 0, 0, 0, 0, 0, 0, 127, 8063, 0, 0, 0, 0, 0, 0 ], "original": [ 142, 33709, 99627, 317531, 285948, 61, 2, 0, 142, 34151, 99911, 317786, 285503, 47, 0, 0 ], "merged": [ 148, 33557, 99327, 316969, 286994, 63, 2, 0, 129, 33971, 99589, 317189, 286575, 47, 0, 0 ] } }, "model.layers.20.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.002030883587466815, "l1_avg": 0.0017247060934702555, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000731367820115957, "l1_avg": 0.0005821262796719869, "l0_avg": 1.0 }, "original": { "l2_avg": 2.4045152763106703, "l1_avg": 2.0074358603395064, "l0_avg": 0.8699720519266011 }, "merged": { "l2_avg": 2.404280542715977, "l1_avg": 2.0075828269675924, "l0_avg": 0.8555212543628834 }, "diff": { "l2_avg": 0.14973108133912885, "l1_avg": 0.029946942741488233, "l0_avg": 0.05061127179934655 }, "num_elements": 1061683200, "num_changed": 53733137, "precision": "mxfp4", "fp4_dist_before": [ 69025875, 91153369, 111387359, 58057493, 84642296, 58861441, 42260480, 15567076, 69022613, 91118135, 111349414, 58052920, 84602068, 58809090, 42224037, 15549534 ], "fp4_dist_after": [ 76696287, 127916526, 110072071, 69596079, 70302264, 47448640, 24975967, 3946200, 76694370, 127871045, 110042576, 69570864, 70255069, 47406744, 24951880, 3936618 ], "bf16_dists": null }, "model.layers.21.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020244170066477255, "l1_avg": 0.001720605625046624, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007659171385225286, "l1_avg": 0.0006136181867784924, "l0_avg": 1.0 }, "original": { "l2_avg": 2.8834401435751977, "l1_avg": 2.415666232638889, "l0_avg": 0.8729031155433183 }, "merged": { "l2_avg": 2.8833010777070447, "l1_avg": 2.415836226851852, "l0_avg": 0.8597204768804856 }, "diff": { "l2_avg": 0.17085154522390056, "l1_avg": 0.03273703492717978, "l0_avg": 0.04641446148907697 }, "num_elements": 1061683200, "num_changed": 49277454, "precision": "mxfp4", "fp4_dist_before": [ 67461442, 90088993, 110579461, 58503509, 85544861, 59386540, 43146535, 16241616, 67475185, 90089663, 110547151, 58481367, 85490866, 59340315, 43084399, 16221297 ], "fp4_dist_after": [ 74459322, 126155441, 109737288, 71473646, 72599817, 48313271, 24485597, 3727863, 74473091, 126169367, 109681480, 71435828, 72535181, 48272626, 24444969, 3718413 ], "bf16_dists": null }, "model.layers.21.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010719845584892378, "l1_avg": 0.009263752566443548, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008575750984630596, "l1_avg": 0.0007037789457374149, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019388441244761148, "l1_avg": 0.018510713930483216, "l0_avg": 0.8803407565458321 }, "merged": { "l2_avg": 0.019390572441948785, "l1_avg": 0.018510719581886576, "l0_avg": 0.8662978457227165 }, "diff": { "l2_avg": 0.0013935221566094293, "l1_avg": 0.00028024879502661434, "l0_avg": 0.049886514169198495 }, "num_elements": 2123366400, "num_changed": 105927348, "precision": "mxfp4", "fp4_dist_before": [ 127060646, 173731316, 210794589, 122023649, 177038021, 130139070, 89754173, 30300610, 127019771, 173831259, 211019942, 122206626, 177405368, 130503804, 90094291, 30443265 ], "fp4_dist_after": [ 141953493, 240937244, 212668221, 145324261, 151713497, 106970185, 54217982, 7043869, 141945169, 241087923, 212964686, 145587090, 152092374, 107322815, 54449945, 7087646 ], "bf16_dists": null }, "model.layers.22.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020199546243924297, "l1_avg": 0.0017175926102532281, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000768431567220983, "l1_avg": 0.0006156521124972237, "l0_avg": 1.0 }, "original": { "l2_avg": 3.130351112943868, "l1_avg": 2.7752736786265433, "l0_avg": 0.874039623119213 }, "merged": { "l2_avg": 3.1302674336542378, "l1_avg": 2.7755164930555556, "l0_avg": 0.8609791715645496 }, "diff": { "l2_avg": 0.19619268817821656, "l1_avg": 0.038364721227575234, "l0_avg": 0.045864537556966144 }, "num_elements": 1061683200, "num_changed": 48693609, "precision": "mxfp4", "fp4_dist_before": [ 66858935, 91135646, 109806186, 60003011, 85749492, 59755892, 42189413, 15455563, 66871081, 91104027, 109771737, 59970033, 85701124, 59710301, 42158555, 15442204 ], "fp4_dist_after": [ 73801399, 125295474, 108996218, 71667252, 73185977, 49467281, 24914861, 3632722, 73794679, 125257936, 108952091, 71626832, 73147128, 49423737, 24890839, 3628774 ], "bf16_dists": null }, "model.layers.22.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010755526872263079, "l1_avg": 0.009294064839680989, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008589188496840976, "l1_avg": 0.000703904777765274, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019145807955000135, "l1_avg": 0.018444686701268326, "l0_avg": 0.8810588426943179 }, "merged": { "l2_avg": 0.019146772225697835, "l1_avg": 0.018444690468870564, "l0_avg": 0.8671551504252869 }, "diff": { "l2_avg": 0.0013634593122535282, "l1_avg": 0.00027582053784970886, "l0_avg": 0.0494805281839253 }, "num_elements": 2123366400, "num_changed": 105065291, "precision": "mxfp4", "fp4_dist_before": [ 126255630, 173030622, 210470288, 122158008, 177476582, 130580156, 90172639, 30522305, 126300027, 173168748, 210714619, 122391758, 177849236, 131010036, 90570207, 30695539 ], "fp4_dist_after": [ 141046908, 240231131, 212548588, 145884898, 152409717, 107391090, 54192151, 6996050, 141031382, 240444550, 212861253, 146213500, 152790172, 107811373, 54468590, 7045047 ], "bf16_dists": null }, "model.layers.24.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010819099471644836, "l1_avg": 0.009368165996339587, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008699767058715224, "l1_avg": 0.0007025087252259254, "l0_avg": 1.0 }, "original": { "l2_avg": 0.004480044188176106, "l1_avg": 0.0029819644159740873, "l0_avg": 0.9999918619791667 }, "merged": { "l2_avg": 0.004480137253354184, "l1_avg": 0.002982382641898261, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3591565616980094e-05, "l1_avg": 4.174705698258347e-05, "l0_avg": 0.9999986436631945 }, "num_elements": 1474560, "num_changed": 1474558, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9453, 28858, 7473, 0, 0, 0, 0, 44, 9535, 29180, 7584, 0, 0, 0, 0 ], "lora_B": [ 126, 8111, 0, 0, 0, 0, 0, 0, 123, 8024, 0, 0, 0, 0, 0, 0 ], "original": [ 3056, 548800, 178586, 6773, 53, 0, 0, 0, 3167, 548833, 178549, 6684, 59, 0, 0, 0 ], "merged": [ 3129, 547571, 179682, 6859, 53, 0, 0, 0, 3097, 547498, 179857, 6755, 59, 0, 0, 0 ] } }, "model.layers.24.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009028850670329571, "l1_avg": 0.00781028438359499, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008391099068960002, "l1_avg": 0.0006794935713211695, "l0_avg": 1.0 }, "original": { "l2_avg": 0.64126058691858, "l1_avg": 0.47400021023220484, "l0_avg": 0.9999998304578993 }, "merged": { "l2_avg": 0.6412482896137874, "l1_avg": 0.47400025261773004, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.324666491555505e-05, "l1_avg": 3.3978688427143626e-05, "l0_avg": 0.9996036105685764 }, "num_elements": 11796480, "num_changed": 11791804, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 57, 16549, 48088, 1079, 0, 0, 0, 0, 67, 16241, 47868, 1123, 0, 0, 0, 0 ], "lora_B": [ 628, 45482, 0, 0, 0, 0, 0, 0, 641, 45409, 0, 0, 0, 0, 0, 0 ], "original": [ 148, 35798, 107259, 427080, 4724309, 562138, 39971, 0, 126, 35494, 107339, 426547, 4727643, 562594, 40034, 0 ], "merged": [ 144, 35578, 106953, 425685, 4720793, 566924, 40585, 0, 142, 35354, 107026, 425167, 4724099, 567396, 40634, 0 ] } }, "model.layers.24.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010786800750010466, "l1_avg": 0.009334439039230346, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008458328373736711, "l1_avg": 0.0006751060718670487, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01720783228866994, "l1_avg": 0.012095442083146836, "l0_avg": 0.9999974568684896 }, "merged": { "l2_avg": 0.017204519169071272, "l1_avg": 0.01209552420510186, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2252130662390074e-05, "l1_avg": 4.017627539320125e-05, "l0_avg": 0.9999899970160591 }, "num_elements": 11796480, "num_changed": 11796362, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 37, 9548, 28896, 7276, 0, 0, 0, 0, 33, 9630, 29223, 7517, 0, 0, 0, 0 ], "lora_B": [ 919, 64654, 0, 0, 0, 0, 0, 0, 974, 64525, 0, 0, 0, 0, 0, 0 ], "original": [ 6348, 1518333, 2782556, 1550571, 38346, 0, 0, 0, 6276, 1518947, 2782292, 1554702, 38109, 0, 0, 0 ], "merged": [ 6381, 1512144, 2780437, 1558274, 38842, 0, 0, 0, 6332, 1512651, 2780509, 1562307, 38603, 0, 0, 0 ] } }, "model.layers.24.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010778310221401091, "l1_avg": 0.009323243962393866, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008590805809944868, "l1_avg": 0.0006984431529417634, "l0_avg": 1.0 }, "original": { "l2_avg": 0.07881692287582331, "l1_avg": 0.05986563894483778, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.07881124943408119, "l1_avg": 0.059865575366550024, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3353423557593474e-05, "l1_avg": 4.149440210312605e-05, "l0_avg": 0.9999525282118056 }, "num_elements": 1474560, "num_changed": 1474490, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 41, 9621, 28897, 7421, 0, 0, 0, 0, 28, 9668, 29100, 7384, 0, 0, 0, 0 ], "lora_B": [ 106, 8114, 0, 0, 0, 0, 0, 0, 116, 8048, 0, 0, 0, 0, 0, 0 ], "original": [ 131, 34121, 100706, 321973, 281355, 3, 0, 0, 123, 34380, 100260, 320729, 280777, 2, 0, 0 ], "merged": [ 130, 33935, 100367, 321372, 282457, 3, 0, 0, 136, 34205, 100012, 320093, 281848, 2, 0, 0 ] } }, "model.layers.25.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010779846385859507, "l1_avg": 0.0093255877494812, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007874944130890071, "l1_avg": 0.0006364521686919034, "l0_avg": 1.0 }, "original": { "l2_avg": 0.029491209878100347, "l1_avg": 0.011532900068495009, "l0_avg": 0.9999857584635417 }, "merged": { "l2_avg": 0.02949079520793979, "l1_avg": 0.011533357037438286, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.864190074733019e-05, "l1_avg": 3.7881144736376074e-05, "l0_avg": 0.9999925401475694 }, "num_elements": 1474560, "num_changed": 1474549, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 47, 9568, 28982, 7466, 0, 0, 0, 0, 49, 9712, 28986, 7350, 0, 0, 0, 0 ], "lora_B": [ 127, 8116, 0, 0, 0, 0, 0, 0, 121, 8020, 0, 0, 0, 0, 0, 0 ], "original": [ 4528, 401078, 199214, 105009, 26824, 17, 0, 0, 4679, 401594, 199290, 105455, 26860, 12, 0, 0 ], "merged": [ 4356, 400521, 199604, 105230, 26956, 17, 0, 0, 4394, 401128, 199676, 105632, 27034, 12, 0, 0 ] } }, "model.layers.25.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009032682740821522, "l1_avg": 0.007818262092769146, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008361176847147294, "l1_avg": 0.0006787609722879198, "l0_avg": 1.0 }, "original": { "l2_avg": 0.8117830475169391, "l1_avg": 0.5716776106092665, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.8117789247211127, "l1_avg": 0.5716776106092665, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.299274735454313e-05, "l1_avg": 3.368981803456942e-05, "l0_avg": 0.9995328267415364 }, "num_elements": 11796480, "num_changed": 11790969, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 67, 16268, 48228, 1075, 0, 0, 0, 0, 61, 16273, 48055, 1045, 0, 0, 0, 0 ], "lora_B": [ 643, 45208, 0, 0, 0, 0, 0, 0, 626, 45683, 0, 0, 0, 0, 0, 0 ], "original": [ 137, 32221, 97154, 386218, 4419807, 825612, 137463, 0, 122, 32142, 97522, 386448, 4418142, 825185, 138307, 0 ], "merged": [ 124, 32070, 96902, 384943, 4415067, 830423, 139079, 0, 108, 32008, 97230, 385220, 4413547, 829771, 139988, 0 ] } }, "model.layers.25.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010795721655983448, "l1_avg": 0.009343740675184462, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008427170986702779, "l1_avg": 0.0006810717168264091, "l0_avg": 1.0 }, "original": { "l2_avg": 0.014872922635845375, "l1_avg": 0.009592245684729682, "l0_avg": 0.9999960157606337 }, "merged": { "l2_avg": 0.014870393647133955, "l1_avg": 0.009592440393235948, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3259080435234624e-05, "l1_avg": 4.0632194011575644e-05, "l0_avg": 0.9999921162923177 }, "num_elements": 11796480, "num_changed": 11796387, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 39, 9529, 28952, 7406, 0, 0, 0, 0, 49, 9662, 29082, 7441, 0, 0, 0, 0 ], "lora_B": [ 961, 64449, 0, 0, 0, 0, 0, 0, 950, 64712, 0, 0, 0, 0, 0, 0 ], "original": [ 13231, 2358957, 2352431, 1148268, 28244, 0, 0, 0, 13422, 2357763, 2349286, 1146287, 28591, 0, 0, 0 ], "merged": [ 13303, 2352283, 2353138, 1153798, 28615, 0, 0, 0, 13322, 2351109, 2350045, 1151893, 28974, 0, 0, 0 ] } }, "model.layers.25.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010797007290553195, "l1_avg": 0.009336858325534396, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000804371724370867, "l1_avg": 0.0006448579370044172, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0981751207701186, "l1_avg": 0.07090628412034776, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.0981684169358563, "l1_avg": 0.070906310611301, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.112263565455176e-05, "l1_avg": 3.87831823900342e-05, "l0_avg": 0.9999423556857638 }, "num_elements": 1474560, "num_changed": 1474475, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 49, 9550, 28953, 7373, 0, 0, 0, 0, 35, 9737, 28901, 7562, 0, 0, 0, 0 ], "lora_B": [ 107, 8131, 0, 0, 0, 0, 0, 0, 119, 8027, 0, 0, 0, 0, 0, 0 ], "original": [ 97, 29859, 88874, 294969, 323442, 129, 0, 0, 104, 30424, 88683, 294333, 323525, 117, 4, 0 ], "merged": [ 119, 29719, 88580, 294425, 324419, 130, 0, 0, 101, 30254, 88414, 293660, 324616, 119, 4, 0 ] } }, "model.layers.4.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010698106500110372, "l1_avg": 0.009250824981265598, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008520695274535441, "l1_avg": 0.0006950206226772732, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018361657195621068, "l1_avg": 0.019673968656563463, "l0_avg": 0.8730530420939128 }, "merged": { "l2_avg": 0.01836204926172892, "l1_avg": 0.019674011983989197, "l0_avg": 0.8583575453581633 }, "diff": { "l2_avg": 0.0013679358694288467, "l1_avg": 0.00030545063960699386, "l0_avg": 0.0517106600160952 }, "num_elements": 2123366400, "num_changed": 109800678, "precision": "mxfp4", "fp4_dist_before": [ 134746115, 173786862, 208954748, 120467095, 174941392, 129211867, 89239649, 29692360, 134808790, 173928620, 209198230, 120636129, 175183344, 129419362, 89397777, 29754060 ], "fp4_dist_after": [ 150382730, 240676499, 210272048, 142197683, 148956676, 106231642, 55099365, 7262012, 150376099, 240903601, 210542298, 142412652, 149164837, 106404647, 55202514, 7281097 ], "bf16_dists": null }, "model.layers.23.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020084456154882075, "l1_avg": 0.001710525651772817, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007948405294762041, "l1_avg": 0.0006400759021441142, "l0_avg": 1.0 }, "original": { "l2_avg": 3.2615487301845296, "l1_avg": 3.0401063368055556, "l0_avg": 0.8769792128197941 }, "merged": { "l2_avg": 3.2615412973536455, "l1_avg": 3.040370611496914, "l0_avg": 0.863701432781455 }, "diff": { "l2_avg": 0.20961200497534702, "l1_avg": 0.042712794174382715, "l0_avg": 0.04691104370870708 }, "num_elements": 1061683200, "num_changed": 49804667, "precision": "mxfp4", "fp4_dist_before": [ 65306874, 89870303, 108599758, 60715197, 86883361, 61441356, 42854979, 15253611, 65302229, 89855639, 108591982, 60677162, 86865998, 61415796, 42817728, 15231227 ], "fp4_dist_after": [ 72339490, 123453660, 108421436, 72051315, 74275641, 50906422, 25784624, 3677324, 72366409, 123427329, 108425306, 72013078, 74239467, 50864048, 25764683, 3672968 ], "bf16_dists": null }, "model.layers.23.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010744322463777876, "l1_avg": 0.009288077221976386, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008544443583848801, "l1_avg": 0.0007008546756373512, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019668798976474337, "l1_avg": 0.019160295651282792, "l0_avg": 0.8816407719364873 }, "merged": { "l2_avg": 0.019669885105556912, "l1_avg": 0.019160282464674962, "l0_avg": 0.8672053725631149 }, "diff": { "l2_avg": 0.001398082905345493, "l1_avg": 0.000294346927124777, "l0_avg": 0.05148156578158155 }, "num_elements": 2123366400, "num_changed": 109314227, "precision": "mxfp4", "fp4_dist_before": [ 125658923, 170924926, 210117193, 121268157, 178292705, 132174931, 91754092, 30600924, 125661085, 171001664, 210346415, 121421779, 178638493, 132563404, 92158828, 30782881 ], "fp4_dist_after": [ 140982415, 239576920, 212657899, 145069813, 152106653, 107705067, 55418673, 7277347, 140989235, 239738600, 212919056, 145311221, 152478545, 108102331, 55700389, 7332236 ], "bf16_dists": null }, "model.layers.24.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.002013011983939422, "l1_avg": 0.001713620788521237, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007806835782810662, "l1_avg": 0.0006285067233774397, "l0_avg": 1.0 }, "original": { "l2_avg": 3.573751124651164, "l1_avg": 3.338803047839506, "l0_avg": 0.8758075412703149 }, "merged": { "l2_avg": 3.5737295454647264, "l1_avg": 3.3391372492283953, "l0_avg": 0.8620955073980637 }, "diff": { "l2_avg": 0.2315679580139064, "l1_avg": 0.04794540593653549, "l0_avg": 0.048482144202715086 }, "num_elements": 1061683200, "num_changed": 51472678, "precision": "mxfp4", "fp4_dist_before": [ 65922572, 89681799, 108941894, 60132987, 86545875, 61438476, 42913399, 15271440, 65930475, 89673427, 108943354, 60141935, 86539633, 61422031, 42921754, 15262149 ], "fp4_dist_after": [ 73202656, 123916385, 108698915, 71376591, 73368837, 50493703, 25954451, 3838285, 73208227, 123919050, 108685871, 71367475, 73374475, 50487982, 25956557, 3833740 ], "bf16_dists": null }, "model.layers.24.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010761263467210153, "l1_avg": 0.009320998191833496, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008564608687466087, "l1_avg": 0.0007027107808325026, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01953603161705865, "l1_avg": 0.01901042608567226, "l0_avg": 0.8821097790753399 }, "merged": { "l2_avg": 0.019537376032935247, "l1_avg": 0.019010456226490162, "l0_avg": 0.8678220767739379 }, "diff": { "l2_avg": 0.0013940725061628554, "l1_avg": 0.00029004514953236523, "l0_avg": 0.050966728116259165 }, "num_elements": 2123366400, "num_changed": 108221038, "precision": "mxfp4", "fp4_dist_before": [ 125161636, 171272210, 210042987, 121712205, 178386487, 132188528, 91590181, 30588126, 125162498, 171356984, 210221640, 121883112, 178693095, 132492108, 91898292, 30716311 ], "fp4_dist_after": [ 140339326, 239447011, 212570648, 145505670, 152558498, 108025866, 55305514, 7203157, 140322835, 239587837, 212819785, 145726454, 152858080, 108334155, 55523192, 7238372 ], "bf16_dists": null }, "model.layers.5.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010772961133401975, "l1_avg": 0.00931628942489624, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008557713008485734, "l1_avg": 0.0006940073799341917, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01818343104656436, "l1_avg": 0.012079694535997179, "l0_avg": 0.9999979654947917 }, "merged": { "l2_avg": 0.018182711656664612, "l1_avg": 0.012079849508073595, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.297912581738216e-05, "l1_avg": 4.13146439111895e-05, "l0_avg": 0.9999959309895833 }, "num_elements": 1474560, "num_changed": 1474554, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9733, 28980, 7307, 0, 0, 0, 0, 35, 9605, 28917, 7550, 0, 0, 0, 0 ], "lora_B": [ 94, 8079, 0, 0, 0, 0, 0, 0, 117, 8094, 0, 0, 0, 0, 0, 0 ], "original": [ 928, 216397, 330194, 181223, 7779, 0, 0, 0, 945, 216012, 332320, 180912, 7850, 0, 0, 0 ], "merged": [ 930, 215574, 330281, 181905, 7881, 0, 0, 0, 940, 215062, 332426, 181603, 7958, 0, 0, 0 ] } }, "model.layers.5.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009034230320415282, "l1_avg": 0.00781539548188448, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008243071442304066, "l1_avg": 0.0006668103237946828, "l0_avg": 1.0 }, "original": { "l2_avg": 0.055528324297795334, "l1_avg": 0.035152064429389104, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.055524441406036386, "l1_avg": 0.03515208032396105, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.214325198015814e-05, "l1_avg": 3.280039462778303e-05, "l0_avg": 0.9999695671929254 }, "num_elements": 11796480, "num_changed": 11796121, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 62, 16184, 48135, 1108, 0, 0, 0, 0, 60, 16562, 47872, 1089, 0, 0, 0, 0 ], "lora_B": [ 666, 45441, 0, 0, 0, 0, 0, 0, 643, 45410, 0, 0, 0, 0, 0, 0 ], "original": [ 2262, 566056, 1552728, 2919938, 857681, 257, 20, 0, 2279, 566163, 1553850, 2916014, 858978, 240, 14, 0 ], "merged": [ 2246, 563674, 1548928, 2921125, 862757, 263, 20, 0, 2260, 563687, 1549955, 2917089, 864218, 244, 14, 0 ] } }, "model.layers.5.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.01078247184484953, "l1_avg": 0.009334115187327068, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008375995492987441, "l1_avg": 0.0006764185964129865, "l0_avg": 1.0 }, "original": { "l2_avg": 0.016304067018435165, "l1_avg": 0.011417382293277316, "l0_avg": 0.9999970330132378 }, "merged": { "l2_avg": 0.01630116262428481, "l1_avg": 0.011417514748043485, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2757045945050874e-05, "l1_avg": 4.037901655667358e-05, "l0_avg": 0.9999915228949653 }, "num_elements": 11796480, "num_changed": 11796380, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 30, 9672, 29137, 7409, 0, 0, 0, 0, 39, 9500, 28950, 7423, 0, 0, 0, 0 ], "lora_B": [ 959, 64830, 0, 0, 0, 0, 0, 0, 867, 64416, 0, 0, 0, 0, 0, 0 ], "original": [ 7319, 1667380, 2749498, 1447883, 27246, 0, 0, 0, 7142, 1664817, 2748608, 1449325, 27262, 0, 0, 0 ], "merged": [ 7216, 1660681, 2748577, 1454993, 27651, 0, 0, 0, 7228, 1658503, 2747368, 1456601, 27662, 0, 0, 0 ] } }, "model.layers.5.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010798238735272416, "l1_avg": 0.009350393878089057, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008171678055077791, "l1_avg": 0.0006584478542208672, "l0_avg": 1.0 }, "original": { "l2_avg": 0.034579665115546755, "l1_avg": 0.026100669966803658, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.03457720222610831, "l1_avg": 0.026100709703233506, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1156933804964536e-05, "l1_avg": 3.880331189268165e-05, "l0_avg": 0.9999762641059028 }, "num_elements": 1474560, "num_changed": 1474525, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 34, 9575, 29244, 7435, 0, 0, 0, 0, 34, 9538, 28762, 7538, 0, 0, 0, 0 ], "lora_B": [ 99, 8019, 0, 0, 0, 0, 0, 0, 122, 8144, 0, 0, 0, 0, 0, 0 ], "original": [ 310, 82438, 218614, 379322, 55991, 0, 0, 0, 333, 82834, 218164, 380475, 56079, 0, 0, 0 ], "merged": [ 333, 82024, 218037, 379684, 56561, 0, 0, 0, 317, 82537, 217645, 380787, 56635, 0, 0, 0 ] } }, "model.layers.6.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01078786491301719, "l1_avg": 0.00933735900455051, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008475874783471227, "l1_avg": 0.0006894966936670244, "l0_avg": 1.0 }, "original": { "l2_avg": 0.013932914253176387, "l1_avg": 0.009702063931359185, "l0_avg": 0.9999952528211805 }, "merged": { "l2_avg": 0.013932265545690369, "l1_avg": 0.009702152676052518, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.239760957379147e-05, "l1_avg": 4.095471878018644e-05, "l0_avg": 0.9999905056423611 }, "num_elements": 1474560, "num_changed": 1474546, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 46, 9716, 28998, 7467, 0, 0, 0, 0, 39, 9579, 29051, 7264, 0, 0, 0, 0 ], "lora_B": [ 106, 8126, 0, 0, 0, 0, 0, 0, 109, 8043, 0, 0, 0, 0, 0, 0 ], "original": [ 1019, 228131, 369319, 136403, 2537, 0, 0, 0, 972, 228052, 369627, 136194, 2306, 0, 0, 0 ], "merged": [ 1006, 227271, 369252, 137325, 2557, 0, 0, 0, 996, 227181, 369567, 137079, 2326, 0, 0, 0 ] } }, "model.layers.6.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009038333052700014, "l1_avg": 0.007824741303920746, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008314129847207915, "l1_avg": 0.0006738426370753182, "l0_avg": 1.0 }, "original": { "l2_avg": 0.042997481780791616, "l1_avg": 0.028255184491475422, "l0_avg": 0.9999994913736979 }, "merged": { "l2_avg": 0.042993119080897695, "l1_avg": 0.028255216280619302, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.298137169509308e-05, "l1_avg": 3.362005938672357e-05, "l0_avg": 0.9999747382269966 }, "num_elements": 11796480, "num_changed": 11796182, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 52, 16294, 48205, 1131, 0, 0, 0, 0, 62, 16297, 47884, 1147, 0, 0, 0, 0 ], "lora_B": [ 613, 45274, 0, 0, 0, 0, 0, 0, 613, 45660, 0, 0, 0, 0, 0, 0 ], "original": [ 2460, 643759, 1752393, 2977926, 522307, 163, 15, 0, 2552, 643891, 1749930, 2978672, 522256, 143, 13, 0 ], "merged": [ 2569, 640898, 1748018, 2981181, 526208, 164, 15, 0, 2629, 641077, 1745499, 2981873, 526190, 145, 14, 0 ] } }, "model.layers.6.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.01078013853983626, "l1_avg": 0.009327860010994806, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008580438165948772, "l1_avg": 0.0006974077550694346, "l0_avg": 1.0 }, "original": { "l2_avg": 0.014632543202891818, "l1_avg": 0.01064317226409912, "l0_avg": 0.9999967787000869 }, "merged": { "l2_avg": 0.014629896483480611, "l1_avg": 0.010643288824293348, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.293156383268551e-05, "l1_avg": 4.125621376766099e-05, "l0_avg": 0.9999927096896701 }, "num_elements": 11796480, "num_changed": 11796394, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9671, 28884, 7435, 0, 0, 0, 0, 48, 9613, 29016, 7457, 0, 0, 0, 0 ], "lora_B": [ 817, 64739, 0, 0, 0, 0, 0, 0, 857, 64659, 0, 0, 0, 0, 0, 0 ], "original": [ 7553, 1694106, 2836149, 1350586, 10707, 0, 0, 0, 7653, 1694423, 2835491, 1349123, 10689, 0, 0, 0 ], "merged": [ 7655, 1687523, 2834732, 1358312, 10910, 0, 0, 0, 7631, 1687866, 2834246, 1356677, 10928, 0, 0, 0 ] } }, "model.layers.6.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01077620624155237, "l1_avg": 0.009323281712002224, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008347448310814798, "l1_avg": 0.0006749710300937295, "l0_avg": 1.0 }, "original": { "l2_avg": 0.04069512640017998, "l1_avg": 0.03152393500010173, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.04069237135676478, "l1_avg": 0.03152395354376899, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.181884822636093e-05, "l1_avg": 3.9833618534935845e-05, "l0_avg": 0.999981689453125 }, "num_elements": 1474560, "num_changed": 1474533, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 44, 9703, 29011, 7342, 0, 0, 0, 0, 36, 9663, 28960, 7401, 0, 0, 0, 0 ], "lora_B": [ 107, 8102, 0, 0, 0, 0, 0, 0, 101, 8074, 0, 0, 0, 0, 0, 0 ], "original": [ 232, 64683, 178903, 402958, 89095, 0, 0, 0, 249, 64275, 180610, 403782, 89773, 0, 0, 0 ], "merged": [ 228, 64366, 178447, 402860, 89939, 0, 0, 0, 245, 64040, 180092, 403722, 90621, 0, 0, 0 ] } }, "model.layers.7.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010770072578760842, "l1_avg": 0.009317102697160509, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008315400336869061, "l1_avg": 0.0006750680622644722, "l0_avg": 1.0 }, "original": { "l2_avg": 0.016864327568554518, "l1_avg": 0.010634958744049072, "l0_avg": 0.9999925401475694 }, "merged": { "l2_avg": 0.016863634880899956, "l1_avg": 0.010635082589255438, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.115281986760286e-05, "l1_avg": 3.961154984103309e-05, "l0_avg": 0.9999925401475694 }, "num_elements": 1474560, "num_changed": 1474549, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9660, 28864, 7485, 0, 0, 0, 0, 33, 9668, 29091, 7323, 0, 0, 0, 0 ], "lora_B": [ 98, 8078, 0, 0, 0, 0, 0, 0, 99, 8109, 0, 0, 0, 0, 0, 0 ], "original": [ 1306, 269723, 302990, 157445, 5569, 1, 1, 0, 1310, 269006, 303638, 157877, 5693, 1, 0, 0 ], "merged": [ 1323, 268868, 303049, 158167, 5634, 1, 1, 0, 1367, 268008, 303819, 158561, 5761, 1, 0, 0 ] } }, "model.layers.7.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009038340296689602, "l1_avg": 0.007817857898771763, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008301829143818372, "l1_avg": 0.0006727387507756551, "l0_avg": 1.0 }, "original": { "l2_avg": 0.060754043533155895, "l1_avg": 0.0396838903427124, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.060748921137048266, "l1_avg": 0.039683898289998375, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3314272429365124e-05, "l1_avg": 3.361018736743265e-05, "l0_avg": 0.9999666002061632 }, "num_elements": 11796480, "num_changed": 11796086, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 79, 16534, 47602, 1411, 0, 0, 0, 0, 55, 16145, 47800, 1446, 0, 0, 0, 0 ], "lora_B": [ 600, 45538, 0, 0, 0, 0, 0, 0, 649, 45373, 0, 0, 0, 0, 0, 0 ], "original": [ 1839, 473183, 1331765, 3013416, 1075628, 368, 34, 0, 1876, 473981, 1330237, 3019408, 1074333, 382, 30, 0 ], "merged": [ 1823, 471181, 1328012, 3012941, 1081870, 375, 34, 0, 1874, 471932, 1326675, 3018709, 1080636, 388, 30, 0 ] } }, "model.layers.7.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010777694499041481, "l1_avg": 0.009327399730682372, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008313311933995106, "l1_avg": 0.0006735086208209395, "l0_avg": 1.0 }, "original": { "l2_avg": 0.014769896056196693, "l1_avg": 0.010669123464160496, "l0_avg": 0.9999972873263889 }, "merged": { "l2_avg": 0.014767160483427159, "l1_avg": 0.010669224129782783, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.249858159109325e-05, "l1_avg": 3.975418706734975e-05, "l0_avg": 0.999991946750217 }, "num_elements": 11796480, "num_changed": 11796385, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 49, 9527, 29170, 7256, 0, 0, 0, 0, 37, 9632, 29190, 7299, 0, 0, 0, 0 ], "lora_B": [ 945, 64448, 0, 0, 0, 0, 0, 0, 895, 64784, 0, 0, 0, 0, 0, 0 ], "original": [ 8132, 1703870, 2830265, 1343477, 12182, 0, 0, 0, 7822, 1706287, 2827607, 1344761, 12077, 0, 0, 0 ], "merged": [ 7834, 1697664, 2829108, 1350788, 12443, 0, 0, 0, 8007, 1699708, 2826497, 1352105, 12326, 0, 0, 0 ] } }, "model.layers.7.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01081958482421912, "l1_avg": 0.009372735685772366, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008273483836092055, "l1_avg": 0.0006685453117825091, "l0_avg": 1.0 }, "original": { "l2_avg": 0.037099895269448194, "l1_avg": 0.02862698237101237, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.03709730986382595, "l1_avg": 0.028626969125535754, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1544604142461115e-05, "l1_avg": 3.934708527392811e-05, "l0_avg": 0.9999742296006945 }, "num_elements": 1474560, "num_changed": 1474522, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9306, 29029, 7453, 0, 0, 0, 0, 34, 9671, 29058, 7578, 0, 0, 0, 0 ], "lora_B": [ 120, 8027, 0, 0, 0, 0, 0, 0, 120, 8117, 0, 0, 0, 0, 0, 0 ], "original": [ 299, 70518, 195966, 403118, 67195, 0, 0, 0, 271, 70909, 196039, 402982, 67263, 0, 0, 0 ], "merged": [ 248, 70211, 195470, 403269, 67877, 0, 0, 0, 269, 70594, 195557, 403146, 67919, 0, 0, 0 ] } }, "model.layers.4.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020404646173286848, "l1_avg": 0.0017304119136598375, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007539397580907955, "l1_avg": 0.0006055277254846361, "l0_avg": 1.0 }, "original": { "l2_avg": 0.1028455483853657, "l1_avg": 0.08412422839506173, "l0_avg": 0.8775528161319686 }, "merged": { "l2_avg": 0.10285520656985113, "l1_avg": 0.08412446952160493, "l0_avg": 0.8637402795862269 }, "diff": { "l2_avg": 0.006441677028275048, "l1_avg": 0.0012037416152012202, "l0_avg": 0.049160565034842785 }, "num_elements": 1061683200, "num_changed": 52192946, "precision": "mxfp4", "fp4_dist_before": [ 65005712, 87549511, 108937004, 59473771, 87420821, 62149225, 44306515, 16089606, 64994406, 87542158, 108895637, 59452136, 87382108, 62140549, 44273977, 16070064 ], "fp4_dist_after": [ 72333669, 123954563, 109090314, 72506531, 73904624, 50137580, 25360502, 3637083, 72330987, 123932682, 109060389, 72475643, 73855364, 50127848, 25347879, 3627542 ], "bf16_dists": null }, "model.layers.5.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020420263539346773, "l1_avg": 0.0017315245336956449, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007704136689910274, "l1_avg": 0.0006190724670886993, "l0_avg": 1.0 }, "original": { "l2_avg": 0.13272244151676074, "l1_avg": 0.11090077341338735, "l0_avg": 0.8797932547110099 }, "merged": { "l2_avg": 0.13274240226421552, "l1_avg": 0.11090159475067515, "l0_avg": 0.8664019992027754 }, "diff": { "l2_avg": 0.008254620438882938, "l1_avg": 0.001547347410225574, "l0_avg": 0.04779431660970052 }, "num_elements": 1061683200, "num_changed": 50742423, "precision": "mxfp4", "fp4_dist_before": [ 63815425, 87166244, 107880223, 60328240, 88097326, 63073430, 44533538, 16015264, 63806057, 87161268, 107869781, 60296843, 88067120, 63050069, 44509020, 16013352 ], "fp4_dist_after": [ 70919074, 122494812, 108372830, 73344027, 75270185, 51407656, 25585338, 3510729, 70919679, 122487789, 108349235, 73322949, 75239682, 51382414, 25566544, 3510257 ], "bf16_dists": null }, "model.layers.5.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010741260354916486, "l1_avg": 0.009292061461342706, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008539641106515869, "l1_avg": 0.0006964626411596934, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018294099966684976, "l1_avg": 0.01948886259102527, "l0_avg": 0.8760210192645037 }, "merged": { "l2_avg": 0.018294578128390842, "l1_avg": 0.019488853172019677, "l0_avg": 0.8612912175684799 }, "diff": { "l2_avg": 0.0013610189159711202, "l1_avg": 0.00030463280501189053, "l0_avg": 0.05205302721188392 }, "num_elements": 2123366400, "num_changed": 110527649, "precision": "mxfp4", "fp4_dist_before": [ 131631832, 172796340, 208177975, 121602685, 176487118, 131146175, 89976493, 29436485, 131620970, 172896771, 208387989, 121718061, 176646594, 131293169, 90086898, 29460845 ], "fp4_dist_after": [ 147270366, 239084680, 210231209, 143074946, 150435834, 107874461, 56022787, 7265362, 147259202, 239264164, 210427490, 143212687, 150591408, 107985875, 56090914, 7275015 ], "bf16_dists": null }, "model.layers.6.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.002043166453588728, "l1_avg": 0.001732067929373847, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007627212626366959, "l1_avg": 0.0006127946078777313, "l0_avg": 1.0 }, "original": { "l2_avg": 0.17607023169509053, "l1_avg": 0.13871083200713735, "l0_avg": 0.8795213082395954 }, "merged": { "l2_avg": 0.17600171777815124, "l1_avg": 0.13871148003472222, "l0_avg": 0.8656621014630353 }, "diff": { "l2_avg": 0.010531183397795511, "l1_avg": 0.001972601855242694, "l0_avg": 0.04945856447573061 }, "num_elements": 1061683200, "num_changed": 52509327, "precision": "mxfp4", "fp4_dist_before": [ 63963793, 86830572, 107903538, 59952604, 88198366, 63369227, 44863086, 15928345, 63946410, 86797865, 107869608, 59912762, 88148379, 63315974, 44777077, 15905594 ], "fp4_dist_after": [ 71316557, 122610051, 108482804, 72769607, 74863022, 51305187, 26022601, 3632277, 71307733, 122568460, 108438984, 72717973, 74796796, 51258728, 25968935, 3623485 ], "bf16_dists": null }, "model.layers.6.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010734328682299897, "l1_avg": 0.009283513493008084, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008575196324022847, "l1_avg": 0.0007001336250040266, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01838554408815172, "l1_avg": 0.019190099268783758, "l0_avg": 0.8756417568819023 }, "merged": { "l2_avg": 0.01838574806849162, "l1_avg": 0.019190082314573687, "l0_avg": 0.8609388511563525 }, "diff": { "l2_avg": 0.0013549397389094034, "l1_avg": 0.00029968202849965035, "l0_avg": 0.051994682123631605 }, "num_elements": 2123366400, "num_changed": 110403761, "precision": "mxfp4", "fp4_dist_before": [ 132002410, 172129850, 208812735, 120598571, 176337901, 130830325, 90594718, 30000841, 132055705, 172223533, 208961793, 120683230, 176459655, 130948725, 90689615, 30036793 ], "fp4_dist_after": [ 147648638, 239649127, 210760018, 143008312, 150184919, 107120315, 55654386, 7325342, 147629133, 239793190, 210907561, 143118241, 150278122, 107226476, 55727807, 7334813 ], "bf16_dists": null }, "model.layers.8.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010781431242647382, "l1_avg": 0.009331438276502822, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008318949257954955, "l1_avg": 0.000674999610055238, "l0_avg": 1.0 }, "original": { "l2_avg": 0.013458143621587156, "l1_avg": 0.009617459774017335, "l0_avg": 0.9999979654947917 }, "merged": { "l2_avg": 0.013457375539357948, "l1_avg": 0.009617517391840616, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.11229823174316e-05, "l1_avg": 3.997757772190703e-05, "l0_avg": 0.9999952528211805 }, "num_elements": 1474560, "num_changed": 1474553, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9616, 29204, 7481, 0, 0, 0, 0, 33, 9666, 28749, 7380, 0, 0, 0, 0 ], "lora_B": [ 109, 8047, 0, 0, 0, 0, 0, 0, 106, 8122, 0, 0, 0, 0, 0, 0 ], "original": [ 973, 228361, 370697, 136769, 1460, 0, 0, 0, 956, 227933, 369430, 136602, 1379, 0, 0, 0 ], "merged": [ 959, 227594, 370678, 137622, 1480, 0, 0, 0, 931, 226940, 369439, 137513, 1404, 0, 0, 0 ] } }, "model.layers.8.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009026369933168, "l1_avg": 0.007807998917996883, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000823530030359444, "l1_avg": 0.0006667687661117978, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0722211869562678, "l1_avg": 0.05007237328423394, "l0_avg": 0.9999997456868489 }, "merged": { "l2_avg": 0.07221287028192826, "l1_avg": 0.050072367986043295, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.262449617282679e-05, "l1_avg": 3.323618374350998e-05, "l0_avg": 0.9999551561143664 }, "num_elements": 11796480, "num_changed": 11795951, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 52, 16185, 48073, 1144, 0, 0, 0, 0, 75, 16568, 47864, 1111, 0, 0, 0, 0 ], "lora_B": [ 662, 45554, 0, 0, 0, 0, 0, 0, 691, 45253, 0, 0, 0, 0, 0, 0 ], "original": [ 1449, 358975, 1025245, 2860808, 1652885, 571, 68, 0, 1352, 358448, 1026447, 2856508, 1653114, 536, 74, 0 ], "merged": [ 1397, 357476, 1022260, 2857298, 1660893, 579, 69, 0, 1400, 356881, 1023498, 2853060, 1661055, 539, 75, 0 ] } }, "model.layers.8.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010776133988418335, "l1_avg": 0.009314872158898247, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008363525952728789, "l1_avg": 0.000680675613693893, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0142865049095491, "l1_avg": 0.010290181636810303, "l0_avg": 0.9999975416395399 }, "merged": { "l2_avg": 0.014284056999527154, "l1_avg": 0.010290318065219455, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.274482426973784e-05, "l1_avg": 4.0372279990050526e-05, "l0_avg": 0.9999910142686632 }, "num_elements": 11796480, "num_changed": 11796374, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 34, 9681, 28610, 7410, 0, 0, 0, 0, 45, 9730, 29234, 7416, 0, 0, 0, 0 ], "lora_B": [ 891, 64639, 0, 0, 0, 0, 0, 0, 867, 64675, 0, 0, 0, 0, 0, 0 ], "original": [ 8315, 1775398, 2832803, 1272740, 10332, 0, 0, 0, 8258, 1775777, 2831332, 1271121, 10404, 0, 0, 0 ], "merged": [ 8331, 1768437, 2832300, 1280024, 10560, 0, 0, 0, 8253, 1768864, 2830626, 1278474, 10611, 0, 0, 0 ] } }, "model.layers.8.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010814242804461376, "l1_avg": 0.009356847074296739, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008226762292906642, "l1_avg": 0.0006694967742078006, "l0_avg": 1.0 }, "original": { "l2_avg": 0.045009329619059714, "l1_avg": 0.03469284905327691, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.0450062949874302, "l1_avg": 0.03469282256232368, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.284949844282567e-05, "l1_avg": 4.049704617096318e-05, "l0_avg": 0.9999708387586805 }, "num_elements": 1474560, "num_changed": 1474517, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9560, 28947, 7415, 0, 0, 0, 0, 33, 9491, 29175, 7503, 0, 0, 0, 0 ], "lora_B": [ 107, 8064, 0, 0, 0, 0, 0, 0, 108, 8105, 0, 0, 0, 0, 0, 0 ], "original": [ 244, 59510, 166504, 396452, 115004, 0, 0, 0, 228, 59545, 166727, 395518, 114828, 0, 0, 0 ], "merged": [ 237, 59170, 166142, 396268, 115878, 0, 0, 0, 244, 59281, 166240, 395399, 115701, 0, 0, 0 ] } }, "model.layers.9.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010794954359114392, "l1_avg": 0.009343953927357991, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008009890443645418, "l1_avg": 0.0006431396468542516, "l0_avg": 1.0 }, "original": { "l2_avg": 0.021762318832521277, "l1_avg": 0.012549552652570937, "l0_avg": 0.9999986436631945 }, "merged": { "l2_avg": 0.021761736095288073, "l1_avg": 0.012549648020002578, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.921143411296809e-05, "l1_avg": 3.81711068459683e-05, "l0_avg": 0.9999884711371527 }, "num_elements": 1474560, "num_changed": 1474543, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 42, 9634, 28868, 7439, 0, 0, 0, 0, 27, 9598, 28931, 7621, 0, 0, 0, 0 ], "lora_B": [ 134, 8123, 0, 0, 0, 0, 0, 0, 122, 8005, 0, 0, 0, 0, 0, 0 ], "original": [ 1122, 254094, 300735, 164258, 17049, 0, 0, 0, 1246, 254283, 300459, 164499, 16815, 0, 0, 0 ], "merged": [ 1182, 253225, 300908, 164838, 17177, 0, 0, 0, 1218, 253397, 300559, 165133, 16923, 0, 0, 0 ] } }, "model.layers.9.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009037366309362296, "l1_avg": 0.007817314937710762, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008382669209422412, "l1_avg": 0.0006795832680331337, "l0_avg": 1.0 }, "original": { "l2_avg": 0.16035449099671278, "l1_avg": 0.11237069235907661, "l0_avg": 0.9999996609157986 }, "merged": { "l2_avg": 0.16034452164990837, "l1_avg": 0.11237069235907661, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3380938479368695e-05, "l1_avg": 3.411272644168801e-05, "l0_avg": 0.999906243218316 }, "num_elements": 11796480, "num_changed": 11795374, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 66, 16343, 48019, 1153, 0, 0, 0, 0, 67, 16303, 47850, 1271, 0, 0, 0, 0 ], "lora_B": [ 661, 45475, 0, 0, 0, 0, 0, 0, 596, 45428, 0, 0, 0, 0, 0, 0 ], "original": [ 694, 168120, 483178, 1703422, 3538113, 4967, 379, 0, 659, 167906, 482479, 1701481, 3539697, 5030, 355, 0 ], "merged": [ 636, 167491, 481722, 1699091, 3544534, 5015, 386, 0, 674, 167159, 481019, 1697101, 3546219, 5074, 359, 0 ] } }, "model.layers.9.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.01081409986891361, "l1_avg": 0.009356022543377347, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008433460086754014, "l1_avg": 0.0006851624348200858, "l0_avg": 1.0 }, "original": { "l2_avg": 0.013875715841991586, "l1_avg": 0.009847031699286567, "l0_avg": 0.9999978807237413 }, "merged": { "l2_avg": 0.01387366666141514, "l1_avg": 0.009847160180409749, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.367309456732437e-05, "l1_avg": 4.136027095632421e-05, "l0_avg": 0.999991946750217 }, "num_elements": 11796480, "num_changed": 11796385, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9528, 28927, 7474, 0, 0, 0, 0, 40, 9676, 28970, 7510, 0, 0, 0, 0 ], "lora_B": [ 881, 64865, 0, 0, 0, 0, 0, 0, 890, 64436, 0, 0, 0, 0, 0, 0 ], "original": [ 8018, 1868935, 2845896, 1162408, 11975, 0, 0, 0, 8317, 1871880, 2846253, 1160855, 11943, 0, 0, 0 ], "merged": [ 8007, 1861764, 2846119, 1169198, 12210, 0, 0, 0, 8106, 1865069, 2846058, 1167784, 12165, 0, 0, 0 ] } }, "model.layers.9.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01086135263321296, "l1_avg": 0.009388493167029487, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000781990063842386, "l1_avg": 0.000628103909548372, "l0_avg": 1.0 }, "original": { "l2_avg": 0.04543239999180562, "l1_avg": 0.03402172724405925, "l0_avg": 0.9999979654947917 }, "merged": { "l2_avg": 0.0454295444222909, "l1_avg": 0.03402175108591716, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.202647168106899e-05, "l1_avg": 3.926820225185818e-05, "l0_avg": 0.9999721950954861 }, "num_elements": 1474560, "num_changed": 1474519, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 45, 9615, 28886, 7506, 0, 0, 0, 0, 36, 9486, 29018, 7568, 0, 0, 0, 0 ], "lora_B": [ 128, 8060, 0, 0, 0, 0, 0, 0, 126, 8070, 0, 0, 0, 0, 0, 0 ], "original": [ 257, 63799, 174747, 391162, 107767, 0, 0, 0, 257, 63558, 174856, 390152, 108005, 0, 0, 0 ], "merged": [ 266, 63495, 174256, 391101, 108587, 0, 0, 0, 251, 63275, 174414, 390100, 108815, 0, 0, 0 ] } }, "model.layers.7.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020420574526100925, "l1_avg": 0.001731216079658932, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000766474057040716, "l1_avg": 0.0006162363621923659, "l0_avg": 1.0 }, "original": { "l2_avg": 0.23229763422641928, "l1_avg": 0.19562062958140433, "l0_avg": 0.8773027594295549 }, "merged": { "l2_avg": 0.2323404928883717, "l1_avg": 0.19562183521412038, "l0_avg": 0.8637842729356554 }, "diff": { "l2_avg": 0.014872433361769863, "l1_avg": 0.0027718885445300444, "l0_avg": 0.047947522387092496 }, "num_elements": 1061683200, "num_changed": 50905079, "precision": "mxfp4", "fp4_dist_before": [ 65134183, 88292541, 108879905, 59758330, 87227183, 61767423, 43964503, 16030232, 65131416, 88257135, 108817267, 59714956, 87140744, 61701648, 43879758, 15985976 ], "fp4_dist_after": [ 72309496, 124037911, 108896720, 72644102, 74176580, 50249439, 25182240, 3554866, 72308453, 123994636, 108805915, 72591828, 74091895, 50172636, 25121442, 3545041 ], "bf16_dists": null }, "model.layers.7.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.01074353722222365, "l1_avg": 0.0092946277724372, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008572226680945963, "l1_avg": 0.0006999401582611931, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01874063809712728, "l1_avg": 0.019104623676818096, "l0_avg": 0.8777095333146461 }, "merged": { "l2_avg": 0.018741254011789957, "l1_avg": 0.019104676423249423, "l0_avg": 0.8630692733953028 }, "diff": { "l2_avg": 0.0013544018897745345, "l1_avg": 0.0002954027093487021, "l0_avg": 0.05197732713487413 }, "num_elements": 2123366400, "num_changed": 110366910, "precision": "mxfp4", "fp4_dist_before": [ 129802701, 171236183, 209976059, 119996925, 176968510, 131038757, 91572054, 30680170, 129864767, 171271913, 210085142, 120099045, 177123335, 131181666, 91715945, 30753228 ], "fp4_dist_after": [ 145391619, 240378705, 212020382, 143621625, 150533347, 106629131, 55355674, 7384824, 145362485, 240451381, 212167336, 143744519, 150686780, 106788216, 55447121, 7403255 ], "bf16_dists": null }, "model.layers.8.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.002038389891411718, "l1_avg": 0.0017289607061280144, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007532092258938817, "l1_avg": 0.0006054655959208806, "l0_avg": 1.0 }, "original": { "l2_avg": 0.32512811734509767, "l1_avg": 0.2641814959490741, "l0_avg": 0.8758150538691768 }, "merged": { "l2_avg": 0.3251259294553616, "l1_avg": 0.2641846908757716, "l0_avg": 0.861651968308437 }, "diff": { "l2_avg": 0.020269479676278148, "l1_avg": 0.0038632910928608458, "l0_avg": 0.05012097770785108 }, "num_elements": 1061683200, "num_changed": 53212600, "precision": "mxfp4", "fp4_dist_before": [ 65925505, 88681253, 109386701, 59410625, 86863084, 61463569, 43651460, 15702457, 65919566, 88671638, 109323029, 59356734, 86742924, 61383633, 43549254, 15651768 ], "fp4_dist_after": [ 73437029, 124906222, 109185812, 71631995, 73041291, 49684266, 25498829, 3692368, 73444752, 124857226, 109112684, 71558350, 72924754, 49597843, 25432772, 3677007 ], "bf16_dists": null }, "model.layers.8.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010746242806984748, "l1_avg": 0.009301769733428954, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008488715409123012, "l1_avg": 0.0006911588625775443, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01871579753028022, "l1_avg": 0.018708349157262733, "l0_avg": 0.8782272809817467 }, "merged": { "l2_avg": 0.01871590084499783, "l1_avg": 0.018708369879075037, "l0_avg": 0.8637494150797526 }, "diff": { "l2_avg": 0.0013391941785812378, "l1_avg": 0.0002860362735795386, "l0_avg": 0.05137036782723886 }, "num_elements": 2123366400, "num_changed": 109078113, "precision": "mxfp4", "fp4_dist_before": [ 129269819, 172032754, 211167121, 119759389, 176579158, 130117824, 91285916, 31000497, 129298281, 172091556, 211284824, 119866806, 176775177, 130320571, 91434230, 31082477 ], "fp4_dist_after": [ 144661088, 241742972, 212780974, 143892326, 150326053, 105865183, 54624192, 7342058, 144648826, 241817515, 212968877, 144035765, 150493127, 106079200, 54736479, 7351765 ], "bf16_dists": null }, "model.layers.9.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010720297626352874, "l1_avg": 0.009270293182796902, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008420474483766488, "l1_avg": 0.0006846715178754595, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018744774659474692, "l1_avg": 0.018565378071349344, "l0_avg": 0.8811745716613016 }, "merged": { "l2_avg": 0.018744806448618572, "l1_avg": 0.018565317789713542, "l0_avg": 0.8669643063015408 }, "diff": { "l2_avg": 0.0013312463959058126, "l1_avg": 0.0002782252688466767, "l0_avg": 0.05073527771749614 }, "num_elements": 2123366400, "num_changed": 107729584, "precision": "mxfp4", "fp4_dist_before": [ 126131758, 170355606, 210872534, 120449202, 178091814, 131536033, 92284402, 31415664, 126178164, 170453308, 211005387, 120580958, 178303455, 131747170, 92466192, 31494753 ], "fp4_dist_after": [ 141217810, 240152508, 213212543, 145455366, 152127417, 106981078, 54722002, 7272918, 141265712, 240276533, 213373214, 145645679, 152351429, 107175617, 54838706, 7297868 ], "bf16_dists": null }, "model.layers.15.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010781049557613236, "l1_avg": 0.009338645140329997, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007807556539773941, "l1_avg": 0.0006248349091038108, "l0_avg": 1.0 }, "original": { "l2_avg": 0.016609328980618336, "l1_avg": 0.008417503039042155, "l0_avg": 0.9999932183159722 }, "merged": { "l2_avg": 0.016608835774442526, "l1_avg": 0.008417747418085734, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.784002042124393e-05, "l1_avg": 3.73062522461017e-05, "l0_avg": 0.9999959309895833 }, "num_elements": 1474560, "num_changed": 1474554, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 34, 9545, 29314, 7464, 0, 0, 0, 0, 38, 9561, 28919, 7285, 0, 0, 0, 0 ], "lora_B": [ 146, 8075, 0, 0, 0, 0, 0, 0, 137, 8026, 0, 0, 0, 0, 0, 0 ], "original": [ 2547, 378893, 251180, 94893, 9457, 0, 0, 0, 2491, 378431, 251040, 95947, 9681, 0, 0, 0 ], "merged": [ 2522, 378000, 251641, 95275, 9533, 0, 0, 0, 2548, 377427, 251488, 96375, 9751, 0, 0, 0 ] } }, "model.layers.15.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00903176604686641, "l1_avg": 0.00780939357355237, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008258814968262413, "l1_avg": 0.0006675987607902951, "l0_avg": 1.0 }, "original": { "l2_avg": 0.3481244013748864, "l1_avg": 0.25645147959391273, "l0_avg": 0.9999998304578993 }, "merged": { "l2_avg": 0.34811366788920034, "l1_avg": 0.25645154317220054, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.28374361962698e-05, "l1_avg": 3.352399087614483e-05, "l0_avg": 0.9997828165690105 }, "num_elements": 11796480, "num_changed": 11793918, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 64, 16268, 47956, 1238, 0, 0, 0, 0, 75, 16399, 47857, 1215, 0, 0, 0, 0 ], "lora_B": [ 620, 45536, 0, 0, 0, 0, 0, 0, 659, 45345, 0, 0, 0, 0, 0, 0 ], "original": [ 408, 96496, 235600, 792637, 4700494, 69155, 2813, 0, 399, 96581, 236488, 790331, 4703473, 68838, 2767, 0 ], "merged": [ 379, 96096, 234956, 790582, 4702606, 70088, 2843, 0, 435, 96176, 235884, 788117, 4705691, 69827, 2800, 0 ] } }, "model.layers.15.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010814174478128104, "l1_avg": 0.009357402059766982, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008208470001962987, "l1_avg": 0.0006627262337133288, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0110323284074624, "l1_avg": 0.007643702295091417, "l0_avg": 0.9999959309895833 }, "merged": { "l2_avg": 0.011030626865650412, "l1_avg": 0.007643884420394898, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2032222939070334e-05, "l1_avg": 3.9965669727987716e-05, "l0_avg": 0.9999943203396268 }, "num_elements": 11796480, "num_changed": 11796413, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9527, 28741, 7605, 0, 0, 0, 0, 47, 9626, 29302, 7279, 0, 0, 0, 0 ], "lora_B": [ 975, 64591, 0, 0, 0, 0, 0, 0, 913, 64593, 0, 0, 0, 0, 0, 0 ], "original": [ 11499, 2387674, 2750803, 741423, 4765, 0, 0, 0, 11319, 2385860, 2755880, 742616, 4641, 0, 0, 0 ], "merged": [ 11385, 2379671, 2753860, 746778, 4838, 0, 0, 0, 11127, 2377324, 2758800, 747991, 4706, 0, 0, 0 ] } }, "model.layers.15.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010789261283368452, "l1_avg": 0.009316364924112955, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007767592323943973, "l1_avg": 0.0006252398015931249, "l0_avg": 1.0 }, "original": { "l2_avg": 0.052994720861186666, "l1_avg": 0.040967122713724775, "l0_avg": 0.9999986436631945 }, "merged": { "l2_avg": 0.05299125899363416, "l1_avg": 0.04096713331010607, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.953583387317368e-05, "l1_avg": 3.7917078265713324e-05, "l0_avg": 0.9999674479166667 }, "num_elements": 1474560, "num_changed": 1474512, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9771, 28747, 7405, 0, 0, 0, 0, 27, 9762, 28970, 7446, 0, 0, 0, 0 ], "lora_B": [ 113, 8074, 0, 0, 0, 0, 0, 0, 133, 8064, 0, 0, 0, 0, 0, 0 ], "original": [ 206, 49331, 139991, 386294, 160322, 0, 0, 0, 185, 49985, 139895, 387499, 160852, 0, 0, 0 ], "merged": [ 194, 49089, 139591, 385961, 161310, 0, 0, 0, 197, 49760, 139466, 387107, 161885, 0, 0, 0 ] } }, "model.layers.16.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010780118120472295, "l1_avg": 0.009321108791563245, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008109426125884056, "l1_avg": 0.0006516865105368197, "l0_avg": 1.0 }, "original": { "l2_avg": 0.005960511689932621, "l1_avg": 0.003513976600435045, "l0_avg": 0.9999918619791667 }, "merged": { "l2_avg": 0.005960453965961408, "l1_avg": 0.003514283233218723, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.988887473644311e-05, "l1_avg": 3.928144772847493e-05, "l0_avg": 0.9999979654947917 }, "num_elements": 1474560, "num_changed": 1474557, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 46, 9715, 28876, 7466, 0, 0, 0, 0, 23, 9623, 28931, 7480, 0, 0, 0, 0 ], "lora_B": [ 124, 8220, 0, 0, 0, 0, 0, 0, 120, 7920, 0, 0, 0, 0, 0, 0 ], "original": [ 3032, 516607, 203311, 14238, 342, 0, 0, 0, 3081, 517272, 202106, 14200, 371, 0, 0, 0 ], "merged": [ 3057, 515539, 204348, 14362, 346, 0, 0, 0, 3018, 515932, 203269, 14313, 376, 0, 0, 0 ] } }, "model.layers.16.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.0090279043418716, "l1_avg": 0.007811993360519409, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008174613561205396, "l1_avg": 0.0006614904436800215, "l0_avg": 1.0 }, "original": { "l2_avg": 0.2907728994602199, "l1_avg": 0.21354526943630642, "l0_avg": 0.9999996609157986 }, "merged": { "l2_avg": 0.2907589494829624, "l1_avg": 0.21354526943630642, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.189761235897486e-05, "l1_avg": 3.254670235845778e-05, "l0_avg": 0.9998148600260417 }, "num_elements": 11796480, "num_changed": 11794296, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 64, 16247, 48033, 1163, 0, 0, 0, 0, 88, 16359, 47978, 1140, 0, 0, 0, 0 ], "lora_B": [ 667, 45466, 0, 0, 0, 0, 0, 0, 697, 45330, 0, 0, 0, 0, 0, 0 ], "original": [ 497, 116246, 281826, 946267, 4521942, 32711, 796, 0, 507, 116017, 281188, 946579, 4518556, 32605, 743, 0 ], "merged": [ 463, 115795, 281090, 943643, 4525205, 33234, 805, 0, 501, 115632, 280450, 943803, 4521956, 33146, 757, 0 ] } }, "model.layers.16.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010754222440161785, "l1_avg": 0.009295562903086344, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008380707378942058, "l1_avg": 0.0006784809520468116, "l0_avg": 1.0 }, "original": { "l2_avg": 0.012426008882180027, "l1_avg": 0.00884959962632921, "l0_avg": 0.9999972025553385 }, "merged": { "l2_avg": 0.012424376201720745, "l1_avg": 0.008849745988845826, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.19389572826258e-05, "l1_avg": 4.063158865190214e-05, "l0_avg": 0.9999949137369791 }, "num_elements": 11796480, "num_changed": 11796420, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 39, 9825, 28946, 7396, 0, 0, 0, 0, 34, 9637, 28986, 7297, 0, 0, 0, 0 ], "lora_B": [ 899, 64615, 0, 0, 0, 0, 0, 0, 918, 64640, 0, 0, 0, 0, 0, 0 ], "original": [ 11428, 2057904, 2835067, 987541, 5696, 0, 0, 0, 11391, 2055752, 2836589, 989255, 5857, 0, 0, 0 ], "merged": [ 11387, 2050620, 2835680, 994143, 5814, 0, 0, 0, 11240, 2048636, 2836989, 996015, 5956, 0, 0, 0 ] } }, "model.layers.16.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010783799888867518, "l1_avg": 0.009323357211218939, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008077243692241609, "l1_avg": 0.0006530964747071266, "l0_avg": 1.0 }, "original": { "l2_avg": 0.07024229790131147, "l1_avg": 0.05422021018134223, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.07023685064329327, "l1_avg": 0.05422021547953288, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.036328135360163e-05, "l1_avg": 3.9342237222525806e-05, "l0_avg": 0.9999565972222222 }, "num_elements": 1474560, "num_changed": 1474496, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 24, 9689, 29038, 7440, 0, 0, 0, 0, 30, 9684, 28888, 7367, 0, 0, 0, 0 ], "lora_B": [ 119, 8149, 0, 0, 0, 0, 0, 0, 127, 7989, 0, 0, 0, 0, 0, 0 ], "original": [ 137, 36697, 107493, 340996, 252477, 7, 0, 0, 181, 36822, 107414, 339674, 252658, 4, 0, 0 ], "merged": [ 146, 36560, 107116, 340408, 253588, 7, 0, 0, 163, 36658, 107096, 339054, 253759, 5, 0, 0 ] } }, "model.layers.9.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020345050559855563, "l1_avg": 0.0017267492082383897, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007400667145908095, "l1_avg": 0.0005938712507486344, "l0_avg": 1.0 }, "original": { "l2_avg": 0.4148553036564064, "l1_avg": 0.36437964168595677, "l0_avg": 0.8745363956027561 }, "merged": { "l2_avg": 0.4149421299107254, "l1_avg": 0.3643848861882716, "l0_avg": 0.860901528817636 }, "diff": { "l2_avg": 0.02772940068136224, "l1_avg": 0.005290178840543017, "l0_avg": 0.0479514444610219 }, "num_elements": 1061683200, "num_changed": 50909243, "precision": "mxfp4", "fp4_dist_before": [ 66607189, 90297190, 109991496, 59727031, 86120660, 60029185, 42636497, 15684275, 66595412, 90260335, 109936286, 59677256, 85989572, 59950352, 42550165, 15630299 ], "fp4_dist_after": [ 73837658, 125838515, 109352768, 71864192, 72897421, 49050536, 24693390, 3548804, 73840852, 125789959, 109279706, 71790832, 72775488, 48953950, 24631267, 3537862 ], "bf16_dists": null }, "model.layers.14.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020342067030682943, "l1_avg": 0.0017263652549849616, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0006835913187648649, "l1_avg": 0.0005444025413857566, "l0_avg": 1.0 }, "original": { "l2_avg": 0.9330830025437243, "l1_avg": 0.8644075520833333, "l0_avg": 0.8731042970257041 }, "merged": { "l2_avg": 0.9332293813583925, "l1_avg": 0.8644338348765432, "l0_avg": 0.8582823868739752 }, "diff": { "l2_avg": 0.06390168802751904, "l1_avg": 0.01347087530442226, "l0_avg": 0.0520440975236304 }, "num_elements": 1061683200, "num_changed": 55254344, "precision": "mxfp4", "fp4_dist_before": [ 67360310, 90401748, 110219947, 59343140, 85925499, 60359081, 42283133, 14932280, 67362726, 90395052, 110233833, 59346468, 85921188, 60364266, 42297586, 14936943 ], "fp4_dist_after": [ 75241485, 126285790, 109439730, 69974127, 71347410, 48939414, 25768279, 3841336, 75217724, 126273802, 109450853, 69980775, 71369026, 48948179, 25767998, 3837272 ], "bf16_dists": null }, "model.layers.14.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010751217484384155, "l1_avg": 0.009306601683298747, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008445731666272964, "l1_avg": 0.0006879296153783799, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018561837408277722, "l1_avg": 0.016982670536747686, "l0_avg": 0.8819712344511056 }, "merged": { "l2_avg": 0.01856282154719035, "l1_avg": 0.016982640395929782, "l0_avg": 0.8683067806856132 }, "diff": { "l2_avg": 0.0012622382077905868, "l1_avg": 0.0002427173838203336, "l0_avg": 0.04881870787820698 }, "num_elements": 2123366400, "num_changed": 103660004, "precision": "mxfp4", "fp4_dist_before": [ 125306520, 171613091, 211204028, 121311484, 177977949, 130831830, 91561109, 31604574, 125311795, 171650705, 211275525, 121395091, 178100111, 130921446, 91658733, 31642409 ], "fp4_dist_after": [ 139816223, 240412978, 213397694, 146814059, 153130714, 107092617, 53726776, 7022226, 139816734, 240466115, 213499197, 146916221, 153247704, 107186603, 53790683, 7029856 ], "bf16_dists": null }, "model.layers.15.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020353762353972913, "l1_avg": 0.001727180348502265, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007042398115560536, "l1_avg": 0.0005618224127425088, "l0_avg": 1.0 }, "original": { "l2_avg": 1.0966820679313316, "l1_avg": 0.9744214771412038, "l0_avg": 0.8714152253704307 }, "merged": { "l2_avg": 1.0966710385693745, "l1_avg": 0.9744592737268518, "l0_avg": 0.8566949029616368 }, "diff": { "l2_avg": 0.0715931169884903, "l1_avg": 0.014769524468315972, "l0_avg": 0.05173106158221209 }, "num_elements": 1061683200, "num_changed": 54921999, "precision": "mxfp4", "fp4_dist_before": [ 68258298, 89918373, 111056508, 57915237, 85425974, 59722299, 42976867, 15588521, 68257997, 89897166, 111038858, 57925363, 85407605, 59720191, 42982014, 15591929 ], "fp4_dist_after": [ 76080984, 127273097, 110092815, 69698273, 70682613, 47789672, 25335209, 3919669, 76063630, 127239443, 110081876, 69716131, 70674099, 47781610, 25332304, 3921775 ], "bf16_dists": null }, "model.layers.15.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010794971099358985, "l1_avg": 0.009353300597932603, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008491722356807143, "l1_avg": 0.0006916301118002997, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018389564090304903, "l1_avg": 0.01674142531406732, "l0_avg": 0.8816441100320699 }, "merged": { "l2_avg": 0.018391214476691353, "l1_avg": 0.016741393289448302, "l0_avg": 0.868256911760495 }, "diff": { "l2_avg": 0.0012457914650440215, "l1_avg": 0.00023464929910353672, "l0_avg": 0.04782234380274643 }, "num_elements": 2123366400, "num_changed": 101544358, "precision": "mxfp4", "fp4_dist_before": [ 125635633, 172189629, 211656416, 121395968, 177632445, 129954763, 91104664, 31838432, 125677287, 172271558, 211762524, 121438577, 177726620, 130049056, 91173535, 31859293 ], "fp4_dist_after": [ 139856160, 240817435, 213659509, 147295484, 153262267, 106628000, 52981426, 6920735, 139882687, 240918083, 213792463, 147377247, 153339529, 106691697, 53016977, 6926701 ], "bf16_dists": null }, "model.layers.17.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010781254536613055, "l1_avg": 0.00932963424258762, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008015065104700625, "l1_avg": 0.0006461135344579816, "l0_avg": 1.0 }, "original": { "l2_avg": 0.030082485546881917, "l1_avg": 0.012234352694617378, "l0_avg": 0.9999857584635417 }, "merged": { "l2_avg": 0.030081982916384274, "l1_avg": 0.012234886487325033, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.949356861215071e-05, "l1_avg": 3.851168375048373e-05, "l0_avg": 0.9999918619791667 }, "num_elements": 1474560, "num_changed": 1474548, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 38, 9491, 28909, 7456, 0, 0, 0, 0, 32, 9732, 29044, 7458, 0, 0, 0, 0 ], "lora_B": [ 119, 8135, 0, 0, 0, 0, 0, 0, 128, 8002, 0, 0, 0, 0, 0, 0 ], "original": [ 4306, 416520, 186243, 95394, 35195, 0, 0, 0, 4175, 416123, 186018, 95222, 35364, 0, 0, 0 ], "merged": [ 4240, 415855, 186590, 95597, 35344, 0, 0, 0, 4195, 415401, 186418, 95409, 35511, 0, 0, 0 ] } }, "model.layers.17.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009044566835012579, "l1_avg": 0.007825983688235283, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000827565799843435, "l1_avg": 0.0006702174743016561, "l0_avg": 1.0 }, "original": { "l2_avg": 0.5381780029981432, "l1_avg": 0.3927080790201823, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 0.5381647105357373, "l1_avg": 0.3927080790201823, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.270639918544995e-05, "l1_avg": 3.361788888772329e-05, "l0_avg": 0.9996735466851129 }, "num_elements": 11796480, "num_changed": 11792629, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 58, 16211, 47819, 1190, 0, 0, 0, 0, 66, 16320, 48204, 1204, 0, 0, 0, 0 ], "lora_B": [ 670, 45300, 0, 0, 0, 0, 0, 0, 643, 45547, 0, 0, 0, 0, 0, 0 ], "original": [ 306, 71713, 170323, 550972, 4734174, 345332, 24966, 0, 289, 71458, 170337, 550862, 4735987, 345149, 24612, 0 ], "merged": [ 283, 71440, 169931, 549437, 4732792, 348581, 25326, 0, 299, 71185, 169868, 549294, 4734494, 348561, 24989, 0 ] } }, "model.layers.17.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010812328881769577, "l1_avg": 0.009353110525343154, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008217066477334067, "l1_avg": 0.0006615967722609639, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01324439273906315, "l1_avg": 0.00882129669189453, "l0_avg": 0.9999966091579862 }, "merged": { "l2_avg": 0.013242600122558875, "l1_avg": 0.008821503321329752, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.301265553778877e-05, "l1_avg": 4.052312837706672e-05, "l0_avg": 0.9999947441948784 }, "num_elements": 11796480, "num_changed": 11796418, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 26, 9382, 29173, 7383, 0, 0, 0, 0, 39, 9710, 29033, 7414, 0, 0, 0, 0 ], "lora_B": [ 969, 64575, 0, 0, 0, 0, 0, 0, 956, 64572, 0, 0, 0, 0, 0, 0 ], "original": [ 13041, 2393904, 2451004, 1028707, 13474, 0, 0, 0, 13071, 2392054, 2450078, 1027616, 13531, 0, 0, 0 ], "merged": [ 13035, 2387178, 2451784, 1034517, 13734, 0, 0, 0, 12978, 2385328, 2450725, 1033442, 13759, 0, 0, 0 ] } }, "model.layers.17.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010807801280490041, "l1_avg": 0.009336581495073106, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007837545708753169, "l1_avg": 0.0006331523763947189, "l0_avg": 1.0 }, "original": { "l2_avg": 0.05940683466552779, "l1_avg": 0.04598369068569607, "l0_avg": 0.9999986436631945 }, "merged": { "l2_avg": 0.059403033522389376, "l1_avg": 0.04598373836941189, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1365453061096615e-05, "l1_avg": 3.875713381502363e-05, "l0_avg": 0.9999735514322917 }, "num_elements": 1474560, "num_changed": 1474521, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 40, 9689, 28804, 7440, 0, 0, 0, 0, 35, 9667, 29146, 7339, 0, 0, 0, 0 ], "lora_B": [ 119, 8076, 0, 0, 0, 0, 0, 0, 133, 8056, 0, 0, 0, 0, 0, 0 ], "original": [ 179, 42896, 125131, 372468, 197351, 0, 0, 0, 167, 43028, 124970, 371552, 196818, 0, 0, 0 ], "merged": [ 166, 42726, 124757, 371917, 198448, 0, 0, 0, 166, 42857, 124578, 371047, 197898, 0, 0, 0 ] } }, "model.layers.18.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010788691897257842, "l1_avg": 0.00934318568971422, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008293594000861049, "l1_avg": 0.0006731941248290241, "l0_avg": 1.0 }, "original": { "l2_avg": 0.007072280480345502, "l1_avg": 0.00353697935740153, "l0_avg": 0.9999898274739584 }, "merged": { "l2_avg": 0.007072274197464282, "l1_avg": 0.0035372985733879937, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.11475555003302e-05, "l1_avg": 4.0268683288660314e-05, "l0_avg": 0.9999979654947917 }, "num_elements": 1474560, "num_changed": 1474557, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 37, 9553, 29357, 7493, 0, 0, 0, 0, 36, 9533, 28834, 7317, 0, 0, 0, 0 ], "lora_B": [ 125, 7979, 0, 0, 0, 0, 0, 0, 92, 8188, 0, 0, 0, 0, 0, 0 ], "original": [ 3152, 524365, 194848, 14208, 1002, 0, 0, 0, 3137, 524427, 194342, 14032, 1047, 0, 0, 0 ], "merged": [ 3103, 523034, 195967, 14314, 1011, 0, 0, 0, 3314, 523125, 195502, 14132, 1058, 0, 0, 0 ] } }, "model.layers.18.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009025835195027518, "l1_avg": 0.007814658805727959, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008121130043965656, "l1_avg": 0.0006571923693021139, "l0_avg": 1.0 }, "original": { "l2_avg": 0.361479594251785, "l1_avg": 0.2647099812825521, "l0_avg": 0.9999998304578993 }, "merged": { "l2_avg": 0.36146964267565224, "l1_avg": 0.2647099600897895, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.203388599105948e-05, "l1_avg": 3.243654241992367e-05, "l0_avg": 0.9997639973958333 }, "num_elements": 11796480, "num_changed": 11793696, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 64, 16298, 48046, 939, 0, 0, 0, 0, 71, 16184, 48563, 907, 0, 0, 0, 0 ], "lora_B": [ 615, 45394, 0, 0, 0, 0, 0, 0, 670, 45481, 0, 0, 0, 0, 0, 0 ], "original": [ 390, 97881, 236637, 781864, 4688614, 87762, 3358, 0, 385, 97598, 236245, 782360, 4692084, 88020, 3282, 0 ], "merged": [ 380, 97511, 236027, 779758, 4690525, 88891, 3413, 0, 402, 97218, 235653, 780200, 4693984, 89160, 3358, 0 ] } }, "model.layers.18.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010797414107112224, "l1_avg": 0.009344889058007134, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000841873832427806, "l1_avg": 0.000681223114952445, "l0_avg": 1.0 }, "original": { "l2_avg": 0.014272201740192196, "l1_avg": 0.010075535376866658, "l0_avg": 0.9999966091579862 }, "merged": { "l2_avg": 0.014269686079484525, "l1_avg": 0.010075675116644965, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1928210711894386e-05, "l1_avg": 4.086023869199885e-05, "l0_avg": 0.9999935574001736 }, "num_elements": 11796480, "num_changed": 11796404, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 38, 9570, 28801, 7413, 0, 0, 0, 0, 42, 9575, 29143, 7578, 0, 0, 0, 0 ], "lora_B": [ 901, 64499, 0, 0, 0, 0, 0, 0, 923, 64749, 0, 0, 0, 0, 0, 0 ], "original": [ 10608, 1886063, 2750879, 1240891, 12533, 0, 0, 0, 10547, 1884531, 2749336, 1238256, 12836, 0, 0, 0 ], "merged": [ 10544, 1879586, 2750261, 1247868, 12731, 0, 0, 0, 10346, 1878072, 2748541, 1245464, 13067, 0, 0, 0 ] } }, "model.layers.18.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010781033850410186, "l1_avg": 0.009321710798475477, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.00081983907148242, "l1_avg": 0.0006643868982791901, "l0_avg": 1.0 }, "original": { "l2_avg": 0.07312905563841478, "l1_avg": 0.05378440221150716, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.07312435604326183, "l1_avg": 0.05378443929884169, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1688950884252e-05, "l1_avg": 4.0249060839414595e-05, "l0_avg": 0.9999559190538194 }, "num_elements": 1474560, "num_changed": 1474495, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 51, 9559, 29133, 7398, 0, 0, 0, 0, 31, 9711, 28955, 7322, 0, 0, 0, 0 ], "lora_B": [ 117, 8045, 0, 0, 0, 0, 0, 0, 106, 8116, 0, 0, 0, 0, 0, 0 ], "original": [ 133, 36863, 107838, 345870, 246860, 113, 10, 0, 155, 36876, 107874, 345474, 246374, 113, 7, 0 ], "merged": [ 141, 36707, 107504, 345243, 247985, 113, 10, 0, 137, 36742, 107493, 344844, 247521, 113, 7, 0 ] } }, "model.layers.16.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020295996564389704, "l1_avg": 0.0017235984404881796, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000659751416823711, "l1_avg": 0.0005228545102808211, "l0_avg": 1.0 }, "original": { "l2_avg": 1.3198592889810008, "l1_avg": 1.0984167631172839, "l0_avg": 0.8711057338008175 }, "merged": { "l2_avg": 1.3196884537550368, "l1_avg": 1.0984665557484568, "l0_avg": 0.8571233386757933 }, "diff": { "l2_avg": 0.08011782473519402, "l1_avg": 0.015762171804169077, "l0_avg": 0.049155537169656635 }, "num_elements": 1061683200, "num_changed": 52187608, "precision": "mxfp4", "fp4_dist_before": [ 68420399, 89990065, 111405288, 57619710, 85013275, 59052316, 43114652, 16204592, 68424478, 89983239, 111408582, 57629111, 85012867, 59065512, 43125672, 16213442 ], "fp4_dist_after": [ 75845032, 127548840, 110329097, 70534357, 70965847, 47293305, 24480939, 3821395, 75844719, 127561810, 110322145, 70543267, 70970411, 47306331, 24491966, 3823739 ], "bf16_dists": null }, "model.layers.16.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010712245290754369, "l1_avg": 0.009262936645083958, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008458043168364604, "l1_avg": 0.0006850313809182908, "l0_avg": 0.984375 }, "original": { "l2_avg": 0.01815070178773668, "l1_avg": 0.01661700025016879, "l0_avg": 0.8815845654334551 }, "merged": { "l2_avg": 0.01815212302737766, "l1_avg": 0.01661699836636767, "l0_avg": 0.8683593778257017 }, "diff": { "l2_avg": 0.0012279050217734442, "l1_avg": 0.00023090762856565875, "l0_avg": 0.04724370038067853 }, "num_elements": 2123366400, "num_changed": 100315686, "precision": "mxfp4", "fp4_dist_before": [ 125730596, 171992278, 211473485, 121331114, 177709804, 130184270, 91234280, 31794566, 125708759, 172029160, 211547056, 121373456, 177827209, 130278634, 91306109, 31845624 ], "fp4_dist_after": [ 141665625, 239545996, 213517519, 146798636, 153641965, 107090361, 53740133, 7343460, 137855649, 239606022, 213616701, 146847108, 153737373, 107214939, 53792572, 7352341 ], "bf16_dists": null }, "model.layers.17.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020334435360202774, "l1_avg": 0.0017258319589826797, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007654136735797253, "l1_avg": 0.0006141725099749035, "l0_avg": 1.0 }, "original": { "l2_avg": 1.4744728410781853, "l1_avg": 1.234739703896605, "l0_avg": 0.8705659126941069 }, "merged": { "l2_avg": 1.474410980743731, "l1_avg": 1.2347837094907408, "l0_avg": 0.8569933752366055 }, "diff": { "l2_avg": 0.09136422235688337, "l1_avg": 0.017614512502411264, "l0_avg": 0.047382826628508394 }, "num_elements": 1061683200, "num_changed": 50305551, "precision": "mxfp4", "fp4_dist_before": [ 68719342, 91680849, 111432087, 58491550, 84470320, 58058938, 42054794, 15933286, 68698654, 91688818, 111457007, 58487814, 84477738, 58043059, 42057664, 15931280 ], "fp4_dist_after": [ 75909353, 128010369, 110030724, 70901430, 71153892, 47201423, 23972664, 3645896, 75918378, 128028839, 110043926, 70895428, 71162038, 47194441, 23972482, 3641917 ], "bf16_dists": null }, "model.layers.17.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010775115705773539, "l1_avg": 0.009321137269337972, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008544744965807348, "l1_avg": 0.0006991783777872721, "l0_avg": 0.9999986436631945 }, "original": { "l2_avg": 0.019258979956309, "l1_avg": 0.01748260498046875, "l0_avg": 0.8783673764452522 }, "merged": { "l2_avg": 0.019262904591030545, "l1_avg": 0.017482627586082176, "l0_avg": 0.8647023533008716 }, "diff": { "l2_avg": 0.0013654618627495236, "l1_avg": 0.00025507093947610735, "l0_avg": 0.04844469235267168 }, "num_elements": 2123366400, "num_changed": 102865832, "precision": "mxfp4", "fp4_dist_before": [ 129145097, 174731831, 212797318, 120437417, 175655317, 127677667, 89512625, 31400883, 129125529, 174769810, 212892374, 120512799, 175778616, 127814407, 89665837, 31448873 ], "fp4_dist_after": [ 143639987, 243501426, 213941359, 145489939, 150807107, 104625741, 52351790, 6992314, 143646490, 243566769, 214034833, 145593481, 150966056, 104763414, 52444755, 7000939 ], "bf16_dists": null }, "model.layers.18.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010756706400594887, "l1_avg": 0.009315861596001519, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008594910827252599, "l1_avg": 0.000702807969517178, "l0_avg": 0.9999986436631945 }, "original": { "l2_avg": 0.019086371527777778, "l1_avg": 0.01711915381160783, "l0_avg": 0.8772385142761984 }, "merged": { "l2_avg": 0.01909152931637234, "l1_avg": 0.01711909352997203, "l0_avg": 0.8637074482293776 }, "diff": { "l2_avg": 0.0013843056228425767, "l1_avg": 0.00025027781356999905, "l0_avg": 0.047845385516131364 }, "num_elements": 2123366400, "num_changed": 101593284, "precision": "mxfp4", "fp4_dist_before": [ 130335980, 175821079, 213378146, 120116975, 174846146, 126491522, 88880063, 31511443, 130331634, 175850136, 213466302, 120201354, 174964504, 126631423, 88994147, 31545546 ], "fp4_dist_after": [ 144681951, 244524862, 214137973, 145341273, 150291586, 103779213, 51664333, 6942806, 144717074, 244573429, 214252526, 145440297, 150422525, 103921373, 51733299, 6941880 ], "bf16_dists": null }, "model.layers.0.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010798477484758796, "l1_avg": 0.009353333049350314, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008446850115433335, "l1_avg": 0.000685103761497885, "l0_avg": 1.0 }, "original": { "l2_avg": 0.059868456797442786, "l1_avg": 0.037969263394673665, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.05986558552072501, "l1_avg": 0.03796927928924561, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.221639385421339e-05, "l1_avg": 4.114976101037529e-05, "l0_avg": 0.9999796549479166 }, "num_elements": 1474560, "num_changed": 1474530, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9514, 29022, 7496, 0, 0, 0, 0, 53, 9484, 29118, 7442, 0, 0, 0, 0 ], "lora_B": [ 110, 8080, 0, 0, 0, 0, 0, 0, 102, 8092, 0, 0, 0, 0, 0, 0 ], "original": [ 323, 83050, 201999, 316920, 134946, 7, 0, 0, 332, 83148, 201113, 317945, 134768, 9, 0, 0 ], "merged": [ 319, 82673, 201545, 317131, 135550, 7, 0, 0, 300, 82852, 200611, 318183, 135380, 9, 0, 0 ] } }, "model.layers.0.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00903690137693966, "l1_avg": 0.00781862810254097, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008328965300489859, "l1_avg": 0.0006743582172526254, "l0_avg": 1.0 }, "original": { "l2_avg": 0.009141611686925394, "l1_avg": 0.005614105198118422, "l0_avg": 0.9999962700737848 }, "merged": { "l2_avg": 0.009140441043929418, "l1_avg": 0.00561422242058648, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.286590571469512e-05, "l1_avg": 3.393135137028164e-05, "l0_avg": 0.9999964396158855 }, "num_elements": 11796480, "num_changed": 11796438, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 57, 16072, 47879, 1069, 0, 0, 0, 0, 66, 16516, 48292, 1121, 0, 0, 0, 0 ], "lora_B": [ 636, 45595, 0, 0, 0, 0, 0, 0, 652, 45277, 0, 0, 0, 0, 0, 0 ], "original": [ 13215, 2960866, 2621722, 296628, 8369, 5, 0, 0, 13105, 2955695, 2621019, 297443, 8408, 5, 0, 0 ], "merged": [ 13147, 2950970, 2629087, 299133, 8457, 5, 0, 0, 13271, 2945341, 2628539, 300044, 8481, 5, 0, 0 ] } }, "model.layers.0.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010821576497566024, "l1_avg": 0.009367282523049249, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008571080248490337, "l1_avg": 0.0006965264910832047, "l0_avg": 1.0 }, "original": { "l2_avg": 0.034717892611691836, "l1_avg": 0.02165618207719591, "l0_avg": 0.9999969482421875 }, "merged": { "l2_avg": 0.034712899052953786, "l1_avg": 0.021656303935580785, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3364710938907475e-05, "l1_avg": 4.182119543353716e-05, "l0_avg": 0.9999865214029948 }, "num_elements": 11796480, "num_changed": 11796321, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 34, 9577, 28882, 7537, 0, 0, 0, 0, 45, 9524, 29010, 7551, 0, 0, 0, 0 ], "lora_B": [ 871, 64593, 0, 0, 0, 0, 0, 0, 900, 64708, 0, 0, 0, 0, 0, 0 ], "original": [ 7134, 1342605, 2070068, 2029911, 448233, 0, 0, 0, 7071, 1342191, 2070960, 2028873, 449434, 0, 0, 0 ], "merged": [ 6963, 1338021, 2068240, 2033713, 451151, 0, 0, 0, 6914, 1337339, 2069299, 2032381, 452459, 0, 0, 0 ] } }, "model.layers.0.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01078317317146577, "l1_avg": 0.009338992171817356, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008247842197306454, "l1_avg": 0.0006674939068034291, "l0_avg": 1.0 }, "original": { "l2_avg": 0.05638893434870864, "l1_avg": 0.03865005175272624, "l0_avg": 0.9999986436631945 }, "merged": { "l2_avg": 0.056385899717079135, "l1_avg": 0.038650112681918675, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.0548770536822374e-05, "l1_avg": 3.948757787131601e-05, "l0_avg": 0.9999708387586805 }, "num_elements": 1474560, "num_changed": 1474517, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9548, 28960, 7354, 0, 0, 0, 0, 37, 9573, 29063, 7590, 0, 0, 0, 0 ], "lora_B": [ 117, 8110, 0, 0, 0, 0, 0, 0, 120, 8037, 0, 0, 0, 0, 0, 0 ], "original": [ 284, 69216, 186648, 339684, 142623, 0, 0, 0, 268, 69231, 185226, 339473, 141907, 0, 0, 0 ], "merged": [ 287, 68867, 186205, 339763, 143321, 0, 0, 0, 282, 68922, 184728, 339604, 142581, 0, 0, 0 ] } }, "model.layers.1.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01076959115298732, "l1_avg": 0.009315343697865804, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008497001836076379, "l1_avg": 0.0006915585254319012, "l0_avg": 1.0 }, "original": { "l2_avg": 0.03044991472353893, "l1_avg": 0.01929946608013577, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.030448488509501874, "l1_avg": 0.019299515088399253, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.228140694934287e-05, "l1_avg": 4.098148395617803e-05, "l0_avg": 0.9999857584635417 }, "num_elements": 1474560, "num_changed": 1474539, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 50, 9655, 29102, 7362, 0, 0, 0, 0, 39, 9708, 28817, 7427, 0, 0, 0, 0 ], "lora_B": [ 104, 8162, 0, 0, 0, 0, 0, 0, 109, 8009, 0, 0, 0, 0, 0, 0 ], "original": [ 596, 142274, 286225, 275904, 32022, 10, 0, 0, 577, 142552, 286365, 276159, 31873, 3, 0, 0 ], "merged": [ 572, 141723, 285812, 276589, 32316, 11, 0, 0, 579, 141979, 285994, 276839, 32143, 3, 0, 0 ] } }, "model.layers.1.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009022557619011287, "l1_avg": 0.007807549089193344, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008140446467768076, "l1_avg": 0.0006586556633313497, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01851023148704291, "l1_avg": 0.01091019180085924, "l0_avg": 0.9999983045789931 }, "merged": { "l2_avg": 0.018508231175813548, "l1_avg": 0.010910280545552571, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.186353744313938e-05, "l1_avg": 3.289390200128158e-05, "l0_avg": 0.9999905056423611 }, "num_elements": 11796480, "num_changed": 11796368, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 68, 16272, 48095, 971, 0, 0, 0, 0, 67, 16428, 48170, 1001, 0, 0, 0, 0 ], "lora_B": [ 689, 45246, 0, 0, 0, 0, 0, 0, 652, 45573, 0, 0, 0, 0, 0, 0 ], "original": [ 7342, 1782836, 2919966, 1115463, 69215, 9, 0, 0, 7292, 1786070, 2921362, 1117816, 69099, 10, 0, 0 ], "merged": [ 7339, 1775789, 2920455, 1121574, 69747, 9, 0, 0, 7290, 1778842, 2921817, 1124045, 69563, 10, 0, 0 ] } }, "model.layers.1.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010765749956481127, "l1_avg": 0.009311509132385255, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008839111155961843, "l1_avg": 0.000722558528650552, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01937174032139069, "l1_avg": 0.013153917259640165, "l0_avg": 0.9999970330132378 }, "merged": { "l2_avg": 0.019368510501815456, "l1_avg": 0.01315411196814643, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.586461839735528e-05, "l1_avg": 4.227905948128965e-05, "l0_avg": 0.9999905904134114 }, "num_elements": 11796480, "num_changed": 11796369, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 37, 9755, 29072, 7391, 0, 0, 0, 0, 25, 9689, 28823, 7368, 0, 0, 0, 0 ], "lora_B": [ 821, 65251, 0, 0, 0, 0, 0, 0, 810, 64190, 0, 0, 0, 0, 0, 0 ], "original": [ 8421, 1632174, 2533661, 1646550, 75263, 0, 0, 0, 8200, 1632601, 2533152, 1650806, 75652, 0, 0, 0 ], "merged": [ 7821, 1626545, 2532398, 1652824, 76210, 0, 0, 0, 7896, 1627075, 2532017, 1657058, 76636, 0, 0, 0 ] } }, "model.layers.1.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010780751120755262, "l1_avg": 0.00933414101600647, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008545542368665338, "l1_avg": 0.0006911020027473569, "l0_avg": 1.0 }, "original": { "l2_avg": 0.03547329816018445, "l1_avg": 0.026410606172349717, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.03547099548421713, "l1_avg": 0.026410653856065537, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.266296313533921e-05, "l1_avg": 3.9556312064329786e-05, "l0_avg": 0.9999837239583333 }, "num_elements": 1474560, "num_changed": 1474536, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 43, 9471, 29022, 7499, 0, 0, 0, 0, 45, 9627, 29137, 7316, 0, 0, 0, 0 ], "lora_B": [ 113, 8048, 0, 0, 0, 0, 0, 0, 117, 8106, 0, 0, 0, 0, 0, 0 ], "original": [ 353, 84628, 221687, 371164, 60560, 0, 0, 0, 327, 83788, 221694, 369593, 60766, 0, 0, 0 ], "merged": [ 362, 84218, 221138, 371539, 61100, 0, 0, 0, 344, 83431, 221144, 369978, 61306, 0, 0, 0 ] } }, "model.layers.19.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010771741469085041, "l1_avg": 0.009318746460808647, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008334285812452435, "l1_avg": 0.0006744927377440035, "l0_avg": 1.0 }, "original": { "l2_avg": 0.047589872291237505, "l1_avg": 0.019360958205329046, "l0_avg": 0.9999891493055556 }, "merged": { "l2_avg": 0.04758900525362907, "l1_avg": 0.019361480077107748, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.107678412001961e-05, "l1_avg": 4.0284583034614724e-05, "l0_avg": 0.9999864366319444 }, "num_elements": 1474560, "num_changed": 1474540, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 38, 9583, 28813, 7395, 0, 0, 0, 0, 38, 9641, 29237, 7415, 0, 0, 0, 0 ], "lora_B": [ 119, 8023, 0, 0, 0, 0, 0, 0, 108, 8134, 0, 0, 0, 0, 0, 0 ], "original": [ 4086, 369143, 170869, 129671, 63607, 37, 0, 0, 4037, 369107, 170832, 129681, 63448, 42, 0, 0 ], "merged": [ 3751, 368796, 171085, 129829, 63840, 37, 0, 0, 3839, 368794, 170997, 129851, 63699, 42, 0, 0 ] } }, "model.layers.19.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009030801279162216, "l1_avg": 0.007816169410943985, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008281975239161541, "l1_avg": 0.00067057932416598, "l0_avg": 1.0 }, "original": { "l2_avg": 0.7311691677304214, "l1_avg": 0.5213995191786024, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.7311629835366817, "l1_avg": 0.5213994344075521, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.2464876841466547e-05, "l1_avg": 3.348399833258655e-05, "l0_avg": 0.9995762295193142 }, "num_elements": 11796480, "num_changed": 11791481, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 58, 16337, 48220, 1147, 0, 0, 0, 0, 60, 16334, 47854, 1062, 0, 0, 0, 0 ], "lora_B": [ 615, 45317, 0, 0, 0, 0, 0, 0, 652, 45576, 0, 0, 0, 0, 0, 0 ], "original": [ 187, 48075, 130528, 436156, 4502758, 685366, 94651, 0, 184, 48004, 130417, 437192, 4501584, 686436, 94942, 0 ], "merged": [ 187, 47885, 130178, 434859, 4498588, 690182, 95839, 0, 196, 47794, 130033, 436027, 4497338, 691245, 96129, 0 ] } }, "model.layers.19.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010805943118369075, "l1_avg": 0.009346950716442532, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008489577133821126, "l1_avg": 0.0006884735776111484, "l0_avg": 1.0 }, "original": { "l2_avg": 0.011698729705591885, "l1_avg": 0.007001764244503445, "l0_avg": 0.9999925401475694 }, "merged": { "l2_avg": 0.01169727362118228, "l1_avg": 0.0070021079646216495, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.377887258145538e-05, "l1_avg": 4.1669896907276575e-05, "l0_avg": 0.9999945746527777 }, "num_elements": 11796480, "num_changed": 11796416, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9625, 28769, 7490, 0, 0, 0, 0, 38, 9663, 29138, 7401, 0, 0, 0, 0 ], "lora_B": [ 865, 64573, 0, 0, 0, 0, 0, 0, 922, 64712, 0, 0, 0, 0, 0, 0 ], "original": [ 22158, 3078070, 2042020, 740699, 12011, 1, 0, 0, 22114, 3077252, 2050319, 740043, 11790, 3, 0, 0 ], "merged": [ 22221, 3070856, 2044654, 744949, 12173, 1, 0, 0, 22390, 3070168, 2052765, 744339, 11961, 3, 0, 0 ] } }, "model.layers.19.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010792192247457822, "l1_avg": 0.009332721100913153, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008080072002485394, "l1_avg": 0.0006506533827632666, "l0_avg": 1.0 }, "original": { "l2_avg": 0.08165449193585121, "l1_avg": 0.05492275026109483, "l0_avg": 0.9999986436631945 }, "merged": { "l2_avg": 0.08164868027072222, "l1_avg": 0.05492277675204807, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.056056934598942e-05, "l1_avg": 3.880917922490173e-05, "l0_avg": 0.9999525282118056 }, "num_elements": 1474560, "num_changed": 1474490, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 30, 9736, 28784, 7382, 0, 0, 0, 0, 40, 9540, 29241, 7407, 0, 0, 0, 0 ], "lora_B": [ 112, 8063, 0, 0, 0, 0, 0, 0, 122, 8087, 0, 0, 0, 0, 0, 0 ], "original": [ 161, 38664, 112157, 344442, 242247, 205, 50, 0, 143, 38823, 111909, 345011, 240504, 193, 51, 0 ], "merged": [ 155, 38460, 111797, 343970, 243264, 206, 50, 0, 137, 38723, 111554, 344399, 241600, 192, 53, 0 ] } }, "model.layers.2.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010807868821463163, "l1_avg": 0.009362379047605727, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000862315995618701, "l1_avg": 0.0006977969314903021, "l0_avg": 1.0 }, "original": { "l2_avg": 0.020890688437917828, "l1_avg": 0.015108088652292887, "l0_avg": 0.9999979654947917 }, "merged": { "l2_avg": 0.02088960307018698, "l1_avg": 0.015108156204223632, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.30222592695113e-05, "l1_avg": 4.152200288242764e-05, "l0_avg": 0.9999857584635417 }, "num_elements": 1474560, "num_changed": 1474539, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 29, 9425, 28849, 7490, 0, 0, 0, 0, 42, 9632, 29197, 7496, 0, 0, 0, 0 ], "lora_B": [ 93, 8143, 0, 0, 0, 0, 0, 0, 108, 8040, 0, 0, 0, 0, 0, 0 ], "original": [ 595, 148291, 321288, 258567, 9109, 0, 0, 0, 591, 147147, 321436, 258426, 9110, 0, 0, 0 ], "merged": [ 572, 147624, 320949, 259468, 9225, 0, 0, 0, 543, 146520, 321045, 259388, 9226, 0, 0, 0 ] } }, "model.layers.2.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009052571443507108, "l1_avg": 0.007835904136300087, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008091770336362221, "l1_avg": 0.0006512484616703457, "l0_avg": 1.0 }, "original": { "l2_avg": 0.023347590463821993, "l1_avg": 0.01422597434785631, "l0_avg": 0.9999983045789931 }, "merged": { "l2_avg": 0.023345855602000632, "l1_avg": 0.014226052496168349, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.201076589601399e-05, "l1_avg": 3.29847266483638e-05, "l0_avg": 0.9999884711371527 }, "num_elements": 11796480, "num_changed": 11796344, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 62, 16290, 48420, 913, 0, 0, 0, 0, 79, 16336, 48010, 962, 0, 0, 0, 0 ], "lora_B": [ 700, 45278, 0, 0, 0, 0, 0, 0, 703, 45479, 0, 0, 0, 0, 0, 0 ], "original": [ 5129, 1295939, 2795480, 1697419, 105329, 38, 2, 0, 5194, 1296784, 2793843, 1695274, 106024, 21, 4, 0 ], "merged": [ 5238, 1290443, 2792705, 1704793, 106142, 38, 2, 0, 5152, 1291185, 2791308, 1702605, 106844, 21, 4, 0 ] } }, "model.layers.2.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010786100994114532, "l1_avg": 0.00933652851316664, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008615142638338825, "l1_avg": 0.0006994551513344049, "l0_avg": 1.0 }, "original": { "l2_avg": 0.017610372432573646, "l1_avg": 0.012524637911054824, "l0_avg": 0.9999979654947917 }, "merged": { "l2_avg": 0.017606794974233952, "l1_avg": 0.012524739901224772, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.355273471054973e-05, "l1_avg": 4.136233797503842e-05, "l0_avg": 0.9999916076660156 }, "num_elements": 11796480, "num_changed": 11796381, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 30, 9647, 29131, 7284, 0, 0, 0, 0, 31, 9450, 29169, 7418, 0, 0, 0, 0 ], "lora_B": [ 868, 64836, 0, 0, 0, 0, 0, 0, 907, 64461, 0, 0, 0, 0, 0, 0 ], "original": [ 6894, 1566063, 2628654, 1662959, 36419, 0, 0, 0, 6984, 1566508, 2625887, 1659589, 36523, 0, 0, 0 ], "merged": [ 7088, 1559741, 2626954, 1670139, 37005, 0, 0, 0, 7034, 1560659, 2623747, 1667021, 37092, 0, 0, 0 ] } }, "model.layers.2.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010801776782759719, "l1_avg": 0.009354958269331191, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008587405900470912, "l1_avg": 0.0006984120118431747, "l0_avg": 1.0 }, "original": { "l2_avg": 0.040144252799086934, "l1_avg": 0.03073741594950358, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.04014148833134991, "l1_avg": 0.030737471580505372, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3371517519233415e-05, "l1_avg": 4.1608077784379326e-05, "l0_avg": 0.9999776204427083 }, "num_elements": 1474560, "num_changed": 1474527, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 39, 9561, 29098, 7329, 0, 0, 0, 0, 46, 9441, 29115, 7531, 0, 0, 0, 0 ], "lora_B": [ 101, 8104, 0, 0, 0, 0, 0, 0, 98, 8081, 0, 0, 0, 0, 0, 0 ], "original": [ 263, 68588, 187304, 395072, 85581, 0, 0, 0, 281, 68581, 187478, 395537, 85875, 0, 0, 0 ], "merged": [ 262, 68330, 186730, 395143, 86355, 0, 0, 0, 251, 68262, 186987, 395601, 86639, 0, 0, 0 ] } }, "model.layers.0.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020434163536590263, "l1_avg": 0.0017323258850309583, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007847534618466842, "l1_avg": 0.0006307920647992028, "l0_avg": 1.0 }, "original": { "l2_avg": 0.02562208822690981, "l1_avg": 0.01792925610954379, "l0_avg": 0.8817025851025994 }, "merged": { "l2_avg": 0.025623601767069668, "l1_avg": 0.017929197711709106, "l0_avg": 0.8683371979513286 }, "diff": { "l2_avg": 0.0014872800975948542, "l1_avg": 0.0002452447090619876, "l0_avg": 0.04798251493477527 }, "num_elements": 1061683200, "num_changed": 50942230, "precision": "mxfp4", "fp4_dist_before": [ 62803656, 85527349, 107102535, 60101440, 88946557, 64407995, 45759626, 16371796, 62790722, 85509837, 107030932, 60048466, 88899364, 64338850, 45697037, 16347038 ], "fp4_dist_after": [ 69902776, 121217066, 108097698, 73838855, 76226563, 52212063, 25984885, 3545865, 69881409, 121188151, 108022973, 73761002, 76163216, 52147888, 25951364, 3541426 ], "bf16_dists": null }, "model.layers.0.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010735510431965663, "l1_avg": 0.00929771794213189, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008559440035962021, "l1_avg": 0.0006972139080365498, "l0_avg": 1.0 }, "original": { "l2_avg": 0.024459375275505915, "l1_avg": 0.022107131392867477, "l0_avg": 0.7638229308893651 }, "merged": { "l2_avg": 0.024462191263834636, "l1_avg": 0.022107153998480903, "l0_avg": 0.7490986463758681 }, "diff": { "l2_avg": 0.0016972454057799446, "l1_avg": 0.00034829157370108146, "l0_avg": 0.046622724650818625 }, "num_elements": 2123366400, "num_changed": 98997127, "precision": "mxfp4", "fp4_dist_before": [ 250577062, 181662495, 195614547, 93108623, 137137778, 96395041, 76011042, 31030367, 250913391, 181781448, 195652756, 93087531, 137062370, 96352623, 75977201, 31002125 ], "fp4_dist_after": [ 266371084, 247404620, 185170421, 113537281, 115783205, 81303367, 44676309, 7459471, 266384420, 247529138, 185164900, 113498264, 115722811, 81264596, 44648261, 7448252 ], "bf16_dists": null }, "model.layers.18.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020337355026024094, "l1_avg": 0.0017263147566053602, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007730728493826013, "l1_avg": 0.0006208479404449463, "l0_avg": 1.0 }, "original": { "l2_avg": 1.6197555529597611, "l1_avg": 1.3785572193287037, "l0_avg": 0.8703648687292028 }, "merged": { "l2_avg": 1.6197987113326364, "l1_avg": 1.3786207561728394, "l0_avg": 0.8566580077748239 }, "diff": { "l2_avg": 0.10484845653988022, "l1_avg": 0.020308334915726273, "l0_avg": 0.04766754527150849 }, "num_elements": 1061683200, "num_changed": 50607832, "precision": "mxfp4", "fp4_dist_before": [ 68817216, 92768879, 111281098, 59354446, 84297054, 57864172, 41148504, 15403846, 68814225, 92774908, 111237382, 59325838, 84254229, 57831750, 41117489, 15392164 ], "fp4_dist_after": [ 76083938, 128045280, 109755004, 70733720, 71067542, 47547063, 24092076, 3602854, 76099847, 128036885, 109702831, 70701751, 71023133, 47518089, 24071957, 3601230 ], "bf16_dists": null }, "model.layers.19.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020343403996059043, "l1_avg": 0.001726949049366845, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008067386867025474, "l1_avg": 0.0006500302917427487, "l0_avg": 1.0 }, "original": { "l2_avg": 2.1149439337360763, "l1_avg": 1.8078583140432098, "l0_avg": 0.8678013422459732 }, "merged": { "l2_avg": 2.1152376504403656, "l1_avg": 1.8079705584490742, "l0_avg": 0.8531920746226369 }, "diff": { "l2_avg": 0.13997731904043598, "l1_avg": 0.028094525749300735, "l0_avg": 0.050642707730516975 }, "num_elements": 1061683200, "num_changed": 53766512, "precision": "mxfp4", "fp4_dist_before": [ 70175799, 93286448, 111885747, 58693001, 83760075, 57667424, 40637343, 14862850, 70177295, 93265899, 111839588, 58648589, 83730406, 57622631, 40585390, 14844715 ], "fp4_dist_after": [ 77929117, 129034136, 109976540, 68950587, 69441585, 47079330, 24716247, 3839629, 77934391, 129003357, 109931760, 68896437, 69398087, 47035526, 24681727, 3834744 ], "bf16_dists": null }, "model.layers.19.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010753939729149939, "l1_avg": 0.009303849273257786, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008609100321809058, "l1_avg": 0.0007055915892124176, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019795747598012288, "l1_avg": 0.01771171287254051, "l0_avg": 0.8752191218623409 }, "merged": { "l2_avg": 0.01980352799097697, "l1_avg": 0.017711733594352817, "l0_avg": 0.8614886625313465 }, "diff": { "l2_avg": 0.0014699013696776496, "l1_avg": 0.0002659203682416751, "l0_avg": 0.048263445253725404 }, "num_elements": 2123366400, "num_changed": 102480978, "precision": "mxfp4", "fp4_dist_before": [ 132464471, 177746538, 213528794, 119948282, 173575667, 125280163, 87577241, 30902589, 132491053, 177848387, 213689448, 120119886, 173828897, 125524217, 87833441, 31007326 ], "fp4_dist_after": [ 147053610, 245780799, 213762671, 144099112, 148873534, 103020221, 51506316, 6944096, 147056710, 245934201, 213957736, 144319539, 149147096, 103260461, 51677967, 6972331 ], "bf16_dists": null }, "model.layers.10.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010779273072948136, "l1_avg": 0.009317547082901001, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008070803014561534, "l1_avg": 0.0006529029924422503, "l0_avg": 1.0 }, "original": { "l2_avg": 0.012064083014536094, "l1_avg": 0.008005122343699137, "l0_avg": 0.9999952528211805 }, "merged": { "l2_avg": 0.012063501848023198, "l1_avg": 0.008005173338784113, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.981275309009318e-05, "l1_avg": 3.933426406648424e-05, "l0_avg": 0.9999959309895833 }, "num_elements": 1474560, "num_changed": 1474554, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 24, 9621, 29104, 7426, 0, 0, 0, 0, 44, 9861, 28592, 7488, 0, 0, 0, 0 ], "lora_B": [ 106, 8023, 0, 0, 0, 0, 0, 0, 124, 8131, 0, 0, 0, 0, 0, 0 ], "original": [ 1212, 278210, 365599, 90652, 2017, 0, 0, 0, 1214, 278001, 364742, 90883, 2030, 0, 0, 0 ], "merged": [ 1212, 277131, 365974, 91355, 2026, 0, 0, 0, 1258, 276950, 364976, 91635, 2043, 0, 0, 0 ] } }, "model.layers.10.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00904432449063364, "l1_avg": 0.007823658175766468, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008342810218279381, "l1_avg": 0.0006761181271738476, "l0_avg": 1.0 }, "original": { "l2_avg": 0.12095067440817482, "l1_avg": 0.08636985884772407, "l0_avg": 0.9999997456868489 }, "merged": { "l2_avg": 0.12094122929618452, "l1_avg": 0.086369874742296, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3402644209432986e-05, "l1_avg": 3.4016924392845894e-05, "l0_avg": 0.999927266438802 }, "num_elements": 11796480, "num_changed": 11795622, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 54, 16255, 47906, 1233, 0, 0, 0, 0, 66, 16209, 48124, 1225, 0, 0, 0, 0 ], "lora_B": [ 668, 45553, 0, 0, 0, 0, 0, 0, 641, 45298, 0, 0, 0, 0, 0, 0 ], "original": [ 838, 215640, 592731, 2007095, 3082792, 1515, 374, 0, 826, 215191, 593440, 2005673, 3078432, 1537, 396, 0 ], "merged": [ 898, 214707, 590985, 2002282, 3090204, 1528, 376, 0, 856, 214219, 591704, 2000858, 3085919, 1546, 398, 0 ] } }, "model.layers.10.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010758329873759697, "l1_avg": 0.009296438429090711, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008466332982231241, "l1_avg": 0.0006857975386083126, "l0_avg": 1.0 }, "original": { "l2_avg": 0.013016171777529844, "l1_avg": 0.009479468398623996, "l0_avg": 0.9999971177842882 }, "merged": { "l2_avg": 0.01301415702762975, "l1_avg": 0.009479599528842503, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2514200345486903e-05, "l1_avg": 4.087995168649488e-05, "l0_avg": 0.9999943203396268 }, "num_elements": 11796480, "num_changed": 11796413, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9684, 29003, 7391, 0, 0, 0, 0, 38, 9810, 28846, 7356, 0, 0, 0, 0 ], "lora_B": [ 888, 64605, 0, 0, 0, 0, 0, 0, 895, 64684, 0, 0, 0, 0, 0, 0 ], "original": [ 7979, 1860782, 2917748, 1109205, 4978, 0, 0, 0, 7976, 1860701, 2913019, 1108987, 5105, 0, 0, 0 ], "merged": [ 8021, 1853508, 2917625, 1116408, 5083, 0, 0, 0, 8107, 1853602, 2912957, 1115955, 5214, 0, 0, 0 ] } }, "model.layers.10.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01079725075220049, "l1_avg": 0.00933397478527493, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008210999076254666, "l1_avg": 0.000668276334181428, "l0_avg": 1.0 }, "original": { "l2_avg": 0.049986656394918234, "l1_avg": 0.038393433888753256, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.049983430135411504, "l1_avg": 0.038393492168850366, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.105461610258824e-05, "l1_avg": 3.9556211171050866e-05, "l0_avg": 0.9999708387586805 }, "num_elements": 1474560, "num_changed": 1474517, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 40, 9671, 28908, 7486, 0, 0, 0, 0, 43, 9671, 28989, 7352, 0, 0, 0, 0 ], "lora_B": [ 101, 8096, 0, 0, 0, 0, 0, 0, 118, 8069, 0, 0, 0, 0, 0, 0 ], "original": [ 225, 55135, 152903, 384648, 143898, 0, 0, 0, 226, 55881, 153264, 384769, 143611, 0, 0, 0 ], "merged": [ 192, 54922, 152469, 384312, 144908, 0, 0, 0, 218, 55679, 152799, 384521, 144540, 0, 0, 0 ] } }, "model.layers.11.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010787378775082753, "l1_avg": 0.009335876835717096, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007994427578523755, "l1_avg": 0.000646946660708636, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01841359654647645, "l1_avg": 0.009716233279969956, "l0_avg": 0.9999945746527777 }, "merged": { "l2_avg": 0.018413178734875288, "l1_avg": 0.00971643262439304, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.939174789549595e-05, "l1_avg": 3.825840540230274e-05, "l0_avg": 0.9999918619791667 }, "num_elements": 1474560, "num_changed": 1474548, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 50, 9639, 28940, 7374, 0, 0, 0, 0, 49, 9572, 29039, 7497, 0, 0, 0, 0 ], "lora_B": [ 143, 8073, 0, 0, 0, 0, 0, 0, 107, 8061, 0, 0, 0, 0, 0, 0 ], "original": [ 1887, 344171, 260175, 119003, 11945, 0, 0, 0, 1939, 344128, 259813, 119472, 12027, 0, 0, 0 ], "merged": [ 1960, 343250, 260548, 119494, 12041, 0, 0, 0, 1875, 343234, 260116, 119943, 12099, 0, 0, 0 ] } }, "model.layers.11.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00903718191690006, "l1_avg": 0.00781302060931921, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008365556211698032, "l1_avg": 0.0006785504933860567, "l0_avg": 1.0 }, "original": { "l2_avg": 0.16030825170903873, "l1_avg": 0.11654889848497178, "l0_avg": 0.9999995761447482 }, "merged": { "l2_avg": 0.16029712726857603, "l1_avg": 0.11654891967773437, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3470190983642647e-05, "l1_avg": 3.4000062280231055e-05, "l0_avg": 0.9999059889051649 }, "num_elements": 11796480, "num_changed": 11795371, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 78, 16392, 47920, 1315, 0, 0, 0, 0, 65, 16401, 47564, 1337, 0, 0, 0, 0 ], "lora_B": [ 581, 45531, 0, 0, 0, 0, 0, 0, 637, 45411, 0, 0, 0, 0, 0, 0 ], "original": [ 734, 176143, 459375, 1574681, 3683229, 2390, 375, 0, 725, 175595, 458943, 1578533, 3683069, 2354, 334, 0 ], "merged": [ 700, 175442, 458051, 1570518, 3689433, 2416, 379, 0, 761, 174800, 457479, 1574568, 3689220, 2377, 336, 0 ] } }, "model.layers.11.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010813868973028756, "l1_avg": 0.009337700737847222, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008170174815643157, "l1_avg": 0.0006605401868000627, "l0_avg": 1.0 }, "original": { "l2_avg": 0.012771576252723606, "l1_avg": 0.00921734439002143, "l0_avg": 0.9999966939290365 }, "merged": { "l2_avg": 0.012769648134847881, "l1_avg": 0.009217487441168891, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.506130247346086e-05, "l1_avg": 4.1455035615298486e-05, "l0_avg": 0.9999941507975261 }, "num_elements": 11796480, "num_changed": 11796411, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 46, 9704, 28916, 7394, 0, 0, 0, 0, 35, 9733, 28816, 7516, 0, 0, 0, 0 ], "lora_B": [ 927, 64575, 0, 0, 0, 0, 0, 0, 923, 64647, 0, 0, 0, 0, 0, 0 ], "original": [ 8938, 1961599, 2856199, 1067518, 4836, 0, 0, 0, 8747, 1959846, 2856608, 1067513, 4676, 0, 0, 0 ], "merged": [ 8723, 1954401, 2856170, 1074487, 4952, 0, 0, 0, 8962, 1952634, 2856869, 1074503, 4779, 0, 0, 0 ] } }, "model.layers.11.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010804884452883417, "l1_avg": 0.009337617953618368, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007872685673646629, "l1_avg": 0.0006376968813128769, "l0_avg": 1.0 }, "original": { "l2_avg": 0.053487512366836645, "l1_avg": 0.04031961229112413, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.053484213854195876, "l1_avg": 0.04031960434383816, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1978227252322005e-05, "l1_avg": 3.881175588402483e-05, "l0_avg": 0.9999708387586805 }, "num_elements": 1474560, "num_changed": 1474517, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9734, 29057, 7331, 0, 0, 0, 0, 29, 9638, 28893, 7446, 0, 0, 0, 0 ], "lora_B": [ 148, 8099, 0, 0, 0, 0, 0, 0, 108, 8029, 0, 0, 0, 0, 0, 0 ], "original": [ 205, 55437, 149798, 377681, 154944, 0, 0, 0, 209, 55468, 148863, 377327, 154628, 0, 0, 0 ], "merged": [ 216, 55171, 149424, 377400, 155859, 0, 0, 0, 230, 55185, 148479, 377032, 155564, 0, 0, 0 ] } }, "model.layers.1.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020387636308501866, "l1_avg": 0.0017293812500105963, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007648730021996941, "l1_avg": 0.0006142784323957232, "l0_avg": 1.0 }, "original": { "l2_avg": 0.03980980014138515, "l1_avg": 0.028812521475332753, "l0_avg": 0.880399670071072 }, "merged": { "l2_avg": 0.03979328981592849, "l1_avg": 0.028812553499951776, "l0_avg": 0.8669218576690297 }, "diff": { "l2_avg": 0.002350308470497878, "l1_avg": 0.00040283144256215037, "l0_avg": 0.048275773790147566 }, "num_elements": 1061683200, "num_changed": 51253578, "precision": "mxfp4", "fp4_dist_before": [ 63488584, 86154927, 107716122, 59887260, 88503963, 63677140, 45309743, 16278299, 63489077, 86123054, 107652801, 59838302, 88430729, 63603164, 45271655, 16258380 ], "fp4_dist_after": [ 70626843, 122082333, 108454327, 73416001, 75522060, 51535282, 25792617, 3571732, 70659985, 122031836, 108381766, 73348074, 75455105, 51471101, 25767838, 3566300 ], "bf16_dists": null }, "model.layers.1.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010748641847659612, "l1_avg": 0.00931148264143202, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008507342188541462, "l1_avg": 0.0006923016574647691, "l0_avg": 1.0 }, "original": { "l2_avg": 0.021090655856662328, "l1_avg": 0.021456119867018713, "l0_avg": 0.8330497633380655 }, "merged": { "l2_avg": 0.02109139495425754, "l1_avg": 0.021456114215615354, "l0_avg": 0.8174360774475851 }, "diff": { "l2_avg": 0.0015295493933889601, "l1_avg": 0.00034264935387505425, "l0_avg": 0.05272385632550275 }, "num_elements": 2123366400, "num_changed": 111952065, "precision": "mxfp4", "fp4_dist_before": [ 177160775, 176388912, 209630823, 105633445, 159683395, 114860949, 86130618, 31922925, 177335748, 176522966, 209761436, 105672324, 159724402, 114895944, 86125132, 31916606 ], "fp4_dist_after": [ 193826371, 249846620, 204827739, 129058570, 132908404, 92600476, 50499045, 7943179, 193823728, 250040933, 204933148, 129098918, 132938462, 92611265, 50472044, 7937498 ], "bf16_dists": null }, "model.layers.10.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020326530187979, "l1_avg": 0.0017256683773464628, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007463855751233152, "l1_avg": 0.0005977995693683624, "l0_avg": 1.0 }, "original": { "l2_avg": 0.5303421942111894, "l1_avg": 0.4746394555362654, "l0_avg": 0.8745899586618683 }, "merged": { "l2_avg": 0.5305382051546641, "l1_avg": 0.47464687017746915, "l0_avg": 0.860041717717677 }, "diff": { "l2_avg": 0.03547607385816832, "l1_avg": 0.007141131177360629, "l0_avg": 0.051418373202100215 }, "num_elements": 1061683200, "num_changed": 54590023, "precision": "mxfp4", "fp4_dist_before": [ 66580100, 88917455, 110003766, 59005671, 86527424, 61115771, 43348436, 15566643, 66565634, 88888760, 109947127, 58939522, 86426900, 61028062, 43283962, 15537967 ], "fp4_dist_after": [ 74290715, 125661655, 109601594, 70914209, 72097185, 49118497, 25562006, 3806367, 74300642, 125614182, 109530524, 70827951, 72001894, 49042961, 25516113, 3796705 ], "bf16_dists": null }, "model.layers.10.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010712012050688757, "l1_avg": 0.009258027209175958, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008434071031407762, "l1_avg": 0.0006874440444840325, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018296873569488524, "l1_avg": 0.01789375587745949, "l0_avg": 0.8813605734742718 }, "merged": { "l2_avg": 0.018297459019554985, "l1_avg": 0.017893729504243828, "l0_avg": 0.8673347374244973 }, "diff": { "l2_avg": 0.00129761869708697, "l1_avg": 0.0002660214165110647, "l0_avg": 0.049955613878038194 }, "num_elements": 2123366400, "num_changed": 106074072, "precision": "mxfp4", "fp4_dist_before": [ 125949943, 171950205, 210901961, 121273068, 177728646, 131026752, 91429407, 31172188, 125965029, 172001482, 210984769, 121348654, 177819013, 131118883, 91496463, 31199937 ], "fp4_dist_after": [ 140847445, 240632231, 212961949, 145852700, 152374372, 107249024, 54402841, 7118598, 140849516, 240733089, 213036509, 145939028, 152458853, 107339867, 54446260, 7124118 ], "bf16_dists": null }, "model.layers.11.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010740929376156713, "l1_avg": 0.009287484486897786, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008428609833246866, "l1_avg": 0.0006888140406873492, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018851494789123534, "l1_avg": 0.01823229000892168, "l0_avg": 0.8828186605006089 }, "merged": { "l2_avg": 0.018852443165249293, "l1_avg": 0.01823229000892168, "l0_avg": 0.8687712506894713 }, "diff": { "l2_avg": 0.0013244017130798764, "l1_avg": 0.0002702366864239728, "l0_avg": 0.05028547263439791 }, "num_elements": 2123366400, "num_changed": 106774483, "precision": "mxfp4", "fp4_dist_before": [ 124395601, 170167168, 210675186, 121114215, 178756785, 132214505, 92561824, 31484206, 124423318, 170243618, 210787480, 121207533, 178892630, 132304647, 92623537, 31514147 ], "fp4_dist_after": [ 139319371, 239583699, 213303175, 146330175, 153122867, 107728914, 54795051, 7201253, 139327346, 239686293, 213418198, 146436620, 153257449, 107812479, 54832516, 7210994 ], "bf16_dists": null }, "model.layers.12.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010785175054494658, "l1_avg": 0.009332484006881714, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008065151050686836, "l1_avg": 0.0006513990228995681, "l0_avg": 1.0 }, "original": { "l2_avg": 0.007440663084227731, "l1_avg": 0.005181460248099433, "l0_avg": 0.999993896484375 }, "merged": { "l2_avg": 0.007440479309952031, "l1_avg": 0.005181780126359728, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.984729052992744e-05, "l1_avg": 3.8999034505751396e-05, "l0_avg": 0.9999932183159722 }, "num_elements": 1474560, "num_changed": 1474550, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9573, 28926, 7357, 0, 0, 0, 0, 32, 9622, 29136, 7478, 0, 0, 0, 0 ], "lora_B": [ 135, 8184, 0, 0, 0, 0, 0, 0, 133, 7932, 0, 0, 0, 0, 0, 0 ], "original": [ 1848, 389622, 314115, 33260, 105, 0, 0, 0, 1795, 388013, 312737, 32943, 122, 0, 0, 0 ], "merged": [ 1858, 388317, 315028, 33613, 105, 0, 0, 0, 1849, 386744, 313626, 33295, 125, 0, 0, 0 ] } }, "model.layers.12.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009059107497748837, "l1_avg": 0.00784157682210207, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008239601132129921, "l1_avg": 0.0006685695714420743, "l0_avg": 1.0 }, "original": { "l2_avg": 0.1342254372251146, "l1_avg": 0.10016854604085286, "l0_avg": 0.9999995761447482 }, "merged": { "l2_avg": 0.13421386851786024, "l1_avg": 0.10016855663723416, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.3029794172256296e-05, "l1_avg": 3.338173393987947e-05, "l0_avg": 0.9999137878417969 }, "num_elements": 11796480, "num_changed": 11795463, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 61, 16211, 48058, 1267, 0, 0, 0, 0, 73, 16351, 47841, 1210, 0, 0, 0, 0 ], "lora_B": [ 659, 45305, 0, 0, 0, 0, 0, 0, 661, 45535, 0, 0, 0, 0, 0, 0 ], "original": [ 824, 198232, 510560, 1737976, 3448835, 1083, 121, 0, 821, 198693, 510480, 1735664, 3452020, 1045, 126, 0 ], "merged": [ 797, 197455, 509161, 1733721, 3455313, 1095, 122, 0, 836, 197841, 508930, 1731462, 3458562, 1057, 128, 0 ] } }, "model.layers.12.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010783331028856437, "l1_avg": 0.009328770637512206, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008396149424473617, "l1_avg": 0.0006794914370402694, "l0_avg": 1.0 }, "original": { "l2_avg": 0.014159537903165827, "l1_avg": 0.010413003630108303, "l0_avg": 0.9999974568684896 }, "merged": { "l2_avg": 0.014157057783801486, "l1_avg": 0.01041310297118293, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.229794567527812e-05, "l1_avg": 4.083249045328961e-05, "l0_avg": 0.9999946594238281 }, "num_elements": 11796480, "num_changed": 11796417, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 37, 9545, 28953, 7482, 0, 0, 0, 0, 36, 9730, 29053, 7324, 0, 0, 0, 0 ], "lora_B": [ 876, 64713, 0, 0, 0, 0, 0, 0, 917, 64566, 0, 0, 0, 0, 0, 0 ], "original": [ 7031, 1665906, 2920340, 1295678, 7522, 0, 0, 0, 7054, 1667178, 2921642, 1296799, 7330, 0, 0, 0 ], "merged": [ 7199, 1659274, 2918999, 1303406, 7713, 0, 0, 0, 6998, 1660264, 2920713, 1304410, 7504, 0, 0, 0 ] } }, "model.layers.12.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010789626475839394, "l1_avg": 0.009342069096035428, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008268022793345153, "l1_avg": 0.0006734909838996828, "l0_avg": 1.0 }, "original": { "l2_avg": 0.053733034799171984, "l1_avg": 0.04156673749287923, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.05372940329382652, "l1_avg": 0.04156669775644938, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.17193651832853e-05, "l1_avg": 4.041468621128135e-05, "l0_avg": 0.9999728732638888 }, "num_elements": 1474560, "num_changed": 1474520, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 53, 9503, 28893, 7291, 0, 0, 0, 0, 26, 9685, 29360, 7349, 0, 0, 0, 0 ], "lora_B": [ 93, 8047, 0, 0, 0, 0, 0, 0, 120, 8124, 0, 0, 0, 0, 0, 0 ], "original": [ 185, 49821, 140330, 378519, 167553, 0, 0, 0, 210, 49687, 140560, 379507, 168188, 0, 0, 0 ], "merged": [ 205, 49633, 139913, 378159, 168552, 0, 0, 0, 172, 49461, 140120, 379101, 169244, 0, 0, 0 ] } }, "model.layers.13.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01075228574202556, "l1_avg": 0.009297711981667412, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007913949084468186, "l1_avg": 0.0006345683941617608, "l0_avg": 1.0 }, "original": { "l2_avg": 0.02083331316661207, "l1_avg": 0.00994187593460083, "l0_avg": 0.9999911838107639 }, "merged": { "l2_avg": 0.020832766555945886, "l1_avg": 0.00994218322965834, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.8746884379286875e-05, "l1_avg": 3.787790839042929e-05, "l0_avg": 0.9999966091579862 }, "num_elements": 1474560, "num_changed": 1474555, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 41, 9704, 29090, 7354, 0, 0, 0, 0, 34, 9684, 28884, 7369, 0, 0, 0, 0 ], "lora_B": [ 132, 8048, 0, 0, 0, 0, 0, 0, 125, 8079, 0, 0, 0, 0, 0, 0 ], "original": [ 2392, 361854, 247135, 110215, 15429, 0, 0, 0, 2279, 362526, 246874, 110567, 15289, 0, 0, 0 ], "merged": [ 2341, 360964, 247502, 110589, 15532, 0, 0, 0, 2378, 361572, 247293, 111014, 15375, 0, 0, 0 ] } }, "model.layers.13.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.00902434293317243, "l1_avg": 0.007801322732120752, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008300848425327858, "l1_avg": 0.0006726636240879694, "l0_avg": 1.0 }, "original": { "l2_avg": 0.21920052416913355, "l1_avg": 0.16342239379882811, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 0.21918899100322253, "l1_avg": 0.16342239379882811, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.2693891166306364e-05, "l1_avg": 3.346408096452554e-05, "l0_avg": 0.9998597886827257 }, "num_elements": 11796480, "num_changed": 11794826, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 64, 16396, 47712, 1278, 0, 0, 0, 0, 55, 16459, 47788, 1320, 0, 0, 0, 0 ], "lora_B": [ 642, 45422, 0, 0, 0, 0, 0, 0, 672, 45424, 0, 0, 0, 0, 0, 0 ], "original": [ 600, 138031, 333884, 1144309, 4274093, 5231, 354, 0, 586, 138199, 334843, 1144991, 4275751, 5283, 325, 0 ], "merged": [ 647, 137498, 332979, 1141129, 4278569, 5323, 357, 0, 598, 137670, 333883, 1141772, 4280357, 5368, 330, 0 ] } }, "model.layers.13.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010813729964281753, "l1_avg": 0.009351619084676107, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000827527028731085, "l1_avg": 0.0006701566744595766, "l0_avg": 1.0 }, "original": { "l2_avg": 0.012413197338576062, "l1_avg": 0.008824894163343642, "l0_avg": 0.9999966939290365 }, "merged": { "l2_avg": 0.012411189252677842, "l1_avg": 0.008825052446789211, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2675854453461205e-05, "l1_avg": 4.0475284266803e-05, "l0_avg": 0.9999933878580729 }, "num_elements": 11796480, "num_changed": 11796402, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9597, 29037, 7422, 0, 0, 0, 0, 25, 9646, 28956, 7444, 0, 0, 0, 0 ], "lora_B": [ 887, 64534, 0, 0, 0, 0, 0, 0, 942, 64709, 0, 0, 0, 0, 0, 0 ], "original": [ 9740, 2093791, 2792497, 993601, 4858, 0, 0, 0, 9564, 2099007, 2794105, 994507, 4810, 0, 0, 0 ], "merged": [ 9675, 2086063, 2793829, 999971, 4955, 0, 0, 0, 9524, 2091486, 2795177, 1000872, 4928, 0, 0, 0 ] } }, "model.layers.13.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010802657171490744, "l1_avg": 0.009328207042482163, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007840656908228993, "l1_avg": 0.0006293227197602391, "l0_avg": 1.0 }, "original": { "l2_avg": 0.05370013763310136, "l1_avg": 0.04143078327178955, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.05369663806826153, "l1_avg": 0.04143076207902696, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.158326472309634e-05, "l1_avg": 3.8968344839910665e-05, "l0_avg": 0.9999742296006945 }, "num_elements": 1474560, "num_changed": 1474522, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9710, 29192, 7310, 0, 0, 0, 0, 38, 9759, 28839, 7280, 0, 0, 0, 0 ], "lora_B": [ 135, 8023, 0, 0, 0, 0, 0, 0, 125, 8101, 0, 0, 0, 0, 0, 0 ], "original": [ 199, 49957, 140172, 382326, 165211, 0, 0, 0, 188, 49827, 139947, 382031, 164702, 0, 0, 0 ], "merged": [ 184, 49771, 139791, 381838, 166279, 0, 0, 0, 214, 49614, 139499, 381647, 165723, 0, 0, 0 ] } }, "model.layers.14.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01075974273667416, "l1_avg": 0.009310655461417305, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008061928674578667, "l1_avg": 0.0006542591727338731, "l0_avg": 1.0 }, "original": { "l2_avg": 0.006051247489799013, "l1_avg": 0.003045191036330329, "l0_avg": 0.9999925401475694 }, "merged": { "l2_avg": 0.006051332308695491, "l1_avg": 0.0030455695258246526, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.96815212492875e-05, "l1_avg": 3.904795739799738e-05, "l0_avg": 0.9999986436631945 }, "num_elements": 1474560, "num_changed": 1474558, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 46, 9515, 28984, 7321, 0, 0, 0, 0, 33, 9698, 29179, 7384, 0, 0, 0, 0 ], "lora_B": [ 112, 8047, 0, 0, 0, 0, 0, 0, 128, 8097, 0, 0, 0, 0, 0, 0 ], "original": [ 3542, 554535, 171084, 7786, 771, 0, 0, 0, 3434, 553485, 171372, 7742, 809, 0, 0, 0 ], "merged": [ 3432, 553376, 172226, 7832, 777, 0, 0, 0, 3536, 552296, 172488, 7783, 814, 0, 0, 0 ] } }, "model.layers.14.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009047505260607198, "l1_avg": 0.007831534370779991, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008084627976624742, "l1_avg": 0.000656282901763916, "l0_avg": 1.0 }, "original": { "l2_avg": 0.13928902129486553, "l1_avg": 0.10397022035386827, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 0.13927742593160367, "l1_avg": 0.10397023095024957, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.202917867202695e-05, "l1_avg": 3.216808351377646e-05, "l0_avg": 0.9999032762315538 }, "num_elements": 11796480, "num_changed": 11795339, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 50, 16161, 48089, 1100, 0, 0, 0, 0, 88, 16261, 48139, 1184, 0, 0, 0, 0 ], "lora_B": [ 629, 45577, 0, 0, 0, 0, 0, 0, 608, 45346, 0, 0, 0, 0, 0, 0 ], "original": [ 885, 203609, 505018, 1685290, 3504016, 613, 101, 0, 929, 203478, 503467, 1687364, 3500989, 618, 103, 0 ], "merged": [ 917, 202871, 503629, 1680973, 3510470, 620, 102, 0, 848, 202752, 502085, 1683062, 3507421, 627, 103, 0 ] } }, "model.layers.14.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010784638653510457, "l1_avg": 0.009330671363406711, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008231350307712212, "l1_avg": 0.0006652033771388233, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01564089220787998, "l1_avg": 0.011252227094438341, "l0_avg": 0.9999966091579862 }, "merged": { "l2_avg": 0.01563780455367807, "l1_avg": 0.011252348952823215, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.099453892841339e-05, "l1_avg": 3.965402849846416e-05, "l0_avg": 0.9999921162923177 }, "num_elements": 11796480, "num_changed": 11796387, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 46, 9661, 28997, 7443, 0, 0, 0, 0, 33, 9590, 29104, 7286, 0, 0, 0, 0 ], "lora_B": [ 967, 64586, 0, 0, 0, 0, 0, 0, 905, 64614, 0, 0, 0, 0, 0, 0 ], "original": [ 9690, 1684567, 2713749, 1478040, 15545, 0, 0, 0, 9713, 1684195, 2710092, 1475285, 15604, 0, 0, 0 ], "merged": [ 9481, 1678575, 2712025, 1485571, 15903, 0, 0, 0, 9494, 1678342, 2708405, 1482755, 15929, 0, 0, 0 ] } }, "model.layers.14.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010781929160984109, "l1_avg": 0.009325689739651151, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008154078968800604, "l1_avg": 0.0006578554748557508, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0615715134297196, "l1_avg": 0.04793897204928928, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.06156736044523284, "l1_avg": 0.04793906211853027, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.097414122945514e-05, "l1_avg": 3.9812539600663716e-05, "l0_avg": 0.9999640570746527 }, "num_elements": 1474560, "num_changed": 1474507, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 36, 9630, 28991, 7253, 0, 0, 0, 0, 41, 9585, 29277, 7347, 0, 0, 0, 0 ], "lora_B": [ 118, 8171, 0, 0, 0, 0, 0, 0, 121, 7974, 0, 0, 0, 0, 0, 0 ], "original": [ 155, 40502, 118852, 365692, 212266, 0, 0, 0, 171, 40277, 118357, 365667, 212621, 0, 0, 0 ], "merged": [ 177, 40313, 118468, 365188, 213336, 0, 0, 0, 160, 40079, 117988, 365155, 213696, 0, 0, 0 ] } }, "model.layers.11.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.002024305245782953, "l1_avg": 0.0017203594247500102, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007313240362874516, "l1_avg": 0.0005856654710239835, "l0_avg": 1.0 }, "original": { "l2_avg": 0.7130518470501002, "l1_avg": 0.5938414472415123, "l0_avg": 0.8731160990397135 }, "merged": { "l2_avg": 0.7130943460589453, "l1_avg": 0.5938510923032407, "l0_avg": 0.8598320402922454 }, "diff": { "l2_avg": 0.04384309619564327, "l1_avg": 0.008234153088228202, "l0_avg": 0.046735309553735054 }, "num_elements": 1061683200, "num_changed": 49618093, "precision": "mxfp4", "fp4_dist_before": [ 67354822, 90076986, 110876133, 58682143, 85475524, 59136015, 42981405, 16382538, 67355684, 90061235, 110832799, 58659470, 85421372, 59090852, 42934979, 16361243 ], "fp4_dist_after": [ 74397548, 126786248, 109991751, 72052135, 72369243, 47872286, 23914076, 3574146, 74416420, 126755117, 109948153, 72016448, 72310015, 47824657, 23886021, 3568936 ], "bf16_dists": null }, "model.layers.12.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.00202713342041187, "l1_avg": 0.001721850037574768, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.00070716596531449, "l1_avg": 0.0005641731950971815, "l0_avg": 1.0 }, "original": { "l2_avg": 0.7570058924829568, "l1_avg": 0.674957561728395, "l0_avg": 0.8743379060721692 }, "merged": { "l2_avg": 0.7570921492865226, "l1_avg": 0.6749757065007717, "l0_avg": 0.861181728221752 }, "diff": { "l2_avg": 0.049241890199469805, "l1_avg": 0.009626894350405093, "l0_avg": 0.04605737662609712 }, "num_elements": 1061683200, "num_changed": 48898343, "precision": "mxfp4", "fp4_dist_before": [ 66711584, 91741404, 110031038, 60702711, 85685605, 59208496, 41510979, 15373182, 66701750, 91721495, 109983003, 60673174, 85639698, 59162040, 41475977, 15361064 ], "fp4_dist_after": [ 73701594, 125938209, 109146589, 72281552, 73144095, 49121540, 24227537, 3406116, 73679433, 125907444, 109083436, 72253772, 73102051, 49071847, 24212266, 3405719 ], "bf16_dists": null }, "model.layers.12.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010748614080985134, "l1_avg": 0.00930076175265842, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008391130483366105, "l1_avg": 0.0006840712494320339, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018675684928894043, "l1_avg": 0.017729194546923226, "l0_avg": 0.8830257486414026 }, "merged": { "l2_avg": 0.018676319387223986, "l1_avg": 0.01772923410674672, "l0_avg": 0.8690522808498806 }, "diff": { "l2_avg": 0.001299555930826399, "l1_avg": 0.00026091787550184463, "l0_avg": 0.05005596255078728 }, "num_elements": 2123366400, "num_changed": 106287149, "precision": "mxfp4", "fp4_dist_before": [ 124172755, 169910347, 210541673, 121093092, 178912435, 132409500, 92786719, 31621844, 124206440, 169960184, 210645347, 121145992, 179011555, 132488155, 92824097, 31636265 ], "fp4_dist_after": [ 139027071, 239362251, 213215753, 146559608, 153444239, 107915770, 54764046, 7180457, 139022916, 239450870, 213297430, 146641216, 153520285, 107973112, 54799997, 7191379 ], "bf16_dists": null }, "model.layers.13.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0020247836655842024, "l1_avg": 0.0017208259966638353, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007133675617592159, "l1_avg": 0.0005703679389423794, "l0_avg": 1.0 }, "original": { "l2_avg": 0.871462316943161, "l1_avg": 0.8090717230902778, "l0_avg": 0.8745524050865644 }, "merged": { "l2_avg": 0.8716561699679919, "l1_avg": 0.809095775462963, "l0_avg": 0.8601233315173491 }, "diff": { "l2_avg": 0.06074686341975666, "l1_avg": 0.012524824731143904, "l0_avg": 0.05065887545361931 }, "num_elements": 1061683200, "num_changed": 53783677, "precision": "mxfp4", "fp4_dist_before": [ 66587105, 90949885, 109573846, 60590829, 86191484, 60659517, 41716250, 14661694, 66598499, 90935449, 109533964, 60564720, 86153794, 60617152, 41699232, 14649780 ], "fp4_dist_after": [ 74259700, 125493224, 108913366, 70824826, 72244618, 49844895, 25681087, 3681757, 74245009, 125462129, 108879482, 70780335, 72212335, 49811675, 25669080, 3679682 ], "bf16_dists": null }, "model.layers.13.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010742110015155501, "l1_avg": 0.009299572308858235, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008369682297599567, "l1_avg": 0.0006816579235924615, "l0_avg": 1.0 }, "original": { "l2_avg": 0.018734019332461885, "l1_avg": 0.017442534646870178, "l0_avg": 0.8832228521653164 }, "merged": { "l2_avg": 0.018734350469377305, "l1_avg": 0.017442523344063463, "l0_avg": 0.8694649157112028 }, "diff": { "l2_avg": 0.0012870906955666011, "l1_avg": 0.00025190856721666124, "l0_avg": 0.04927604015962577 }, "num_elements": 2123366400, "num_changed": 104631088, "precision": "mxfp4", "fp4_dist_before": [ 123997893, 170698831, 210694167, 121706376, 178792662, 131956763, 92145394, 31491707, 123962779, 170717752, 210803635, 121768412, 178889961, 132036171, 92198658, 31505239 ], "fp4_dist_after": [ 138573157, 239395467, 213299890, 147035741, 153723383, 107951691, 54403046, 7069356, 138600655, 239465977, 213393473, 147094950, 153828475, 108028737, 54429022, 7073380 ], "bf16_dists": null }, "lm_head.weight": { "lora_A": { "l2_avg": 0.010774496512500238, "l1_avg": 0.009324126773410372, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0018356661821962352, "l1_avg": 0.0015421853104948921, "l0_avg": 1.0 }, "original": { "l2_avg": 0.004370598861691551, "l1_avg": 0.003714712968396368, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.004374095388715884, "l1_avg": 0.003715515460478331, "l0_avg": 0.9999999982732822 }, "diff": { "l2_avg": 0.00010087524255007474, "l1_avg": 8.52967752021883e-05, "l0_avg": 0.9999986427998355 }, "num_elements": 579133440, "num_changed": 579132654, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9575, 29090, 7401, 0, 0, 0, 0, 38, 9622, 28976, 7426, 0, 0, 0, 0 ], "lora_B": [ 16317, 3958162, 860, 0, 0, 0, 0, 0, 16068, 2442171, 1238, 0, 0, 0, 0, 0 ], "original": [ 949797, 178320676, 108496229, 1222691, 0, 0, 0, 0, 952082, 179783741, 108088242, 1319982, 0, 0, 0, 0 ], "merged": [ 943554, 177839921, 108955645, 1257836, 0, 0, 0, 0, 942366, 179317678, 108522041, 1354399, 0, 0, 0, 0 ] } }, "model.layers.31.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.0107962737641707, "l1_avg": 0.009351033634609646, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008272191043943167, "l1_avg": 0.0006664739339612424, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01097583301193199, "l1_avg": 0.004634654190805223, "l0_avg": 0.9999844021267361 }, "merged": { "l2_avg": 0.01097574740767536, "l1_avg": 0.004635341962178548, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.0981200267388934e-05, "l1_avg": 3.977606797383891e-05, "l0_avg": 0.9999966091579862 }, "num_elements": 1474560, "num_changed": 1474555, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 32, 9505, 28948, 7524, 0, 0, 0, 0, 39, 9513, 29097, 7502, 0, 0, 0, 0 ], "lora_B": [ 110, 8027, 0, 0, 0, 0, 0, 0, 130, 8117, 0, 0, 0, 0, 0, 0 ], "original": [ 5674, 511843, 173564, 42437, 3018, 1, 0, 0, 5665, 513354, 173635, 42211, 3158, 0, 0, 0 ], "merged": [ 5672, 511330, 174058, 42659, 3041, 1, 0, 0, 5600, 512370, 174220, 42435, 3174, 0, 0, 0 ] } }, "model.layers.31.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009036175660891863, "l1_avg": 0.007821576669812202, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008362798615862339, "l1_avg": 0.0006787369234694375, "l0_avg": 1.0 }, "original": { "l2_avg": 1.2089248612087276, "l1_avg": 0.8242056104871962, "l0_avg": 1.0 }, "merged": { "l2_avg": 1.2089193167591679, "l1_avg": 0.8242056104871962, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.282270684316801e-05, "l1_avg": 3.339341427716944e-05, "l0_avg": 0.999298095703125 }, "num_elements": 11796480, "num_changed": 11788200, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 68, 16184, 48084, 1156, 0, 0, 0, 0, 61, 16310, 48048, 1161, 0, 0, 0, 0 ], "lora_B": [ 636, 45272, 0, 0, 0, 0, 0, 0, 612, 45640, 0, 0, 0, 0, 0, 0 ], "original": [ 100, 25959, 78012, 311918, 3781645, 1181655, 519406, 0, 94, 25682, 78175, 310939, 3780054, 1182872, 519969, 0 ], "merged": [ 86, 25855, 77748, 310959, 3776503, 1184131, 523409, 0, 80, 25573, 77930, 309951, 3774962, 1185512, 523781, 0 ] } }, "model.layers.31.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010776440278877836, "l1_avg": 0.009310456779268053, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000859001340309484, "l1_avg": 0.000699599040672183, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01581580226508326, "l1_avg": 0.010725782977210151, "l0_avg": 0.9999970330132378 }, "merged": { "l2_avg": 0.015813172205676743, "l1_avg": 0.010725926028357612, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.38942170826525e-05, "l1_avg": 4.1367641339699425e-05, "l0_avg": 0.9999929640028212 }, "num_elements": 11796480, "num_changed": 11796397, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 40, 9803, 28774, 7359, 0, 0, 0, 0, 33, 9656, 29068, 7427, 0, 0, 0, 0 ], "lora_B": [ 867, 64662, 0, 0, 0, 0, 0, 0, 843, 64700, 0, 0, 0, 0, 0, 0 ], "original": [ 8466, 1858905, 2702978, 1293001, 31594, 0, 0, 0, 8566, 1859516, 2704645, 1297051, 31758, 0, 0, 0 ], "merged": [ 8401, 1852309, 2702713, 1299571, 32010, 0, 0, 0, 8517, 1852904, 2704172, 1303729, 32154, 0, 0, 0 ] } }, "model.layers.31.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010789866010685926, "l1_avg": 0.009329371982150607, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008245359640568495, "l1_avg": 0.000667478539980948, "l0_avg": 1.0 }, "original": { "l2_avg": 0.10736698856149642, "l1_avg": 0.08048310279846191, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.10736052975960172, "l1_avg": 0.0804831345876058, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.3821050307809925e-05, "l1_avg": 4.043812708308299e-05, "l0_avg": 0.9999538845486111 }, "num_elements": 1474560, "num_changed": 1474492, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9618, 29159, 7403, 0, 0, 0, 0, 40, 9500, 28966, 7443, 0, 0, 0, 0 ], "lora_B": [ 106, 8086, 0, 0, 0, 0, 0, 0, 112, 8080, 0, 0, 0, 0, 0, 0 ], "original": [ 107, 25664, 76878, 263909, 369525, 74, 4, 0, 113, 26166, 76864, 265058, 370146, 51, 1, 0 ], "merged": [ 105, 25539, 76622, 263360, 370460, 75, 4, 0, 103, 26062, 76618, 264440, 371119, 52, 1, 0 ] } }, "model.layers.30.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001997454593896365, "l1_avg": 0.0017040537463294135, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008147620732761753, "l1_avg": 0.0006580723656548394, "l0_avg": 1.0 }, "original": { "l2_avg": 5.982629952228251, "l1_avg": 5.487172067901234, "l0_avg": 0.8769874403211806 }, "merged": { "l2_avg": 5.982581998480612, "l1_avg": 5.487965856481481, "l0_avg": 0.8643562505274643 }, "diff": { "l2_avg": 0.3703550050957563, "l1_avg": 0.07409729757426697, "l0_avg": 0.04461420694986979 }, "num_elements": 1061683200, "num_changed": 47366154, "precision": "mxfp4", "fp4_dist_before": [ 65303786, 90255777, 107832976, 61094094, 86813474, 61700215, 42741819, 15146706, 65296582, 90261498, 107814180, 61088590, 86794574, 61669154, 42726764, 15143011 ], "fp4_dist_after": [ 72002347, 122319642, 107692832, 72235146, 75089705, 51767532, 26052511, 3722728, 72008343, 122321077, 107674938, 72210543, 75070504, 51753877, 26040894, 3720581 ], "bf16_dists": null }, "model.layers.30.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010748430820933583, "l1_avg": 0.009293212493260702, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008565544247747829, "l1_avg": 0.0007021759119298723, "l0_avg": 0.9999986436631945 }, "original": { "l2_avg": 0.02025708489947849, "l1_avg": 0.019797144760320216, "l0_avg": 0.8825213373443227 }, "merged": { "l2_avg": 0.020257809427049425, "l1_avg": 0.01979714099271798, "l0_avg": 0.8683337534209828 }, "diff": { "l2_avg": 0.0014175714717970954, "l1_avg": 0.00029840451699716074, "l0_avg": 0.050911814371744794 }, "num_elements": 2123366400, "num_changed": 108104436, "precision": "mxfp4", "fp4_dist_before": [ 124700093, 168124275, 210861667, 119424255, 178944094, 132650731, 93994156, 32193299, 124750152, 168264393, 211065377, 119621612, 179256293, 132966409, 94249561, 32300033 ], "fp4_dist_after": [ 139790402, 239443356, 213666764, 145892223, 152831077, 107096739, 54837819, 7366566, 139785282, 239658698, 213923200, 146172135, 153129193, 107368409, 55000452, 7404085 ], "bf16_dists": null }, "model.layers.32.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01081512554927286, "l1_avg": 0.00936189889907837, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000739463372156024, "l1_avg": 0.0005891390610486269, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0025733774388258645, "l1_avg": 0.0018295894066492717, "l0_avg": 0.99998779296875 }, "merged": { "l2_avg": 0.0025736501551388426, "l1_avg": 0.0018300498525301615, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.67885060079136e-05, "l1_avg": 3.569606795079178e-05, "l0_avg": 0.9999959309895833 }, "num_elements": 1474560, "num_changed": 1474554, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 48, 9433, 29025, 7577, 0, 0, 0, 0, 48, 9622, 28961, 7446, 0, 0, 0, 0 ], "lora_B": [ 156, 8061, 0, 0, 0, 0, 0, 0, 120, 8047, 0, 0, 0, 0, 0, 0 ], "original": [ 4707, 656455, 74181, 657, 2, 0, 0, 0, 4959, 657918, 75050, 630, 1, 0, 0, 0 ], "merged": [ 4825, 655772, 74997, 665, 2, 0, 0, 0, 4845, 656955, 75859, 638, 2, 0, 0, 0 ] } }, "model.layers.32.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009035835193381235, "l1_avg": 0.007818758487701416, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008180701084087962, "l1_avg": 0.0006620415796836217, "l0_avg": 1.0 }, "original": { "l2_avg": 0.7776271057486608, "l1_avg": 0.5782581753200955, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.7776204950588012, "l1_avg": 0.5782581753200955, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.2162289506346854e-05, "l1_avg": 3.293522944053014e-05, "l0_avg": 0.9994960361056857 }, "num_elements": 11796480, "num_changed": 11790535, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 65, 16351, 48002, 1044, 0, 0, 0, 0, 49, 16404, 48044, 1113, 0, 0, 0, 0 ], "lora_B": [ 651, 45471, 0, 0, 0, 0, 0, 0, 671, 45367, 0, 0, 0, 0, 0, 0 ], "original": [ 92, 28599, 86756, 343869, 4460847, 870931, 105802, 0, 97, 28644, 86525, 344361, 4461950, 872334, 105673, 0 ], "merged": [ 104, 28469, 86453, 342750, 4455344, 876362, 107402, 0, 112, 28505, 86267, 343270, 4456442, 877735, 107265, 0 ] } }, "model.layers.32.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010798666756555565, "l1_avg": 0.009340361754099528, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007903786976719068, "l1_avg": 0.0006280205561779439, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01976570944821665, "l1_avg": 0.01385699643029107, "l0_avg": 0.9999978807237413 }, "merged": { "l2_avg": 0.01976241076728871, "l1_avg": 0.013857109016842312, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.988036206811353e-05, "l1_avg": 3.797352676176363e-05, "l0_avg": 0.999989488389757 }, "num_elements": 11796480, "num_changed": 11796356, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 39, 9453, 28728, 7354, 0, 0, 0, 0, 53, 9683, 29349, 7501, 0, 0, 0, 0 ], "lora_B": [ 1089, 64543, 0, 0, 0, 0, 0, 0, 1042, 64398, 0, 0, 0, 0, 0, 0 ], "original": [ 5735, 1339848, 2656147, 1835987, 61866, 0, 0, 0, 5642, 1342679, 2651910, 1834252, 62414, 0, 0, 0 ], "merged": [ 5615, 1334675, 2653086, 1843567, 62635, 0, 0, 0, 5678, 1337340, 2648955, 1841734, 63195, 0, 0, 0 ] } }, "model.layers.32.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010805636827909575, "l1_avg": 0.009349693854649862, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000815343577414751, "l1_avg": 0.0006574359722435474, "l0_avg": 1.0 }, "original": { "l2_avg": 0.10099424957376428, "l1_avg": 0.07876850234137642, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.10098704310900435, "l1_avg": 0.078768523534139, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.1420719714020246e-05, "l1_avg": 3.9510687606202235e-05, "l0_avg": 0.9999457465277778 }, "num_elements": 1474560, "num_changed": 1474480, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9695, 29050, 7465, 0, 0, 0, 0, 30, 9490, 29013, 7384, 0, 0, 0, 0 ], "lora_B": [ 115, 7977, 0, 0, 0, 0, 0, 0, 112, 8180, 0, 0, 0, 0, 0, 0 ], "original": [ 106, 24852, 73144, 262523, 376749, 2, 0, 0, 92, 24644, 73342, 262013, 377091, 2, 0, 0 ], "merged": [ 93, 24709, 72926, 261883, 377745, 2, 0, 0, 82, 24574, 73073, 261364, 378107, 2, 0, 0 ] } }, "model.layers.33.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010787403121247484, "l1_avg": 0.009346520900726319, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008391176233999431, "l1_avg": 0.000688590167555958, "l0_avg": 1.0 }, "original": { "l2_avg": 0.006643017542796623, "l1_avg": 0.003543711370891995, "l0_avg": 0.9999864366319444 }, "merged": { "l2_avg": 0.006642917802057248, "l1_avg": 0.0035442352294921876, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.212795493828319e-05, "l1_avg": 4.0307108105884656e-05, "l0_avg": 0.9999972873263889 }, "num_elements": 1474560, "num_changed": 1474556, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 30, 9546, 29138, 7439, 0, 0, 0, 0, 45, 9495, 29070, 7397, 0, 0, 0, 0 ], "lora_B": [ 95, 8061, 0, 0, 0, 0, 0, 0, 106, 8122, 0, 0, 0, 0, 0, 0 ], "original": [ 5306, 524539, 187420, 19758, 578, 0, 0, 0, 5246, 523738, 187538, 19865, 572, 0, 0, 0 ], "merged": [ 5118, 523556, 188194, 19917, 581, 0, 0, 0, 5227, 523048, 188313, 20030, 576, 0, 0, 0 ] } }, "model.layers.33.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009044482541315556, "l1_avg": 0.00782675202935934, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008246327741836636, "l1_avg": 0.000667589075035519, "l0_avg": 1.0 }, "original": { "l2_avg": 1.25829391957655, "l1_avg": 0.8519289652506511, "l0_avg": 0.9999999152289496 }, "merged": { "l2_avg": 1.2582907919383366, "l1_avg": 0.8519289652506511, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.234911150160937e-05, "l1_avg": 3.281609517418676e-05, "l0_avg": 0.9992525736490886 }, "num_elements": 11796480, "num_changed": 11787663, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 75, 16157, 47841, 1108, 0, 0, 0, 0, 65, 16359, 48347, 1120, 0, 0, 0, 0 ], "lora_B": [ 678, 45363, 0, 0, 0, 0, 0, 0, 615, 45504, 0, 0, 0, 0, 0, 0 ], "original": [ 85, 24403, 73817, 292476, 3739610, 1234255, 532958, 0, 98, 24310, 73735, 292931, 3740149, 1233353, 534300, 0 ], "merged": [ 95, 24314, 73611, 291509, 3733289, 1237471, 537342, 0, 83, 24174, 73513, 291986, 3733955, 1236453, 538685, 0 ] } }, "model.layers.33.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010821549009960685, "l1_avg": 0.00937444633907742, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008496529717464153, "l1_avg": 0.0006912794196978211, "l0_avg": 1.0 }, "original": { "l2_avg": 0.017933322180754746, "l1_avg": 0.012208890914916993, "l0_avg": 0.999998050265842 }, "merged": { "l2_avg": 0.017930101246515347, "l1_avg": 0.01220902336968316, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.392641340941786e-05, "l1_avg": 4.054985216094388e-05, "l0_avg": 0.9999890645345052 }, "num_elements": 11796480, "num_changed": 11796351, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 49, 9543, 29086, 7521, 0, 0, 0, 0, 39, 9436, 29006, 7480, 0, 0, 0, 0 ], "lora_B": [ 889, 64745, 0, 0, 0, 0, 0, 0, 932, 64506, 0, 0, 0, 0, 0, 0 ], "original": [ 6633, 1570896, 2750457, 1524848, 50170, 0, 0, 0, 6518, 1569355, 2744119, 1523933, 49551, 0, 0, 0 ], "merged": [ 6422, 1564925, 2748761, 1532015, 50784, 0, 0, 0, 6632, 1563131, 2742583, 1531140, 50087, 0, 0, 0 ] } }, "model.layers.33.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010857940243350067, "l1_avg": 0.009389176633622912, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008081376436166465, "l1_avg": 0.0006537153967656195, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0944943827674162, "l1_avg": 0.07103213204277886, "l0_avg": 0.9999986436631945 }, "merged": { "l2_avg": 0.09448796166280883, "l1_avg": 0.07103219562106662, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.4687149165433816e-05, "l1_avg": 4.057571188443237e-05, "l0_avg": 0.999945068359375 }, "num_elements": 1474560, "num_changed": 1474479, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 49, 9483, 28982, 7460, 0, 0, 0, 0, 38, 9630, 28962, 7556, 0, 0, 0, 0 ], "lora_B": [ 121, 7945, 0, 0, 0, 0, 0, 0, 130, 8188, 0, 0, 0, 0, 0, 0 ], "original": [ 112, 29641, 87210, 290360, 329566, 5, 0, 0, 95, 29534, 87033, 291254, 329728, 22, 0, 0 ], "merged": [ 108, 29470, 86951, 289733, 330602, 5, 0, 0, 132, 29406, 86745, 290632, 330754, 22, 0, 0 ] } }, "model.layers.31.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001988889963153748, "l1_avg": 0.001698748270670573, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008083834762870652, "l1_avg": 0.000652537163760927, "l0_avg": 1.0 }, "original": { "l2_avg": 6.107189811720714, "l1_avg": 5.747880015432099, "l0_avg": 0.8742351098708164 }, "merged": { "l2_avg": 6.106912159521883, "l1_avg": 5.748857060185185, "l0_avg": 0.8611710508370105 }, "diff": { "l2_avg": 0.3965698503465099, "l1_avg": 0.08114018192997685, "l0_avg": 0.0458572189896195 }, "num_elements": 1061683200, "num_changed": 48685839, "precision": "mxfp4", "fp4_dist_before": [ 66752977, 91544100, 108352722, 60762717, 85724465, 60946830, 42024902, 14836291, 66769494, 91517011, 108301957, 60720884, 85681759, 60919436, 42005957, 14821698 ], "fp4_dist_after": [ 73707183, 123576535, 107798979, 71172403, 73630863, 51129682, 26110427, 3837602, 73685180, 123552477, 107724696, 71131937, 73595091, 51098361, 26096113, 3835671 ], "bf16_dists": null }, "model.layers.31.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010760634829699979, "l1_avg": 0.009312187963061862, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008602648588155741, "l1_avg": 0.0007066914604769813, "l0_avg": 0.9999986436631945 }, "original": { "l2_avg": 0.02053447167078654, "l1_avg": 0.020060064998673802, "l0_avg": 0.8808726722811475 }, "merged": { "l2_avg": 0.020536143249935575, "l1_avg": 0.02006005557966821, "l0_avg": 0.8666638400230878 }, "diff": { "l2_avg": 0.001436445116996765, "l1_avg": 0.00029977680724344133, "l0_avg": 0.05084360240418234 }, "num_elements": 2123366400, "num_changed": 107959597, "precision": "mxfp4", "fp4_dist_before": [ 126442604, 168866093, 211736713, 118568995, 177982903, 131300967, 93489895, 32384593, 126508361, 169009586, 212013514, 118786590, 178315263, 131616305, 93819065, 32524953 ], "fp4_dist_after": [ 141553325, 240901124, 214046776, 145338239, 151717799, 105730505, 54123866, 7394720, 141568197, 241130267, 214356591, 145629133, 152068277, 106031092, 54337934, 7438555 ], "bf16_dists": null }, "model.layers.32.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001985016512064122, "l1_avg": 0.0016967341303825378, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008232200094392191, "l1_avg": 0.0006650563329458237, "l0_avg": 1.0 }, "original": { "l2_avg": 6.520237019322389, "l1_avg": 6.109363908179012, "l0_avg": 0.869192593421465 }, "merged": { "l2_avg": 6.520384237327641, "l1_avg": 6.110518904320988, "l0_avg": 0.855664325290256 }, "diff": { "l2_avg": 0.45216955330678943, "l1_avg": 0.09141820083429784, "l0_avg": 0.04696241119761526 }, "num_elements": 1061683200, "num_changed": 49859203, "precision": "mxfp4", "fp4_dist_before": [ 69439729, 93423060, 108589388, 60071334, 84549596, 59663299, 41028451, 14175227, 69436297, 93381853, 108546750, 60051339, 84513693, 59635406, 41007576, 14170202 ], "fp4_dist_after": [ 76616952, 124858590, 107557197, 69299336, 72177396, 50181890, 26302241, 3941843, 76621809, 124797745, 107520394, 69264959, 72146156, 50162280, 26294075, 3940337 ], "bf16_dists": null }, "model.layers.32.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010762447438209878, "l1_avg": 0.009318623277876113, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008541959882366316, "l1_avg": 0.0007010212375058068, "l0_avg": 0.9999959309895833 }, "original": { "l2_avg": 0.020701531569163004, "l1_avg": 0.02010703946337288, "l0_avg": 0.8802869203355578 }, "merged": { "l2_avg": 0.020705025725894503, "l1_avg": 0.02010702062536169, "l0_avg": 0.8658811503280828 }, "diff": { "l2_avg": 0.0015063641799820794, "l1_avg": 0.0003089064727594823, "l0_avg": 0.05147551359953704 }, "num_elements": 2123366400, "num_changed": 109301376, "precision": "mxfp4", "fp4_dist_before": [ 127066655, 169323949, 211389934, 118621541, 177790285, 131415222, 93242208, 31952570, 127128076, 169466605, 211640608, 118819360, 178129384, 131701144, 93554391, 32124468 ], "fp4_dist_after": [ 142396726, 240910618, 213752245, 144679284, 151293116, 105807299, 54544522, 7456544, 142386733, 241134122, 214075416, 144930410, 151599576, 106116310, 54771104, 7512375 ], "bf16_dists": null }, "model.layers.33.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010783754473537011, "l1_avg": 0.009326023525661892, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008408062848255397, "l1_avg": 0.0006893027987745073, "l0_avg": 0.9998806423611111 }, "original": { "l2_avg": 0.020483662022484674, "l1_avg": 0.02007679503641011, "l0_avg": 0.87761903974745 }, "merged": { "l2_avg": 0.020484843518998888, "l1_avg": 0.02007682517722801, "l0_avg": 0.8632409922281901 }, "diff": { "l2_avg": 0.0014924380514356825, "l1_avg": 0.0003099478909998764, "l0_avg": 0.05104699546908155 }, "num_elements": 2123366400, "num_changed": 108391475, "precision": "mxfp4", "fp4_dist_before": [ 129911772, 172176458, 212438917, 118519756, 176102150, 129143070, 91476262, 31629531, 129947847, 172230110, 212540560, 118571123, 176204231, 129190028, 91574810, 31709775 ], "fp4_dist_after": [ 145198574, 243231775, 213901310, 143849753, 149741751, 104414074, 53705464, 7380269, 145190908, 243314017, 214036389, 143896345, 149809459, 104485423, 53792680, 7418209 ], "bf16_dists": null }, "model.layers.34.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.010779065737867858, "l1_avg": 0.009320512082841662, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.000791400671005249, "l1_avg": 0.0006349437753669918, "l0_avg": 1.0 }, "original": { "l2_avg": 0.0029396325020547955, "l1_avg": 0.001895280016793145, "l0_avg": 0.9999911838107639 }, "merged": { "l2_avg": 0.002939929760872541, "l1_avg": 0.001895800232887268, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.9538901686269635e-05, "l1_avg": 3.8713418568174045e-05, "l0_avg": 1.0 }, "num_elements": 1474560, "num_changed": 1474560, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 37, 9709, 28922, 7304, 0, 0, 0, 0, 40, 9654, 29059, 7435, 0, 0, 0, 0 ], "lora_B": [ 112, 8143, 0, 0, 0, 0, 0, 0, 128, 8001, 0, 0, 0, 0, 0, 0 ], "original": [ 5220, 652733, 77455, 2468, 15, 0, 0, 0, 5093, 652441, 76687, 2437, 11, 0, 0, 0 ], "merged": [ 4990, 652190, 78218, 2491, 15, 0, 0, 0, 5142, 651561, 77477, 2465, 11, 0, 0, 0 ] } }, "model.layers.34.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009045210232996877, "l1_avg": 0.007822664454579353, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008421524902970543, "l1_avg": 0.0006830255190531413, "l0_avg": 1.0 }, "original": { "l2_avg": 0.9319992245238286, "l1_avg": 0.6789000193277995, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.9319920451724756, "l1_avg": 0.6788999769422743, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.380351255657474e-05, "l1_avg": 3.443386457446549e-05, "l0_avg": 0.9994411892361111 }, "num_elements": 11796480, "num_changed": 11789888, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 61, 16216, 47416, 1292, 0, 0, 0, 0, 63, 16453, 48185, 1386, 0, 0, 0, 0 ], "lora_B": [ 628, 45399, 0, 0, 0, 0, 0, 0, 613, 45520, 0, 0, 0, 0, 0, 0 ], "original": [ 97, 26041, 77854, 310821, 4154069, 1089736, 239401, 0, 100, 25910, 78312, 311020, 4154952, 1088366, 239801, 0 ], "merged": [ 92, 25951, 77593, 309803, 4148342, 1094155, 242085, 0, 104, 25819, 78047, 310036, 4149080, 1092785, 242588, 0 ] } }, "model.layers.34.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010838229274241003, "l1_avg": 0.009379080931345622, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008230885210653449, "l1_avg": 0.0006665411056019366, "l0_avg": 1.0 }, "original": { "l2_avg": 0.019128157724869397, "l1_avg": 0.013301542070176866, "l0_avg": 0.999998050265842 }, "merged": { "l2_avg": 0.01912476796924917, "l1_avg": 0.013301674524943035, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2201569283583514e-05, "l1_avg": 4.025311985363563e-05, "l0_avg": 0.9999910990397135 }, "num_elements": 11796480, "num_changed": 11796375, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 33, 9573, 28862, 7645, 0, 0, 0, 0, 45, 9528, 28912, 7562, 0, 0, 0, 0 ], "lora_B": [ 941, 64314, 0, 0, 0, 0, 0, 0, 886, 64931, 0, 0, 0, 0, 0, 0 ], "original": [ 6055, 1456198, 2641082, 1735813, 59526, 0, 0, 0, 6254, 1455849, 2641737, 1734397, 59569, 0, 0, 0 ], "merged": [ 6063, 1450278, 2638923, 1743033, 60248, 0, 0, 0, 6197, 1450287, 2639339, 1741756, 60356, 0, 0, 0 ] } }, "model.layers.34.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.01078094353399264, "l1_avg": 0.009319373634126452, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008200621814467013, "l1_avg": 0.0006626153481192887, "l0_avg": 1.0 }, "original": { "l2_avg": 0.1032949652821427, "l1_avg": 0.0805940310160319, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.10328790960653207, "l1_avg": 0.08059409459431967, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.23589796712872e-05, "l1_avg": 3.993473429646757e-05, "l0_avg": 0.9999471028645833 }, "num_elements": 1474560, "num_changed": 1474482, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 23, 9627, 29115, 7475, 0, 0, 0, 0, 30, 9620, 28926, 7344, 0, 0, 0, 0 ], "lora_B": [ 113, 8016, 0, 0, 0, 0, 0, 0, 116, 8139, 0, 0, 0, 0, 0, 0 ], "original": [ 90, 24414, 71447, 257596, 383360, 1, 0, 0, 114, 23950, 71299, 258395, 383892, 2, 0, 0 ], "merged": [ 91, 24350, 71223, 256920, 384344, 1, 0, 0, 78, 23841, 71104, 257720, 384886, 2, 0, 0 ] } }, "model.layers.35.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01077102129382514, "l1_avg": 0.009320253796047635, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007494922610931098, "l1_avg": 0.0005988676566630602, "l0_avg": 1.0 }, "original": { "l2_avg": 0.011011218198965937, "l1_avg": 0.005670623646842109, "l0_avg": 0.9999905056423611 }, "merged": { "l2_avg": 0.011010761119357143, "l1_avg": 0.00567108326488071, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.681499964181027e-05, "l1_avg": 3.547788494163089e-05, "l0_avg": 0.9999925401475694 }, "num_elements": 1474560, "num_changed": 1474549, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 38, 9570, 29051, 7272, 0, 0, 0, 0, 37, 9579, 29142, 7471, 0, 0, 0, 0 ], "lora_B": [ 153, 8047, 0, 0, 0, 0, 0, 0, 141, 8043, 0, 0, 0, 0, 0, 0 ], "original": [ 4414, 424798, 256519, 47225, 3408, 0, 0, 0, 4568, 425047, 257700, 47471, 3410, 0, 0, 0 ], "merged": [ 4289, 424146, 257213, 47547, 3435, 0, 0, 0, 4215, 424118, 258353, 47814, 3430, 0, 0, 0 ] } }, "model.layers.35.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009037544116379451, "l1_avg": 0.007818716578185558, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008217659151167459, "l1_avg": 0.0006655888424979316, "l0_avg": 1.0 }, "original": { "l2_avg": 0.8542163548990249, "l1_avg": 0.5608454386393229, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.8542135826742451, "l1_avg": 0.5608454386393229, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.272079880929232e-05, "l1_avg": 3.320748089916176e-05, "l0_avg": 0.9995221455891927 }, "num_elements": 11796480, "num_changed": 11790843, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 69, 16427, 47795, 1172, 0, 0, 0, 0, 56, 16321, 48124, 1108, 0, 0, 0, 0 ], "lora_B": [ 632, 45434, 0, 0, 0, 0, 0, 0, 667, 45427, 0, 0, 0, 0, 0, 0 ], "original": [ 171, 36902, 110377, 436929, 4394901, 718165, 203066, 0, 161, 36828, 109973, 436381, 4393465, 716440, 202721, 0 ], "merged": [ 144, 36739, 110082, 435569, 4391910, 721211, 204849, 0, 149, 36707, 109615, 435027, 4390381, 719487, 204610, 0 ] } }, "model.layers.35.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010812475744118106, "l1_avg": 0.009351201852162679, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007864029821591516, "l1_avg": 0.0006312452023848891, "l0_avg": 1.0 }, "original": { "l2_avg": 0.016963383371904946, "l1_avg": 0.011783044868045383, "l0_avg": 0.9999963548448351 }, "merged": { "l2_avg": 0.01696012578565523, "l1_avg": 0.011783211761050754, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.0845336438525136e-05, "l1_avg": 3.782585780653689e-05, "l0_avg": 0.9999903361002604 }, "num_elements": 11796480, "num_changed": 11796366, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 52, 9599, 28917, 7488, 0, 0, 0, 0, 36, 9639, 28892, 7537, 0, 0, 0, 0 ], "lora_B": [ 1009, 64506, 0, 0, 0, 0, 0, 0, 1058, 64499, 0, 0, 0, 0, 0, 0 ], "original": [ 10527, 1679653, 2651466, 1520432, 34936, 0, 0, 0, 10512, 1679571, 2652309, 1521591, 35483, 0, 0, 0 ], "merged": [ 8947, 1675416, 2649758, 1527513, 35440, 0, 0, 0, 9089, 1674916, 2650772, 1528695, 35934, 0, 0, 0 ] } }, "model.layers.35.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010808365954439736, "l1_avg": 0.009352295928531222, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007836657459847629, "l1_avg": 0.0006292859907262027, "l0_avg": 1.0 }, "original": { "l2_avg": 0.07677665772556017, "l1_avg": 0.056597137451171876, "l0_avg": 1.0 }, "merged": { "l2_avg": 0.07677169424939596, "l1_avg": 0.056597137451171876, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.2236665963151466e-05, "l1_avg": 3.7552669851316344e-05, "l0_avg": 0.9999484592013889 }, "num_elements": 1474560, "num_changed": 1474484, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 31, 9563, 29233, 7467, 0, 0, 0, 0, 36, 9659, 28864, 7307, 0, 0, 0, 0 ], "lora_B": [ 147, 8040, 0, 0, 0, 0, 0, 0, 148, 8049, 0, 0, 0, 0, 0, 0 ], "original": [ 136, 37843, 111327, 338331, 249390, 6, 0, 0, 135, 37619, 111036, 338426, 250308, 3, 0, 0 ], "merged": [ 136, 37698, 110918, 337941, 250333, 6, 0, 0, 143, 37451, 110747, 337887, 251297, 3, 0, 0 ] } }, "model.layers.4.self_attn.k_proj.weight": { "lora_A": { "l2_avg": 0.01076591959427408, "l1_avg": 0.009306605656941731, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008587538613937795, "l1_avg": 0.0006952178664505482, "l0_avg": 1.0 }, "original": { "l2_avg": 0.01585797178121923, "l1_avg": 0.011376127931806776, "l0_avg": 0.9999972873263889 }, "merged": { "l2_avg": 0.015857173855304225, "l1_avg": 0.011376256412929959, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.275526749577031e-05, "l1_avg": 4.112234390858147e-05, "l0_avg": 0.9999925401475694 }, "num_elements": 1474560, "num_changed": 1474549, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9781, 28858, 7389, 0, 0, 0, 0, 26, 9739, 28844, 7488, 0, 0, 0, 0 ], "lora_B": [ 107, 8116, 0, 0, 0, 0, 0, 0, 121, 8040, 0, 0, 0, 0, 0, 0 ], "original": [ 819, 192984, 356122, 183557, 2433, 0, 0, 0, 749, 194201, 356916, 184419, 2359, 1, 0, 0 ], "merged": [ 813, 192195, 355941, 184535, 2472, 0, 0, 0, 816, 193294, 356703, 185399, 2391, 1, 0, 0 ] } }, "model.layers.4.self_attn.o_proj.weight": { "lora_A": { "l2_avg": 0.009024229663517055, "l1_avg": 0.007800421677529812, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008284640555179307, "l1_avg": 0.0006717250992854437, "l0_avg": 1.0 }, "original": { "l2_avg": 0.035922444270539686, "l1_avg": 0.022439919577704536, "l0_avg": 0.9999992370605468 }, "merged": { "l2_avg": 0.03591821929335118, "l1_avg": 0.02243996196322971, "l0_avg": 1.0 }, "diff": { "l2_avg": 4.276477861853911e-05, "l1_avg": 3.359156350294749e-05, "l0_avg": 0.9999812655978733 }, "num_elements": 11796480, "num_changed": 11796259, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 62, 16432, 47829, 1096, 0, 0, 0, 0, 62, 16517, 48022, 1052, 0, 0, 0, 0 ], "lora_B": [ 651, 45374, 0, 0, 0, 0, 0, 0, 641, 45494, 0, 0, 0, 0, 0, 0 ], "original": [ 3443, 874311, 2157680, 2536737, 326888, 82, 2, 0, 3430, 872607, 2158124, 2536498, 326585, 92, 1, 0 ], "merged": [ 3396, 870574, 2153544, 2542219, 329359, 86, 2, 0, 3445, 868750, 2154064, 2541833, 329114, 93, 1, 0 ] } }, "model.layers.4.self_attn.q_proj.weight": { "lora_A": { "l2_avg": 0.010772516619555623, "l1_avg": 0.009311723046832615, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008821631738358712, "l1_avg": 0.000717909773811698, "l0_avg": 1.0 }, "original": { "l2_avg": 0.015796447782305392, "l1_avg": 0.011326229572296143, "l0_avg": 0.999997795952691 }, "merged": { "l2_avg": 0.01579325350407349, "l1_avg": 0.011326328913370768, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.46793762045643e-05, "l1_avg": 4.188904487010505e-05, "l0_avg": 0.9999920315212674 }, "num_elements": 11796480, "num_changed": 11796386, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 35, 9779, 28805, 7421, 0, 0, 0, 0, 34, 9610, 28933, 7543, 0, 0, 0, 0 ], "lora_B": [ 836, 64694, 0, 0, 0, 0, 0, 0, 863, 64679, 0, 0, 0, 0, 0, 0 ], "original": [ 7357, 1643505, 2764641, 1459698, 21786, 0, 0, 0, 7366, 1643690, 2764209, 1462486, 21742, 0, 0, 0 ], "merged": [ 7361, 1637115, 2763257, 1467135, 22136, 0, 0, 0, 7393, 1637280, 2762663, 1470055, 22085, 0, 0, 0 ] } }, "model.layers.4.self_attn.v_proj.weight": { "lora_A": { "l2_avg": 0.010777247629114672, "l1_avg": 0.009313287999894884, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008274975698441267, "l1_avg": 0.0006694136536680162, "l0_avg": 1.0 }, "original": { "l2_avg": 0.03744736687390824, "l1_avg": 0.028611909018622504, "l0_avg": 0.9999993218315972 }, "merged": { "l2_avg": 0.03744461183049304, "l1_avg": 0.02861188252766927, "l0_avg": 1.0 }, "diff": { "l2_avg": 5.13245069597049e-05, "l1_avg": 3.997667485641109e-05, "l0_avg": 0.9999776204427083 }, "num_elements": 1474560, "num_changed": 1474527, "precision": "bf16", "fp4_dist_before": null, "fp4_dist_after": null, "bf16_dists": { "lora_A": [ 44, 9786, 28692, 7595, 0, 0, 0, 0, 29, 9716, 28989, 7309, 0, 0, 0, 0 ], "lora_B": [ 124, 8070, 0, 0, 0, 0, 0, 0, 111, 8079, 0, 0, 0, 0, 0, 0 ], "original": [ 293, 73234, 198463, 396065, 69494, 0, 0, 0, 289, 73611, 198850, 394612, 69649, 0, 0, 0 ], "merged": [ 308, 72892, 197923, 396277, 70169, 0, 0, 0, 281, 73298, 198267, 394792, 70353, 0, 0, 0 ] } }, "model.layers.33.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.001967066051181213, "l1_avg": 0.0016857585973209805, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008013494962506399, "l1_avg": 0.0006469392942057715, "l0_avg": 1.0 }, "original": { "l2_avg": 6.791428409733145, "l1_avg": 6.038957851080247, "l0_avg": 0.8664285184130257 }, "merged": { "l2_avg": 6.7914375209451965, "l1_avg": 6.040175540123457, "l0_avg": 0.8531173734311704 }, "diff": { "l2_avg": 0.46055840209221327, "l1_avg": 0.08964060088734568, "l0_avg": 0.045917565616560574 }, "num_elements": 1061683200, "num_changed": 48749908, "precision": "mxfp4", "fp4_dist_before": [ 70914420, 94840699, 109395747, 59592082, 83304942, 58256644, 40269888, 14303634, 70896178, 94820455, 109373773, 59586321, 83291014, 58245810, 40278115, 14313478 ], "fp4_dist_after": [ 77968047, 126400688, 107803847, 68943435, 71161555, 49141838, 25541364, 3903093, 77974770, 126377503, 107789166, 68929267, 71146897, 49140531, 25552538, 3908661 ], "bf16_dists": null }, "model.layers.34.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0019787727588077156, "l1_avg": 0.0016925851504007974, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007910636343325022, "l1_avg": 0.000638117930955357, "l0_avg": 1.0 }, "original": { "l2_avg": 5.3758625516766285, "l1_avg": 4.474571759259259, "l0_avg": 0.8645557733229648 }, "merged": { "l2_avg": 5.374715977570578, "l1_avg": 4.475190972222222, "l0_avg": 0.8514242431263864 }, "diff": { "l2_avg": 0.32662715149629473, "l1_avg": 0.06130116403838735, "l0_avg": 0.045334484900957275 }, "num_elements": 1061683200, "num_changed": 48130861, "precision": "mxfp4", "fp4_dist_before": [ 71899592, 93191745, 111314334, 57043169, 82840027, 56791184, 41665238, 16071438, 71899268, 93169283, 111336125, 57033556, 82856459, 56811640, 41685453, 16074689 ], "fp4_dist_after": [ 78868030, 128555392, 109297295, 69335377, 70411928, 46620860, 23847936, 3877023, 78872355, 128537594, 109319578, 69339363, 70432847, 46635438, 23857101, 3875083 ], "bf16_dists": null }, "model.layers.34.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010761618880643465, "l1_avg": 0.009315382109747992, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0008455924659353061, "l1_avg": 0.000693919344080819, "l0_avg": 0.9913045247395833 }, "original": { "l2_avg": 0.018961252106560602, "l1_avg": 0.018871778323326583, "l0_avg": 0.877765907946928 }, "merged": { "l2_avg": 0.018962615066104464, "l1_avg": 0.01887175195011092, "l0_avg": 0.8635812236644603 }, "diff": { "l2_avg": 0.0013919431302282545, "l1_avg": 0.0002883671831201624, "l0_avg": 0.0501850688604661 }, "num_elements": 2123366400, "num_changed": 106561289, "precision": "mxfp4", "fp4_dist_before": [ 129762541, 175417112, 213550750, 120440431, 175500393, 127505889, 88812664, 30547128, 129785223, 175427284, 213593428, 120463546, 175525892, 127527179, 88888730, 30618210 ], "fp4_dist_after": [ 146081098, 244003607, 214503345, 143687476, 149379404, 104237048, 53487222, 7415725, 143585948, 244024792, 214555262, 143709633, 149394795, 104294099, 53561385, 7445561 ], "bf16_dists": null }, "model.layers.35.mlp.experts.down_proj": { "lora_A": { "l2_avg": 0.0019737768399023326, "l1_avg": 0.0016896353827582466, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007663603270736226, "l1_avg": 0.0006171308043930266, "l0_avg": 1.0 }, "original": { "l2_avg": 3.2189761123367853, "l1_avg": 2.7294820601851852, "l0_avg": 0.8542120568546248 }, "merged": { "l2_avg": 3.2192573610666884, "l1_avg": 2.7297779224537035, "l0_avg": 0.8390532119185836 }, "diff": { "l2_avg": 0.2214708318965717, "l1_avg": 0.04508605580271026, "l0_avg": 0.05089314119315442 }, "num_elements": 1061683200, "num_changed": 54032393, "precision": "mxfp4", "fp4_dist_before": [ 77400038, 97503793, 111007345, 56366728, 79830722, 54997849, 39338294, 14484184, 77380572, 97485295, 110974809, 56345864, 79794568, 54980040, 39319123, 14473976 ], "fp4_dist_after": [ 85435939, 132119234, 107616878, 65617820, 66343045, 45399420, 24438148, 3948031, 85438562, 132094578, 107581941, 65593335, 66302961, 45378178, 24429375, 3945755 ], "bf16_dists": null }, "model.layers.35.mlp.experts.gate_up_proj": { "lora_A": { "l2_avg": 0.010757444994135992, "l1_avg": 0.009298588832219441, "l0_avg": 1.0 }, "lora_B": { "l2_avg": 0.0007726734446600128, "l1_avg": 0.0006101116951968935, "l0_avg": 0.9690185546875 }, "original": { "l2_avg": 0.01955436070760091, "l1_avg": 0.01941656795548804, "l0_avg": 0.8758031506008572 }, "merged": { "l2_avg": 0.01955662038591173, "l1_avg": 0.019416575490692516, "l0_avg": 0.8620844570207007 }, "diff": { "l2_avg": 0.0014051520162158542, "l1_avg": 0.000283958853026967, "l0_avg": 0.048404886693130304 }, "num_elements": 2123366400, "num_changed": 102781310, "precision": "mxfp4", "fp4_dist_before": [ 131852904, 176939904, 215330074, 119190590, 173928706, 125058342, 87904997, 30892485, 131862513, 176987358, 215460566, 119326667, 174151607, 125259439, 88176332, 31043916 ], "fp4_dist_after": [ 150809623, 244712190, 215643038, 142454528, 148431570, 102439976, 53001559, 7999802, 142035607, 244812224, 215801701, 142623443, 148664136, 102680816, 53201327, 8054860 ], "bf16_dists": null } }