| { |
| "model.layers.26.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010766946059993482, |
| "l1_avg": 0.00931929416126675, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008345548412762582, |
| "l1_avg": 0.0006767219165340066, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.003155256077041962, |
| "l1_avg": 0.0022022065189149643, |
| "l0_avg": 0.9999898274739584 |
| }, |
| "merged": { |
| "l2_avg": 0.0031555024837898295, |
| "l1_avg": 0.0022027111715740627, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.123568149932461e-05, |
| "l1_avg": 3.999663620359368e-05, |
| "l0_avg": 0.9999972873263889 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474556, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9481, |
| 29372, |
| 7367, |
| 0, |
| 0, |
| 0, |
| 0, |
| 32, |
| 9735, |
| 28682, |
| 7455, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 113, |
| 8139, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 112, |
| 8020, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4211, |
| 621965, |
| 109785, |
| 1876, |
| 2, |
| 0, |
| 0, |
| 0, |
| 4062, |
| 621345, |
| 109382, |
| 1930, |
| 2, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4099, |
| 621055, |
| 110781, |
| 1899, |
| 2, |
| 0, |
| 0, |
| 0, |
| 4163, |
| 620178, |
| 110424, |
| 1957, |
| 2, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.26.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00904296657185818, |
| "l1_avg": 0.007825018838047981, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000839955347100236, |
| "l1_avg": 0.0006810474726888869, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.5739066136674896, |
| "l1_avg": 0.4265228271484375, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 0.5738916863032905, |
| "l1_avg": 0.4265228271484375, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3251016474070836e-05, |
| "l1_avg": 3.397064542190896e-05, |
| "l0_avg": 0.9996445549858941 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11792287, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 64, |
| 16271, |
| 47836, |
| 1100, |
| 0, |
| 0, |
| 0, |
| 0, |
| 71, |
| 16336, |
| 48245, |
| 1149, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 652, |
| 45513, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 625, |
| 45370, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 160, |
| 39149, |
| 117348, |
| 466748, |
| 4835430, |
| 413361, |
| 25312, |
| 0, |
| 150, |
| 39530, |
| 118057, |
| 467304, |
| 4833691, |
| 415223, |
| 25017, |
| 0 |
| ], |
| "merged": [ |
| 148, |
| 38965, |
| 116991, |
| 465273, |
| 4833039, |
| 417399, |
| 25675, |
| 0, |
| 148, |
| 39361, |
| 117706, |
| 465913, |
| 4831303, |
| 419192, |
| 25367, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.26.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010805541013970962, |
| "l1_avg": 0.009349275297588772, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008316474593994693, |
| "l1_avg": 0.0006737917428836226, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018660558041330703, |
| "l1_avg": 0.01316273742251926, |
| "l0_avg": 0.9999978807237413 |
| }, |
| "merged": { |
| "l2_avg": 0.018656948373648615, |
| "l1_avg": 0.013162826167212592, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.194742611834147e-05, |
| "l1_avg": 4.056890288160907e-05, |
| "l0_avg": 0.9999892340766059 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796353, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 34, |
| 9723, |
| 29077, |
| 7438, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9578, |
| 28856, |
| 7421, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 904, |
| 64804, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 974, |
| 64390, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5694, |
| 1379743, |
| 2738757, |
| 1717055, |
| 52163, |
| 0, |
| 0, |
| 0, |
| 5481, |
| 1379254, |
| 2742659, |
| 1722895, |
| 52779, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 5539, |
| 1373990, |
| 2736017, |
| 1724854, |
| 52830, |
| 0, |
| 0, |
| 0, |
| 5454, |
| 1373678, |
| 2740052, |
| 1730605, |
| 53461, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.26.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01077978277168715, |
| "l1_avg": 0.009322817458046808, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008329921984113753, |
| "l1_avg": 0.0006745918653905392, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.09150312186697204, |
| "l1_avg": 0.0706755585140652, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.09149651227592806, |
| "l1_avg": 0.07067557440863716, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.15914864621935e-05, |
| "l1_avg": 4.0135918081634575e-05, |
| "l0_avg": 0.9999525282118056 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474490, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 42, |
| 9649, |
| 28737, |
| 7437, |
| 0, |
| 0, |
| 0, |
| 0, |
| 32, |
| 9691, |
| 29166, |
| 7406, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 111, |
| 8020, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 112, |
| 8141, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 109, |
| 28045, |
| 83366, |
| 286419, |
| 339710, |
| 3, |
| 0, |
| 0, |
| 91, |
| 28126, |
| 83120, |
| 286220, |
| 339351, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 111, |
| 27943, |
| 83098, |
| 285780, |
| 340724, |
| 3, |
| 0, |
| 0, |
| 119, |
| 27960, |
| 82844, |
| 285599, |
| 340379, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.27.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010790132247777645, |
| "l1_avg": 0.009337523248460558, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008659385493956506, |
| "l1_avg": 0.0007031520362943411, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.016746883240619712, |
| "l1_avg": 0.007028773095872667, |
| "l0_avg": 0.9999844021267361 |
| }, |
| "merged": { |
| "l2_avg": 0.016746565955118076, |
| "l1_avg": 0.007029466496573554, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.347201907625691e-05, |
| "l1_avg": 4.1479596661196816e-05, |
| "l0_avg": 0.9999945746527777 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474552, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 45, |
| 9613, |
| 28967, |
| 7443, |
| 0, |
| 0, |
| 0, |
| 0, |
| 27, |
| 9667, |
| 28929, |
| 7469, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 103, |
| 8052, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 122, |
| 8107, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4864, |
| 433282, |
| 223885, |
| 66807, |
| 8907, |
| 2, |
| 0, |
| 0, |
| 4965, |
| 433009, |
| 223375, |
| 66743, |
| 8721, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4549, |
| 432610, |
| 224458, |
| 67096, |
| 8973, |
| 2, |
| 0, |
| 0, |
| 4700, |
| 432492, |
| 223857, |
| 67039, |
| 8784, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.27.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009039407797337041, |
| "l1_avg": 0.007823619991540909, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008432656401432955, |
| "l1_avg": 0.0006827777458561791, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.032636173068293, |
| "l1_avg": 0.75189208984375, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 1.03262856722082, |
| "l1_avg": 0.75189208984375, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.334834734520061e-05, |
| "l1_avg": 3.416586098157697e-05, |
| "l0_avg": 0.9993773566351997 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11789135, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 69, |
| 16165, |
| 48104, |
| 1083, |
| 0, |
| 0, |
| 0, |
| 0, |
| 59, |
| 16395, |
| 48101, |
| 1096, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 638, |
| 45435, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 631, |
| 45456, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 88, |
| 23473, |
| 70541, |
| 281150, |
| 3960564, |
| 1226605, |
| 335317, |
| 0, |
| 89, |
| 23658, |
| 70494, |
| 280977, |
| 3962119, |
| 1225679, |
| 335726, |
| 0 |
| ], |
| "merged": [ |
| 95, |
| 23363, |
| 70331, |
| 280275, |
| 3954174, |
| 1230846, |
| 338678, |
| 0, |
| 101, |
| 23525, |
| 70255, |
| 280184, |
| 3955617, |
| 1230027, |
| 339009, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.27.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010792848808545367, |
| "l1_avg": 0.009342168437110053, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008573143962342228, |
| "l1_avg": 0.0006956140277907252, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.014658488383523691, |
| "l1_avg": 0.010131521357430352, |
| "l0_avg": 0.9999967787000869 |
| }, |
| "merged": { |
| "l2_avg": 0.014656044916169661, |
| "l1_avg": 0.010131658448113336, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.418228331472899e-05, |
| "l1_avg": 4.121837102704578e-05, |
| "l0_avg": 0.9999916076660156 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796381, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 40, |
| 9608, |
| 29306, |
| 7448, |
| 0, |
| 0, |
| 0, |
| 0, |
| 31, |
| 9528, |
| 28886, |
| 7313, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 880, |
| 64718, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 891, |
| 64583, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 9549, |
| 1965362, |
| 2666681, |
| 1240615, |
| 18617, |
| 0, |
| 0, |
| 0, |
| 9424, |
| 1961997, |
| 2666379, |
| 1239053, |
| 18803, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 9608, |
| 1958707, |
| 2666571, |
| 1247292, |
| 18878, |
| 0, |
| 0, |
| 0, |
| 9473, |
| 1955004, |
| 2666264, |
| 1245616, |
| 19067, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.27.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010795555944991255, |
| "l1_avg": 0.00933817360136244, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007879803306423128, |
| "l1_avg": 0.0006345532601699233, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.08900514253691306, |
| "l1_avg": 0.0657051510281033, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.0889988785043362, |
| "l1_avg": 0.06570514572991265, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.023143288236367e-05, |
| "l1_avg": 3.822926535374588e-05, |
| "l0_avg": 0.9999491373697916 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474485, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9572, |
| 28966, |
| 7349, |
| 0, |
| 0, |
| 0, |
| 0, |
| 41, |
| 9699, |
| 29072, |
| 7429, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 133, |
| 8072, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 89, |
| 8090, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 141, |
| 32502, |
| 95392, |
| 306684, |
| 301929, |
| 37, |
| 0, |
| 0, |
| 126, |
| 32557, |
| 95108, |
| 306508, |
| 303537, |
| 39, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 148, |
| 32335, |
| 95073, |
| 306127, |
| 302942, |
| 39, |
| 0, |
| 0, |
| 121, |
| 32440, |
| 94844, |
| 305864, |
| 304587, |
| 40, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.25.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.002000422851398021, |
| "l1_avg": 0.0017060213618808322, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008185398028650394, |
| "l1_avg": 0.0006615045169989268, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 4.0613539418571944, |
| "l1_avg": 3.7896387924382715, |
| "l0_avg": 0.8775889719268422 |
| }, |
| "merged": { |
| "l2_avg": 4.06205742333506, |
| "l1_avg": 3.789996624228395, |
| "l0_avg": 0.8640339688901548 |
| }, |
| "diff": { |
| "l2_avg": 0.25089770907746645, |
| "l1_avg": 0.051717039508584105, |
| "l0_avg": 0.048337044421537426 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 51318628, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 64986169, |
| 87070506, |
| 108547529, |
| 58971136, |
| 87587357, |
| 62672034, |
| 44973823, |
| 16098348, |
| 64975563, |
| 87049572, |
| 108549531, |
| 58950984, |
| 87570784, |
| 62663456, |
| 44941781, |
| 16074627 |
| ], |
| "fp4_dist_after": [ |
| 72177511, |
| 122683360, |
| 108803937, |
| 71748466, |
| 74388234, |
| 50804407, |
| 26339533, |
| 3955674, |
| 72175340, |
| 122661159, |
| 108801809, |
| 71723045, |
| 74365687, |
| 50785194, |
| 26319747, |
| 3950097 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.25.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010765967141866659, |
| "l1_avg": 0.009310300482643976, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008544194232000362, |
| "l1_avg": 0.0007012253834141625, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01918210850821601, |
| "l1_avg": 0.018842505937741127, |
| "l0_avg": 0.8818231403680495 |
| }, |
| "merged": { |
| "l2_avg": 0.019182174735599095, |
| "l1_avg": 0.018842494634934412, |
| "l0_avg": 0.8676299036285023 |
| }, |
| "diff": { |
| "l2_avg": 0.0013812993963559469, |
| "l1_avg": 0.00028921801366923767, |
| "l0_avg": 0.05055580374635296 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 107348495, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125452403, |
| 171888598, |
| 209541789, |
| 122138342, |
| 178129394, |
| 132043861, |
| 91110448, |
| 30364319, |
| 125480370, |
| 172011104, |
| 209803968, |
| 122392484, |
| 178522837, |
| 132444987, |
| 91508449, |
| 30533047 |
| ], |
| "fp4_dist_after": [ |
| 140549992, |
| 239195221, |
| 211975588, |
| 145569229, |
| 152682393, |
| 108355715, |
| 55266237, |
| 7109173, |
| 140520223, |
| 239399944, |
| 212325817, |
| 145887649, |
| 153061661, |
| 108768136, |
| 55540117, |
| 7159305 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.26.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020022993232592175, |
| "l1_avg": 0.0017070838146739536, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008172544137203389, |
| "l1_avg": 0.0006606092469559775, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 4.757513841532697, |
| "l1_avg": 4.247316261574074, |
| "l0_avg": 0.8775068937701943 |
| }, |
| "merged": { |
| "l2_avg": 4.758053800731113, |
| "l1_avg": 4.247750289351852, |
| "l0_avg": 0.8644967180416908 |
| }, |
| "diff": { |
| "l2_avg": 0.2786144806898216, |
| "l1_avg": 0.05556074731143904, |
| "l0_avg": 0.046482247246636285 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 49349421, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65028997, |
| 87021464, |
| 108739228, |
| 58952611, |
| 87401871, |
| 62428763, |
| 44909620, |
| 16452015, |
| 65019876, |
| 86999475, |
| 108695549, |
| 58920541, |
| 87370359, |
| 62416780, |
| 44889493, |
| 16436558 |
| ], |
| "fp4_dist_after": [ |
| 71926272, |
| 122657776, |
| 108889726, |
| 72396216, |
| 74655798, |
| 50721249, |
| 25785775, |
| 3893262, |
| 71935286, |
| 122617574, |
| 108850639, |
| 72363775, |
| 74625405, |
| 50706771, |
| 25771844, |
| 3885832 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.26.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010747190205917924, |
| "l1_avg": 0.00928418238957723, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008567289710686904, |
| "l1_avg": 0.0007040916217697992, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019176065921783447, |
| "l1_avg": 0.01892027113172743, |
| "l0_avg": 0.8833653862093702 |
| }, |
| "merged": { |
| "l2_avg": 0.019176728195614286, |
| "l1_avg": 0.01892025982892072, |
| "l0_avg": 0.8692425028483073 |
| }, |
| "diff": { |
| "l2_avg": 0.0013815913763311175, |
| "l1_avg": 0.0002891710363788369, |
| "l0_avg": 0.0505540692364728 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 107344812, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 123826687, |
| 170161019, |
| 209599497, |
| 121972296, |
| 179119840, |
| 133025361, |
| 92278414, |
| 30868991, |
| 123831333, |
| 170265757, |
| 209825308, |
| 122149054, |
| 179490477, |
| 133329598, |
| 92611662, |
| 31011106 |
| ], |
| "fp4_dist_after": [ |
| 138806993, |
| 238395375, |
| 212474237, |
| 146304459, |
| 153550059, |
| 108729623, |
| 55421354, |
| 7160482, |
| 138839083, |
| 238565892, |
| 212789138, |
| 146544358, |
| 153902638, |
| 109037194, |
| 55642129, |
| 7203386 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.27.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010778423272037312, |
| "l1_avg": 0.009323409530851576, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008572019542205724, |
| "l1_avg": 0.000702155547009574, |
| "l0_avg": 0.9921875 |
| }, |
| "original": { |
| "l2_avg": 0.01982214715745714, |
| "l1_avg": 0.019469127890504437, |
| "l0_avg": 0.8831083707456235 |
| }, |
| "merged": { |
| "l2_avg": 0.01982258823182848, |
| "l1_avg": 0.019469114703896603, |
| "l0_avg": 0.8690021086327824 |
| }, |
| "diff": { |
| "l2_avg": 0.0013948198821809557, |
| "l1_avg": 0.00029413670669367284, |
| "l0_avg": 0.05057332027105638 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 107385689, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 124097559, |
| 168859436, |
| 209959306, |
| 120700426, |
| 179172337, |
| 133165364, |
| 93289506, |
| 31434952, |
| 124106199, |
| 168983237, |
| 210242044, |
| 120931986, |
| 179590844, |
| 133561858, |
| 93665745, |
| 31605601 |
| ], |
| "fp4_dist_after": [ |
| 140056864, |
| 238021974, |
| 212895279, |
| 145782491, |
| 153432926, |
| 108321228, |
| 55677265, |
| 7479395, |
| 138099657, |
| 238255078, |
| 213218327, |
| 146093172, |
| 153861213, |
| 108712865, |
| 55929003, |
| 7529663 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.28.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010810012069319511, |
| "l1_avg": 0.009359150462680392, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008048431482166052, |
| "l1_avg": 0.0006504426710307598, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.003463627937286467, |
| "l1_avg": 0.002391345136695438, |
| "l0_avg": 0.9999898274739584 |
| }, |
| "merged": { |
| "l2_avg": 0.00346374063646836, |
| "l1_avg": 0.0023916984597841897, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.9752574868402915e-05, |
| "l1_avg": 3.860517301493221e-05, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474559, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9567, |
| 29219, |
| 7555, |
| 0, |
| 0, |
| 0, |
| 0, |
| 41, |
| 9575, |
| 28695, |
| 7475, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 139, |
| 8093, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 120, |
| 8032, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3620, |
| 601769, |
| 128862, |
| 2632, |
| 13, |
| 0, |
| 0, |
| 0, |
| 3662, |
| 603012, |
| 128418, |
| 2556, |
| 16, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3568, |
| 600572, |
| 129964, |
| 2655, |
| 13, |
| 0, |
| 0, |
| 0, |
| 3823, |
| 601850, |
| 129523, |
| 2575, |
| 17, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.28.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009057530942196726, |
| "l1_avg": 0.007840946316719055, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008484044479615634, |
| "l1_avg": 0.0006878553993172116, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.7318254741763778, |
| "l1_avg": 0.534382332695855, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.731817655080845, |
| "l1_avg": 0.534382332695855, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3981197580516975e-05, |
| "l1_avg": 3.459338719646136e-05, |
| "l0_avg": 0.9995767381456163 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11791487, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 72, |
| 16224, |
| 47868, |
| 1225, |
| 0, |
| 0, |
| 0, |
| 0, |
| 73, |
| 16198, |
| 48184, |
| 1228, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 664, |
| 45310, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 637, |
| 45549, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 123, |
| 31709, |
| 95264, |
| 379902, |
| 4573104, |
| 740027, |
| 79244, |
| 0, |
| 136, |
| 31707, |
| 94886, |
| 379570, |
| 4572277, |
| 739316, |
| 79215, |
| 0 |
| ], |
| "merged": [ |
| 127, |
| 31569, |
| 94964, |
| 378692, |
| 4568209, |
| 745376, |
| 80425, |
| 0, |
| 104, |
| 31603, |
| 94626, |
| 378339, |
| 4567589, |
| 744412, |
| 80445, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.28.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010775784503150443, |
| "l1_avg": 0.009319879611333211, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008292231101113944, |
| "l1_avg": 0.0006714356131851673, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018121926761810386, |
| "l1_avg": 0.012966130839453803, |
| "l0_avg": 0.999998050265842 |
| }, |
| "merged": { |
| "l2_avg": 0.018118183814090805, |
| "l1_avg": 0.012966214285956488, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1620560297225415e-05, |
| "l1_avg": 4.030338540259335e-05, |
| "l0_avg": 0.999991692437066 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796382, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 58, |
| 9741, |
| 28992, |
| 7396, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9545, |
| 29084, |
| 7308, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 937, |
| 64470, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 948, |
| 64717, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5815, |
| 1425983, |
| 2699823, |
| 1731421, |
| 41329, |
| 0, |
| 0, |
| 0, |
| 5993, |
| 1422916, |
| 2695433, |
| 1726794, |
| 40973, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 5938, |
| 1420159, |
| 2697217, |
| 1739040, |
| 41921, |
| 0, |
| 0, |
| 0, |
| 6102, |
| 1417109, |
| 2692992, |
| 1734415, |
| 41587, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.28.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010789667314567326, |
| "l1_avg": 0.009341112772623698, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008449797751381993, |
| "l1_avg": 0.0006853116792626679, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.09382251658121907, |
| "l1_avg": 0.07140800158182779, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.09381513419578497, |
| "l1_avg": 0.07140800688001844, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2714177207163084e-05, |
| "l1_avg": 4.110819556646877e-05, |
| "l0_avg": 0.9999484592013889 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474484, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 41, |
| 9670, |
| 29090, |
| 7383, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9604, |
| 28869, |
| 7470, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 118, |
| 8102, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 110, |
| 8054, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 118, |
| 28482, |
| 83261, |
| 286892, |
| 339137, |
| 27, |
| 0, |
| 0, |
| 90, |
| 28075, |
| 83079, |
| 286356, |
| 339020, |
| 23, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 87, |
| 28345, |
| 82994, |
| 286266, |
| 340184, |
| 27, |
| 0, |
| 0, |
| 98, |
| 27972, |
| 82788, |
| 285773, |
| 340003, |
| 23, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.29.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010805650964392321, |
| "l1_avg": 0.009358567661709255, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007964848773553967, |
| "l1_avg": 0.0006428650231100619, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01831603753760458, |
| "l1_avg": 0.006918079985512628, |
| "l0_avg": 0.9999864366319444 |
| }, |
| "merged": { |
| "l2_avg": 0.018315886748455287, |
| "l1_avg": 0.006918765438927545, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.901686727079641e-05, |
| "l1_avg": 3.7851626984775066e-05, |
| "l0_avg": 0.9999891493055556 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474544, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9496, |
| 28970, |
| 7574, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9580, |
| 29112, |
| 7360, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 145, |
| 8000, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 108, |
| 8131, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5186, |
| 473777, |
| 182667, |
| 63572, |
| 11312, |
| 1, |
| 0, |
| 0, |
| 5292, |
| 474531, |
| 183534, |
| 63263, |
| 11422, |
| 3, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 5238, |
| 472838, |
| 183206, |
| 63754, |
| 11392, |
| 1, |
| 0, |
| 0, |
| 5304, |
| 473774, |
| 184116, |
| 63440, |
| 11494, |
| 3, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.29.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009019692291857055, |
| "l1_avg": 0.007794913370162249, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008406885789726741, |
| "l1_avg": 0.0006801109347078535, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.2352355490154205, |
| "l1_avg": 0.8746747334798177, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 1.2352298624004876, |
| "l1_avg": 0.874674818250868, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.339004508088877e-05, |
| "l1_avg": 3.388719633221626e-05, |
| "l0_avg": 0.9992626614040798 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11787782, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 64, |
| 16437, |
| 48005, |
| 1091, |
| 0, |
| 0, |
| 0, |
| 0, |
| 66, |
| 16613, |
| 47640, |
| 1156, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 651, |
| 45243, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 616, |
| 45650, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 97, |
| 22653, |
| 68116, |
| 272118, |
| 3676438, |
| 1268935, |
| 590540, |
| 0, |
| 100, |
| 22693, |
| 68582, |
| 271901, |
| 3674729, |
| 1268940, |
| 590638, |
| 0 |
| ], |
| "merged": [ |
| 83, |
| 22570, |
| 67896, |
| 271300, |
| 3670770, |
| 1271307, |
| 594972, |
| 0, |
| 99, |
| 22593, |
| 68370, |
| 271035, |
| 3668848, |
| 1271635, |
| 595002, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.29.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010789284058812876, |
| "l1_avg": 0.009333509869045681, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008381325587598926, |
| "l1_avg": 0.0006798229878768325, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.015157132319795322, |
| "l1_avg": 0.00999078220791287, |
| "l0_avg": 0.9999954223632812 |
| }, |
| "merged": { |
| "l2_avg": 0.015154674413770565, |
| "l1_avg": 0.009990967644585503, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2978133752035916e-05, |
| "l1_avg": 4.034021662341224e-05, |
| "l0_avg": 0.9999938117133247 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796407, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 25, |
| 9639, |
| 28862, |
| 7511, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9631, |
| 29223, |
| 7231, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 881, |
| 64912, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 881, |
| 64398, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 12035, |
| 2169583, |
| 2490683, |
| 1201503, |
| 28635, |
| 0, |
| 0, |
| 0, |
| 11896, |
| 2164897, |
| 2489137, |
| 1199559, |
| 28552, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 11828, |
| 2162766, |
| 2490894, |
| 1207573, |
| 29013, |
| 0, |
| 0, |
| 0, |
| 11887, |
| 2158718, |
| 2489228, |
| 1205607, |
| 28966, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.29.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.0108124890952407, |
| "l1_avg": 0.009356691439946493, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000817171938251704, |
| "l1_avg": 0.0006587974494323134, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.10857068807724782, |
| "l1_avg": 0.07766112751430936, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.10856436749873997, |
| "l1_avg": 0.07766112751430936, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.305007819866542e-05, |
| "l1_avg": 3.9018271490931514e-05, |
| "l0_avg": 0.9999416775173611 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474474, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 28, |
| 9515, |
| 29150, |
| 7352, |
| 0, |
| 0, |
| 0, |
| 0, |
| 32, |
| 9650, |
| 28902, |
| 7531, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 119, |
| 7908, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 115, |
| 8242, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 117, |
| 28901, |
| 85592, |
| 281281, |
| 341604, |
| 119, |
| 1, |
| 0, |
| 102, |
| 28864, |
| 84741, |
| 281774, |
| 341344, |
| 119, |
| 1, |
| 0 |
| ], |
| "merged": [ |
| 116, |
| 28794, |
| 85333, |
| 280672, |
| 342572, |
| 123, |
| 1, |
| 0, |
| 116, |
| 28734, |
| 84491, |
| 281184, |
| 342301, |
| 122, |
| 1, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.3.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010762721607732837, |
| "l1_avg": 0.00931224160724216, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008383524254895747, |
| "l1_avg": 0.0006790679763071239, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.023332343308555187, |
| "l1_avg": 0.014238279395633274, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "merged": { |
| "l2_avg": 0.023331531246157434, |
| "l1_avg": 0.01423833900027805, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.137493873918931e-05, |
| "l1_avg": 3.9990479126572606e-05, |
| "l0_avg": 0.9999884711371527 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474543, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 28, |
| 9498, |
| 29179, |
| 7351, |
| 0, |
| 0, |
| 0, |
| 0, |
| 35, |
| 9761, |
| 28963, |
| 7345, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 104, |
| 8129, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 116, |
| 8035, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 981, |
| 215679, |
| 304510, |
| 196664, |
| 18972, |
| 1, |
| 0, |
| 0, |
| 923, |
| 215384, |
| 305210, |
| 197262, |
| 18973, |
| 1, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 947, |
| 214876, |
| 304528, |
| 197255, |
| 19167, |
| 1, |
| 0, |
| 0, |
| 934, |
| 214547, |
| 305300, |
| 197857, |
| 19147, |
| 1, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.3.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009036711716121361, |
| "l1_avg": 0.007817039266228676, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000819172312297911, |
| "l1_avg": 0.0006622512307431962, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.048183163692889434, |
| "l1_avg": 0.031027240223354763, |
| "l0_avg": 0.9999994066026475 |
| }, |
| "merged": { |
| "l2_avg": 0.04817969841191463, |
| "l1_avg": 0.03102724552154541, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.2257585600862566e-05, |
| "l1_avg": 3.3145387553506426e-05, |
| "l0_avg": 0.9999760097927517 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796197, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 78, |
| 16428, |
| 47762, |
| 999, |
| 0, |
| 0, |
| 0, |
| 0, |
| 48, |
| 16296, |
| 48439, |
| 1022, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 695, |
| 45406, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 667, |
| 45392, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2329, |
| 613699, |
| 1673843, |
| 2942640, |
| 667463, |
| 149, |
| 30, |
| 0, |
| 2423, |
| 613368, |
| 1672028, |
| 2942314, |
| 666035, |
| 134, |
| 25, |
| 0 |
| ], |
| "merged": [ |
| 2299, |
| 611041, |
| 1669575, |
| 2944829, |
| 672235, |
| 151, |
| 30, |
| 0, |
| 2381, |
| 610647, |
| 1667684, |
| 2944608, |
| 670838, |
| 137, |
| 25, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.3.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010788861535050796, |
| "l1_avg": 0.009336900711059571, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008400954329839986, |
| "l1_avg": 0.0006819110130891204, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01594474959069028, |
| "l1_avg": 0.011490448315938314, |
| "l0_avg": 0.9999967787000869 |
| }, |
| "merged": { |
| "l2_avg": 0.015941711916502427, |
| "l1_avg": 0.011490541034274632, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3044018864319804e-05, |
| "l1_avg": 4.0564737800094816e-05, |
| "l0_avg": 0.9999910142686632 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796374, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 42, |
| 9665, |
| 28924, |
| 7415, |
| 0, |
| 0, |
| 0, |
| 0, |
| 41, |
| 9531, |
| 29143, |
| 7399, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 917, |
| 64589, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 876, |
| 64690, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 6754, |
| 1590300, |
| 2798632, |
| 1482117, |
| 20891, |
| 0, |
| 0, |
| 0, |
| 6579, |
| 1589714, |
| 2799856, |
| 1480332, |
| 21305, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 6895, |
| 1583818, |
| 2797086, |
| 1489572, |
| 21294, |
| 0, |
| 0, |
| 0, |
| 6786, |
| 1583110, |
| 2798408, |
| 1487852, |
| 21659, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.3.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010814645694219643, |
| "l1_avg": 0.009359280930625068, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008026993018575013, |
| "l1_avg": 0.0006486807833425701, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.03911333251689973, |
| "l1_avg": 0.029167109065585665, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.03911073140407444, |
| "l1_avg": 0.02916712760925293, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1104118334704575e-05, |
| "l1_avg": 3.854390751156542e-05, |
| "l0_avg": 0.9999701605902778 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474516, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 49, |
| 9508, |
| 28901, |
| 7493, |
| 0, |
| 0, |
| 0, |
| 0, |
| 28, |
| 9639, |
| 29154, |
| 7388, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 122, |
| 8004, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 138, |
| 8120, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 318, |
| 76638, |
| 201504, |
| 381644, |
| 76251, |
| 0, |
| 0, |
| 0, |
| 312, |
| 76945, |
| 201345, |
| 382535, |
| 77068, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 337, |
| 76372, |
| 200953, |
| 381803, |
| 76926, |
| 0, |
| 0, |
| 0, |
| 303, |
| 76589, |
| 200882, |
| 382620, |
| 77775, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.27.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001994298078341751, |
| "l1_avg": 0.0017022747132513258, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008189234022145576, |
| "l1_avg": 0.0006612297147512436, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 5.172785623487348, |
| "l1_avg": 4.545165895061729, |
| "l0_avg": 0.8781153408097632 |
| }, |
| "merged": { |
| "l2_avg": 5.173085334410093, |
| "l1_avg": 4.545679976851852, |
| "l0_avg": 0.8656067497347608 |
| }, |
| "diff": { |
| "l2_avg": 0.29276644083760006, |
| "l1_avg": 0.05736385392554012, |
| "l0_avg": 0.044707727314513404 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 47465443, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 64706798, |
| 87186013, |
| 108517089, |
| 59260905, |
| 87512903, |
| 62327191, |
| 44866163, |
| 16578408, |
| 64696097, |
| 87180312, |
| 108493612, |
| 59230465, |
| 87462664, |
| 62273593, |
| 44833087, |
| 16557900 |
| ], |
| "fp4_dist_after": [ |
| 71332183, |
| 122239817, |
| 108718199, |
| 72968979, |
| 75354398, |
| 50988398, |
| 25534157, |
| 3802944, |
| 71350873, |
| 122248812, |
| 108663706, |
| 72913163, |
| 75317937, |
| 50944891, |
| 25510837, |
| 3793906 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.28.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001993763569858057, |
| "l1_avg": 0.0017016258504655626, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008157822070293471, |
| "l1_avg": 0.0006595191028383043, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 5.726502272563353, |
| "l1_avg": 4.972302758487654, |
| "l0_avg": 0.8773102428294994 |
| }, |
| "merged": { |
| "l2_avg": 5.726521933599885, |
| "l1_avg": 4.972933063271605, |
| "l0_avg": 0.8648221267888575 |
| }, |
| "diff": { |
| "l2_avg": 0.32972769096317506, |
| "l1_avg": 0.06470050576292438, |
| "l0_avg": 0.044336093855492864 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 47070886, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65128265, |
| 88960602, |
| 108384819, |
| 60095389, |
| 87078507, |
| 61653249, |
| 43729242, |
| 15874083, |
| 65129389, |
| 88952177, |
| 108371159, |
| 60083245, |
| 87052110, |
| 61634634, |
| 43696774, |
| 15859556 |
| ], |
| "fp4_dist_after": [ |
| 71756097, |
| 122552874, |
| 108351263, |
| 72549756, |
| 75181145, |
| 51103011, |
| 25668786, |
| 3739655, |
| 71759980, |
| 122534728, |
| 108334402, |
| 72525602, |
| 75153341, |
| 51087975, |
| 25647044, |
| 3737541 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.28.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010759081006596212, |
| "l1_avg": 0.009307051367229886, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008570590186728056, |
| "l1_avg": 0.0007053390973144108, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019779033131069608, |
| "l1_avg": 0.019491040265118633, |
| "l0_avg": 0.883017717997233 |
| }, |
| "merged": { |
| "l2_avg": 0.019779890113406712, |
| "l1_avg": 0.019491123152367864, |
| "l0_avg": 0.8688193478996371 |
| }, |
| "diff": { |
| "l2_avg": 0.00139568911658393, |
| "l1_avg": 0.0002951524875782154, |
| "l0_avg": 0.05096444259455175 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 108216185, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 124191516, |
| 168342949, |
| 210202277, |
| 120134704, |
| 179113975, |
| 132985826, |
| 93653653, |
| 31779337, |
| 124204731, |
| 168468828, |
| 210557840, |
| 120465025, |
| 179630505, |
| 133512754, |
| 94140771, |
| 31981709 |
| ], |
| "fp4_dist_after": [ |
| 139269413, |
| 238791539, |
| 213113192, |
| 145963096, |
| 153109069, |
| 107697673, |
| 55146496, |
| 7319523, |
| 139275176, |
| 239056692, |
| 213553655, |
| 146417661, |
| 153626235, |
| 108183605, |
| 55465811, |
| 7377564 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.29.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001985215043786637, |
| "l1_avg": 0.0016966135965453253, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008216605296012736, |
| "l1_avg": 0.000664299229780833, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 5.79639821650962, |
| "l1_avg": 5.1767655285493825, |
| "l0_avg": 0.8782469111313055 |
| }, |
| "merged": { |
| "l2_avg": 5.7964341818203495, |
| "l1_avg": 5.177458526234568, |
| "l0_avg": 0.8660369750599802 |
| }, |
| "diff": { |
| "l2_avg": 0.34188042940078883, |
| "l1_avg": 0.06670924810715663, |
| "l0_avg": 0.04340526062765239 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 46082636, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 64634999, |
| 89040611, |
| 107921163, |
| 60762929, |
| 87292001, |
| 62033260, |
| 43482525, |
| 15723135, |
| 64628210, |
| 89022495, |
| 107928168, |
| 60742517, |
| 87280323, |
| 62019874, |
| 43458219, |
| 15712771 |
| ], |
| "fp4_dist_after": [ |
| 71112688, |
| 121849240, |
| 107969669, |
| 73000140, |
| 75740210, |
| 51826889, |
| 25723586, |
| 3664658, |
| 71113605, |
| 121827191, |
| 107957658, |
| 72978712, |
| 75729853, |
| 51812987, |
| 25711496, |
| 3664618 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.29.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010774385997568268, |
| "l1_avg": 0.009332007831997342, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008565526577144397, |
| "l1_avg": 0.000705041570795907, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.020131920443640816, |
| "l1_avg": 0.01991726533866223, |
| "l0_avg": 0.8832663660873602 |
| }, |
| "merged": { |
| "l2_avg": 0.020132837030622693, |
| "l1_avg": 0.01991721824363426, |
| "l0_avg": 0.8689539572633342 |
| }, |
| "diff": { |
| "l2_avg": 0.0014092466897434658, |
| "l1_avg": 0.00030103580451306, |
| "l0_avg": 0.051408352322048614 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 109158768, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 123916106, |
| 167698515, |
| 210197846, |
| 119997346, |
| 179525433, |
| 133521802, |
| 94086447, |
| 31782504, |
| 123952170, |
| 167797708, |
| 210473232, |
| 120193408, |
| 179922792, |
| 133892641, |
| 94458096, |
| 31950354 |
| ], |
| "fp4_dist_after": [ |
| 139109587, |
| 238500857, |
| 213334947, |
| 145892329, |
| 153189747, |
| 107837056, |
| 55468925, |
| 7392946, |
| 139149177, |
| 238699505, |
| 213663580, |
| 146172541, |
| 153578237, |
| 108205757, |
| 55726351, |
| 7444858 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.20.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01077971287463357, |
| "l1_avg": 0.009326177173190647, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008267820812761784, |
| "l1_avg": 0.000668535940349102, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.011110650291801675, |
| "l1_avg": 0.005703594949510362, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "merged": { |
| "l2_avg": 0.011110371488947515, |
| "l1_avg": 0.0057037949562072756, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.097647276740807e-05, |
| "l1_avg": 4.0100152707762186e-05, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474555, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9521, |
| 28940, |
| 7359, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9722, |
| 28941, |
| 7607, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 123, |
| 8121, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 100, |
| 8040, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2012, |
| 412409, |
| 275271, |
| 43705, |
| 2861, |
| 1, |
| 0, |
| 0, |
| 2143, |
| 412187, |
| 277108, |
| 43976, |
| 2885, |
| 2, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 2092, |
| 411129, |
| 276303, |
| 43968, |
| 2891, |
| 1, |
| 0, |
| 0, |
| 2109, |
| 410899, |
| 277983, |
| 44277, |
| 2906, |
| 2, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.20.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00906222768162766, |
| "l1_avg": 0.007846260443329811, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008315652464203699, |
| "l1_avg": 0.0006733985410796271, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.5230913780395154, |
| "l1_avg": 0.3821019066704644, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 0.5230795783135294, |
| "l1_avg": 0.3821019066704644, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3284961233619706e-05, |
| "l1_avg": 3.390424470934603e-05, |
| "l0_avg": 0.9996790568033854 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11792694, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 65, |
| 16318, |
| 47997, |
| 1100, |
| 0, |
| 0, |
| 0, |
| 0, |
| 64, |
| 16145, |
| 48192, |
| 1191, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 647, |
| 45590, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 636, |
| 45287, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 242, |
| 52376, |
| 153542, |
| 566086, |
| 4798057, |
| 304046, |
| 24474, |
| 0, |
| 193, |
| 52204, |
| 153085, |
| 564232, |
| 4798179, |
| 305034, |
| 24730, |
| 0 |
| ], |
| "merged": [ |
| 218, |
| 52178, |
| 153117, |
| 564305, |
| 4796917, |
| 307288, |
| 24791, |
| 0, |
| 207, |
| 51972, |
| 152617, |
| 562588, |
| 4797046, |
| 308139, |
| 25097, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.20.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010780598760885664, |
| "l1_avg": 0.009319801463021173, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008505089149706659, |
| "l1_avg": 0.0006889156647957861, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01121986563755112, |
| "l1_avg": 0.007476954989963107, |
| "l0_avg": 0.9999957614474826 |
| }, |
| "merged": { |
| "l2_avg": 0.011218405110473598, |
| "l1_avg": 0.007477120558420817, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.284496217811559e-05, |
| "l1_avg": 4.134531805498733e-05, |
| "l0_avg": 0.9999957614474826 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796430, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 49, |
| 9603, |
| 29057, |
| 7463, |
| 0, |
| 0, |
| 0, |
| 0, |
| 43, |
| 9665, |
| 28790, |
| 7490, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 898, |
| 64567, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 887, |
| 64720, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 13088, |
| 2512481, |
| 2655695, |
| 709569, |
| 8409, |
| 0, |
| 0, |
| 0, |
| 13149, |
| 2512983, |
| 2650928, |
| 711654, |
| 8524, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 13002, |
| 2504461, |
| 2658508, |
| 714586, |
| 8538, |
| 0, |
| 0, |
| 0, |
| 13011, |
| 2505238, |
| 2653849, |
| 716643, |
| 8644, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.20.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010777037152593785, |
| "l1_avg": 0.00931810008154975, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008292080019600689, |
| "l1_avg": 0.0006730236927978694, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.09784263069592887, |
| "l1_avg": 0.06403142081366645, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.0978370326487614, |
| "l1_avg": 0.06403147379557292, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.134074489442179e-05, |
| "l1_avg": 4.0287397698395784e-05, |
| "l0_avg": 0.9999464246961806 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474481, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9739, |
| 28890, |
| 7422, |
| 0, |
| 0, |
| 0, |
| 0, |
| 40, |
| 9715, |
| 28959, |
| 7363, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 123, |
| 8076, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 112, |
| 8073, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 111, |
| 33628, |
| 98582, |
| 316751, |
| 287355, |
| 382, |
| 78, |
| 0, |
| 135, |
| 33636, |
| 98574, |
| 317556, |
| 287288, |
| 404, |
| 80, |
| 0 |
| ], |
| "merged": [ |
| 128, |
| 33497, |
| 98257, |
| 316104, |
| 288465, |
| 381, |
| 80, |
| 0, |
| 112, |
| 33500, |
| 98263, |
| 316987, |
| 288299, |
| 407, |
| 80, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.30.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010786892637148316, |
| "l1_avg": 0.009331688616010877, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008125023450702429, |
| "l1_avg": 0.0006593838334083557, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.004045844869388282, |
| "l1_avg": 0.002253444989522298, |
| "l0_avg": 0.9999864366319444 |
| }, |
| "merged": { |
| "l2_avg": 0.004046011758420702, |
| "l1_avg": 0.002253915038373735, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.034365041760061e-05, |
| "l1_avg": 3.94337655355533e-05, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474559, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 42, |
| 9630, |
| 28783, |
| 7528, |
| 0, |
| 0, |
| 0, |
| 0, |
| 41, |
| 9748, |
| 29004, |
| 7384, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 114, |
| 8139, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 106, |
| 8025, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4182, |
| 619344, |
| 111010, |
| 2325, |
| 301, |
| 0, |
| 0, |
| 0, |
| 4272, |
| 620041, |
| 110395, |
| 2354, |
| 336, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4203, |
| 618211, |
| 112051, |
| 2343, |
| 302, |
| 0, |
| 0, |
| 0, |
| 4294, |
| 618971, |
| 111460, |
| 2387, |
| 338, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.30.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009065461135161854, |
| "l1_avg": 0.007843557745218277, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008481676618755652, |
| "l1_avg": 0.0006886688371499379, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.7615882926608433, |
| "l1_avg": 0.5662435743543837, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.7615809711441169, |
| "l1_avg": 0.5662435743543837, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.465642670275478e-05, |
| "l1_avg": 3.481312758392758e-05, |
| "l0_avg": 0.9995451185438368 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11791114, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 52, |
| 16243, |
| 47670, |
| 1367, |
| 0, |
| 0, |
| 0, |
| 0, |
| 63, |
| 16345, |
| 47960, |
| 1372, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 619, |
| 45341, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 641, |
| 45559, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 126, |
| 30380, |
| 91149, |
| 361722, |
| 4467756, |
| 845888, |
| 100812, |
| 0, |
| 122, |
| 30123, |
| 91089, |
| 361452, |
| 4468881, |
| 847226, |
| 99754, |
| 0 |
| ], |
| "merged": [ |
| 123, |
| 30265, |
| 90869, |
| 360563, |
| 4463020, |
| 850843, |
| 102145, |
| 0, |
| 107, |
| 29993, |
| 90823, |
| 360370, |
| 4464045, |
| 852152, |
| 101162, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.30.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010787176152163392, |
| "l1_avg": 0.00933884514702691, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008493948222992861, |
| "l1_avg": 0.0006910503725521266, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018427869977207443, |
| "l1_avg": 0.01305855909983317, |
| "l0_avg": 0.9999983893500434 |
| }, |
| "merged": { |
| "l2_avg": 0.018424195890840565, |
| "l1_avg": 0.013058642546335856, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2994325194090665e-05, |
| "l1_avg": 4.116384726431635e-05, |
| "l0_avg": 0.9999916076660156 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796381, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 43, |
| 9640, |
| 29001, |
| 7402, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9523, |
| 29065, |
| 7447, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 878, |
| 64526, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 874, |
| 64794, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 6498, |
| 1372591, |
| 2739907, |
| 1732286, |
| 43552, |
| 3, |
| 0, |
| 0, |
| 6502, |
| 1373944, |
| 2743554, |
| 1733809, |
| 43834, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 6557, |
| 1367013, |
| 2736787, |
| 1740303, |
| 44132, |
| 3, |
| 0, |
| 0, |
| 6443, |
| 1368345, |
| 2740777, |
| 1741730, |
| 44390, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.30.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010784727399207696, |
| "l1_avg": 0.009321362442440457, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008291946724057198, |
| "l1_avg": 0.0006713047623634338, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.09833387032991739, |
| "l1_avg": 0.07573429743448894, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.0983268335029504, |
| "l1_avg": 0.0757343504163954, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.230280801350026e-05, |
| "l1_avg": 4.028390006472667e-05, |
| "l0_avg": 0.9999430338541667 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474476, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 39, |
| 9635, |
| 28934, |
| 7477, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9666, |
| 28957, |
| 7407, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 107, |
| 8075, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 118, |
| 8084, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 98, |
| 25885, |
| 77865, |
| 272074, |
| 360059, |
| 16, |
| 0, |
| 0, |
| 112, |
| 26457, |
| 77897, |
| 271885, |
| 362191, |
| 21, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 99, |
| 25748, |
| 77606, |
| 271381, |
| 361153, |
| 17, |
| 0, |
| 0, |
| 94, |
| 26371, |
| 77595, |
| 271263, |
| 363212, |
| 21, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.2.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020336592830809685, |
| "l1_avg": 0.0017260968685150146, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007377161807391934, |
| "l1_avg": 0.0005894208119975196, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0547906370707183, |
| "l1_avg": 0.0423538057303723, |
| "l0_avg": 0.8805751225977768 |
| }, |
| "merged": { |
| "l2_avg": 0.054802610522081935, |
| "l1_avg": 0.0423537454487365, |
| "l0_avg": 0.8669240296917197 |
| }, |
| "diff": { |
| "l2_avg": 0.003357817879140355, |
| "l1_avg": 0.000593499489772467, |
| "l0_avg": 0.048910564846462674 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 51927525, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 63400327, |
| 85929975, |
| 107611388, |
| 59849439, |
| 88652513, |
| 63895968, |
| 45453637, |
| 16212964, |
| 63391059, |
| 85896238, |
| 107551357, |
| 59790305, |
| 88587386, |
| 63852511, |
| 45409461, |
| 16198672 |
| ], |
| "fp4_dist_after": [ |
| 70640121, |
| 121947554, |
| 108433682, |
| 73258609, |
| 75502524, |
| 51610722, |
| 25995661, |
| 3610050, |
| 70644401, |
| 121890038, |
| 108367635, |
| 73195535, |
| 75429269, |
| 51576925, |
| 25974473, |
| 3606001 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.2.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010730993349361647, |
| "l1_avg": 0.009282155831654867, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008486811892453234, |
| "l1_avg": 0.0006908608393536674, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01956046952141656, |
| "l1_avg": 0.020537746099778164, |
| "l0_avg": 0.859332054985894 |
| }, |
| "merged": { |
| "l2_avg": 0.01956085761388143, |
| "l1_avg": 0.020537763053988235, |
| "l0_avg": 0.8436974537225417 |
| }, |
| "diff": { |
| "l2_avg": 0.0014389187097549438, |
| "l1_avg": 0.00033076757266197674, |
| "l0_avg": 0.054252329696843654 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 115197574, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 149294487, |
| 173179958, |
| 210585589, |
| 113656353, |
| 169965209, |
| 124578801, |
| 89430947, |
| 30885264, |
| 149395101, |
| 173294988, |
| 210673591, |
| 113686468, |
| 169977556, |
| 124535509, |
| 89374778, |
| 30851801 |
| ], |
| "fp4_dist_after": [ |
| 165941805, |
| 245002256, |
| 209735200, |
| 136447345, |
| 142174259, |
| 100414763, |
| 54045074, |
| 7865664, |
| 165945770, |
| 245166917, |
| 209800288, |
| 136454836, |
| 142153323, |
| 100359026, |
| 54006781, |
| 7853093 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.3.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020378198415846927, |
| "l1_avg": 0.0017286078797446357, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007521364730104881, |
| "l1_avg": 0.0006033498379919264, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.08045138051388905, |
| "l1_avg": 0.06251861949025848, |
| "l0_avg": 0.8774585017451534 |
| }, |
| "merged": { |
| "l2_avg": 0.08042313275942041, |
| "l1_avg": 0.06251869860990548, |
| "l0_avg": 0.8636997590241609 |
| }, |
| "diff": { |
| "l2_avg": 0.004954643489743982, |
| "l1_avg": 0.0009055054629290545, |
| "l0_avg": 0.04879861525547357 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 51808670, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65053858, |
| 88517275, |
| 108843169, |
| 60143562, |
| 87307694, |
| 61851233, |
| 43580391, |
| 15673236, |
| 65046392, |
| 88501443, |
| 108807195, |
| 60097490, |
| 87271034, |
| 61798364, |
| 43530528, |
| 15660336 |
| ], |
| "fp4_dist_after": [ |
| 72360874, |
| 124002722, |
| 108880539, |
| 72441379, |
| 73977531, |
| 50357829, |
| 25387518, |
| 3564788, |
| 72346802, |
| 123966002, |
| 108838797, |
| 72398499, |
| 73940722, |
| 50312047, |
| 25347553, |
| 3559598 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.3.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010717844162996032, |
| "l1_avg": 0.0092696832285987, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008514652909861647, |
| "l1_avg": 0.0006938665277428098, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018427687221103245, |
| "l1_avg": 0.019744160970052084, |
| "l0_avg": 0.868210115785952 |
| }, |
| "merged": { |
| "l2_avg": 0.018427757422129314, |
| "l1_avg": 0.019744127061631943, |
| "l0_avg": 0.8533203845553928 |
| }, |
| "diff": { |
| "l2_avg": 0.0013792149722576142, |
| "l1_avg": 0.00031005147062701944, |
| "l0_avg": 0.051989156934950084 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 110392029, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 139894867, |
| 175379146, |
| 209383143, |
| 119148172, |
| 172823256, |
| 126999818, |
| 88121303, |
| 29630329, |
| 139943345, |
| 175483061, |
| 209571674, |
| 119219266, |
| 172932810, |
| 127058432, |
| 88137684, |
| 29640094 |
| ], |
| "fp4_dist_after": [ |
| 155703227, |
| 242553237, |
| 209760807, |
| 140419137, |
| 146667872, |
| 104457790, |
| 54495541, |
| 7326523, |
| 155751340, |
| 242732613, |
| 209905630, |
| 140505883, |
| 146766487, |
| 104502240, |
| 54493368, |
| 7324705 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.20.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010748046530158812, |
| "l1_avg": 0.009298345777723524, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008585734875570073, |
| "l1_avg": 0.000702471203274197, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019724296198950872, |
| "l1_avg": 0.018105713644145448, |
| "l0_avg": 0.877213892524625 |
| }, |
| "merged": { |
| "l2_avg": 0.019730424880981444, |
| "l1_avg": 0.018105766390576775, |
| "l0_avg": 0.8634405461064092 |
| }, |
| "diff": { |
| "l2_avg": 0.0014511032236946954, |
| "l1_avg": 0.00027247052133819204, |
| "l0_avg": 0.048608094203619306 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 103212794, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 130342756, |
| 176544011, |
| 212704908, |
| 120894334, |
| 174776168, |
| 126826241, |
| 88136575, |
| 30629046, |
| 130377139, |
| 176659404, |
| 212900162, |
| 121106992, |
| 175094527, |
| 127175353, |
| 88438499, |
| 30760285 |
| ], |
| "fp4_dist_after": [ |
| 144971005, |
| 244079397, |
| 213499750, |
| 144692256, |
| 149944394, |
| 104374978, |
| 52357300, |
| 6947721, |
| 144994751, |
| 244267758, |
| 213740034, |
| 144971914, |
| 150279442, |
| 104713133, |
| 52553484, |
| 6979083 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.21.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010771066844713989, |
| "l1_avg": 0.009318417972988553, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008377618505619466, |
| "l1_avg": 0.000681149133015424, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.053839793516870994, |
| "l1_avg": 0.021034638086954754, |
| "l0_avg": 0.9999911838107639 |
| }, |
| "merged": { |
| "l2_avg": 0.05383888249909402, |
| "l1_avg": 0.021035121546851263, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.178050669828769e-05, |
| "l1_avg": 4.049870185554028e-05, |
| "l0_avg": 0.9999864366319444 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474540, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 26, |
| 9608, |
| 28985, |
| 7373, |
| 0, |
| 0, |
| 0, |
| 0, |
| 37, |
| 9619, |
| 29018, |
| 7494, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 118, |
| 8074, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 107, |
| 8085, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4597, |
| 332337, |
| 188076, |
| 147520, |
| 64495, |
| 114, |
| 14, |
| 0, |
| 4606, |
| 332897, |
| 187265, |
| 147717, |
| 64798, |
| 116, |
| 8, |
| 0 |
| ], |
| "merged": [ |
| 4324, |
| 332002, |
| 188215, |
| 147698, |
| 64752, |
| 115, |
| 14, |
| 0, |
| 4353, |
| 332598, |
| 187419, |
| 147904, |
| 65041, |
| 117, |
| 8, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.21.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009042230319098257, |
| "l1_avg": 0.007820227183401585, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008354607309471092, |
| "l1_avg": 0.0006770810733238857, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.907006907306668, |
| "l1_avg": 0.6183514065212674, |
| "l0_avg": 0.9999998304578993 |
| }, |
| "merged": { |
| "l2_avg": 0.907003139924275, |
| "l1_avg": 0.6183514912923177, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.306456325495372e-05, |
| "l1_avg": 3.391810848067204e-05, |
| "l0_avg": 0.999492899576823 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11790498, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 68, |
| 16492, |
| 47955, |
| 1231, |
| 0, |
| 0, |
| 0, |
| 0, |
| 53, |
| 16202, |
| 47871, |
| 1200, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 618, |
| 45259, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 636, |
| 45647, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 116, |
| 33012, |
| 98476, |
| 373586, |
| 4324610, |
| 881646, |
| 189445, |
| 0, |
| 135, |
| 33223, |
| 98037, |
| 374194, |
| 4318786, |
| 882278, |
| 188936, |
| 0 |
| ], |
| "merged": [ |
| 126, |
| 32865, |
| 98187, |
| 372534, |
| 4319480, |
| 886412, |
| 191306, |
| 0, |
| 119, |
| 33104, |
| 97697, |
| 373022, |
| 4313581, |
| 887273, |
| 190774, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.21.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010794490211264225, |
| "l1_avg": 0.009332219759623209, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008729116936335395, |
| "l1_avg": 0.0007092058076523244, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.012157117517205886, |
| "l1_avg": 0.0073843830161624486, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "merged": { |
| "l2_avg": 0.012155778052829087, |
| "l1_avg": 0.007384726074006823, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.493750649071962e-05, |
| "l1_avg": 4.286409045259158e-05, |
| "l0_avg": 0.9999950832790798 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796422, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 27, |
| 9570, |
| 28917, |
| 7392, |
| 0, |
| 0, |
| 0, |
| 0, |
| 34, |
| 9745, |
| 28999, |
| 7476, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 826, |
| 64910, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 924, |
| 64412, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 20358, |
| 2971271, |
| 2097971, |
| 795437, |
| 15230, |
| 0, |
| 0, |
| 0, |
| 20110, |
| 2971702, |
| 2097240, |
| 792121, |
| 15040, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 20224, |
| 2964496, |
| 2099739, |
| 799996, |
| 15457, |
| 0, |
| 0, |
| 0, |
| 20348, |
| 2964718, |
| 2099548, |
| 796691, |
| 15263, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.21.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010778957358166804, |
| "l1_avg": 0.009329879283905029, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007955392356961966, |
| "l1_avg": 0.0006371060153469443, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.09734348091448373, |
| "l1_avg": 0.06765524546305339, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.09733749961156181, |
| "l1_avg": 0.06765520307752822, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.0041197792283485e-05, |
| "l1_avg": 3.773680008533928e-05, |
| "l0_avg": 0.9999464246961806 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474481, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9711, |
| 28852, |
| 7311, |
| 0, |
| 0, |
| 0, |
| 0, |
| 41, |
| 9451, |
| 29337, |
| 7422, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 134, |
| 8195, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 128, |
| 7927, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 115, |
| 31781, |
| 93401, |
| 303335, |
| 307870, |
| 275, |
| 14, |
| 0, |
| 126, |
| 32351, |
| 93442, |
| 303677, |
| 307889, |
| 274, |
| 10, |
| 0 |
| ], |
| "merged": [ |
| 114, |
| 31692, |
| 93110, |
| 302724, |
| 308894, |
| 277, |
| 14, |
| 0, |
| 118, |
| 32202, |
| 93157, |
| 303102, |
| 308869, |
| 277, |
| 10, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.22.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010796605971515235, |
| "l1_avg": 0.00934712224536472, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008611490484327078, |
| "l1_avg": 0.0006989951943978667, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.006499489048114691, |
| "l1_avg": 0.0034466332859463163, |
| "l0_avg": 0.9999905056423611 |
| }, |
| "merged": { |
| "l2_avg": 0.006499540096524608, |
| "l1_avg": 0.003447045882542928, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.354420471840474e-05, |
| "l1_avg": 4.179206832001607e-05, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474558, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 34, |
| 9596, |
| 28838, |
| 7394, |
| 0, |
| 0, |
| 0, |
| 0, |
| 46, |
| 9579, |
| 29196, |
| 7477, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 116, |
| 7960, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 84, |
| 8224, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3067, |
| 529820, |
| 189162, |
| 13611, |
| 736, |
| 0, |
| 0, |
| 0, |
| 3006, |
| 530849, |
| 189730, |
| 13869, |
| 710, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3010, |
| 528490, |
| 190318, |
| 13711, |
| 745, |
| 0, |
| 0, |
| 0, |
| 3069, |
| 529624, |
| 190904, |
| 13975, |
| 714, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.22.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009036404175836133, |
| "l1_avg": 0.007812882773578167, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008302949263735968, |
| "l1_avg": 0.0006729288233651055, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.5406267304709854, |
| "l1_avg": 0.39375084771050345, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 0.5406145042488795, |
| "l1_avg": 0.39375084771050345, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.293868477162665e-05, |
| "l1_avg": 3.369286294198699e-05, |
| "l0_avg": 0.9996807522243923 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11792714, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 65, |
| 16230, |
| 47781, |
| 1053, |
| 0, |
| 0, |
| 0, |
| 0, |
| 68, |
| 16628, |
| 48099, |
| 1148, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 646, |
| 45319, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 621, |
| 45574, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 201, |
| 44710, |
| 135681, |
| 534032, |
| 4822405, |
| 336604, |
| 24942, |
| 0, |
| 186, |
| 45113, |
| 135629, |
| 533562, |
| 4822286, |
| 336100, |
| 25029, |
| 0 |
| ], |
| "merged": [ |
| 186, |
| 44495, |
| 135304, |
| 532236, |
| 4821111, |
| 339958, |
| 25283, |
| 0, |
| 167, |
| 44915, |
| 135254, |
| 531849, |
| 4820836, |
| 339505, |
| 25381, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.22.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010772965060202738, |
| "l1_avg": 0.009326036771138508, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008539909690566246, |
| "l1_avg": 0.0006916338461451232, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01462056243818822, |
| "l1_avg": 0.010086721844143338, |
| "l0_avg": 0.9999968634711371 |
| }, |
| "merged": { |
| "l2_avg": 0.014618301120218764, |
| "l1_avg": 0.010086852974361843, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.258036312451557e-05, |
| "l1_avg": 4.1472606567872893e-05, |
| "l0_avg": 0.999993642171224 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796405, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 41, |
| 9578, |
| 29108, |
| 7441, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9619, |
| 28991, |
| 7343, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 913, |
| 64553, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 878, |
| 64728, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8233, |
| 1852939, |
| 2821475, |
| 1195790, |
| 20179, |
| 0, |
| 0, |
| 0, |
| 8235, |
| 1853092, |
| 2820515, |
| 1195512, |
| 20510, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8165, |
| 1845665, |
| 2821575, |
| 1202787, |
| 20402, |
| 0, |
| 0, |
| 0, |
| 8185, |
| 1846185, |
| 2820228, |
| 1202537, |
| 20751, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.22.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010760989103236281, |
| "l1_avg": 0.009312045574188233, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008442169055342674, |
| "l1_avg": 0.0006820269045419991, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.10051231488686335, |
| "l1_avg": 0.06882318390740289, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.10050656605054657, |
| "l1_avg": 0.06882323688930936, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2326466988096264e-05, |
| "l1_avg": 4.08066643608941e-05, |
| "l0_avg": 0.9999498155381944 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474486, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 26, |
| 9578, |
| 29035, |
| 7362, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9552, |
| 29213, |
| 7349, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 117, |
| 8115, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 134, |
| 8018, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 130, |
| 30715, |
| 90491, |
| 298976, |
| 318024, |
| 351, |
| 60, |
| 0, |
| 105, |
| 30676, |
| 90073, |
| 297842, |
| 316697, |
| 359, |
| 61, |
| 0 |
| ], |
| "merged": [ |
| 103, |
| 30589, |
| 90221, |
| 298316, |
| 319105, |
| 353, |
| 60, |
| 0, |
| 112, |
| 30533, |
| 89751, |
| 297230, |
| 317765, |
| 359, |
| 63, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.23.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010784600956223134, |
| "l1_avg": 0.009333602587381999, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008398827048949897, |
| "l1_avg": 0.0006806112360209227, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.026107510792300906, |
| "l1_avg": 0.01140317784415351, |
| "l0_avg": 0.9999911838107639 |
| }, |
| "merged": { |
| "l2_avg": 0.026106950045151976, |
| "l1_avg": 0.011403585804833307, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.179717719465111e-05, |
| "l1_avg": 4.061065717703766e-05, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474549, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9634, |
| 29250, |
| 7395, |
| 0, |
| 0, |
| 0, |
| 0, |
| 35, |
| 9666, |
| 28743, |
| 7402, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 128, |
| 8080, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 122, |
| 8054, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2877, |
| 347257, |
| 245107, |
| 122146, |
| 20247, |
| 6, |
| 0, |
| 0, |
| 2817, |
| 346485, |
| 244668, |
| 122719, |
| 20222, |
| 9, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 2777, |
| 346517, |
| 245453, |
| 122595, |
| 20354, |
| 6, |
| 0, |
| 0, |
| 2765, |
| 345669, |
| 244903, |
| 123151, |
| 20361, |
| 9, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.23.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009039553994217814, |
| "l1_avg": 0.007819723337888718, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008301222453100517, |
| "l1_avg": 0.0006720126503043705, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.8038526364967319, |
| "l1_avg": 0.5722463819715712, |
| "l0_avg": 0.9999998304578993 |
| }, |
| "merged": { |
| "l2_avg": 0.8038480872047855, |
| "l1_avg": 0.572246339586046, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.299852629366878e-05, |
| "l1_avg": 3.350639664050606e-05, |
| "l0_avg": 0.9995093451605903 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11790692, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 53, |
| 16320, |
| 48006, |
| 1257, |
| 0, |
| 0, |
| 0, |
| 0, |
| 50, |
| 16456, |
| 47804, |
| 1126, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 606, |
| 44978, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 691, |
| 45885, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 114, |
| 31195, |
| 94169, |
| 373063, |
| 4451301, |
| 817187, |
| 129026, |
| 0, |
| 126, |
| 31279, |
| 94322, |
| 374639, |
| 4453934, |
| 817832, |
| 128293, |
| 0 |
| ], |
| "merged": [ |
| 125, |
| 31056, |
| 93886, |
| 371872, |
| 4446201, |
| 822236, |
| 130674, |
| 0, |
| 128, |
| 31114, |
| 94039, |
| 373464, |
| 4448918, |
| 822853, |
| 129914, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.23.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010790148740340849, |
| "l1_avg": 0.009329810407426623, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008604829008163181, |
| "l1_avg": 0.0006986594526097178, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.012875610207322336, |
| "l1_avg": 0.008607690864139133, |
| "l0_avg": 0.9999952528211805 |
| }, |
| "merged": { |
| "l2_avg": 0.012873886452170765, |
| "l1_avg": 0.008607943852742514, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3859222395724944e-05, |
| "l1_avg": 4.169020232641035e-05, |
| "l0_avg": 0.999993896484375 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796408, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9675, |
| 28911, |
| 7504, |
| 0, |
| 0, |
| 0, |
| 0, |
| 44, |
| 9649, |
| 28926, |
| 7420, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 891, |
| 64692, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 915, |
| 64574, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 14462, |
| 2318624, |
| 2599252, |
| 953126, |
| 13190, |
| 0, |
| 0, |
| 0, |
| 14435, |
| 2316936, |
| 2598670, |
| 954376, |
| 13409, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 14441, |
| 2311379, |
| 2600418, |
| 958955, |
| 13372, |
| 0, |
| 0, |
| 0, |
| 14439, |
| 2309683, |
| 2599977, |
| 960229, |
| 13587, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.23.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010808987174320415, |
| "l1_avg": 0.009344257248772515, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008043375564739108, |
| "l1_avg": 0.0006496068090200424, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.084807888869072, |
| "l1_avg": 0.06196324560377333, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.08480207720394303, |
| "l1_avg": 0.0619632879892985, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.166462926202726e-05, |
| "l1_avg": 3.959390386525128e-05, |
| "l0_avg": 0.99996337890625 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474506, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9490, |
| 29095, |
| 7389, |
| 0, |
| 0, |
| 0, |
| 0, |
| 48, |
| 9608, |
| 29079, |
| 7416, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 116, |
| 8078, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 127, |
| 8063, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 142, |
| 33709, |
| 99627, |
| 317531, |
| 285948, |
| 61, |
| 2, |
| 0, |
| 142, |
| 34151, |
| 99911, |
| 317786, |
| 285503, |
| 47, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 148, |
| 33557, |
| 99327, |
| 316969, |
| 286994, |
| 63, |
| 2, |
| 0, |
| 129, |
| 33971, |
| 99589, |
| 317189, |
| 286575, |
| 47, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.20.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.002030883587466815, |
| "l1_avg": 0.0017247060934702555, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000731367820115957, |
| "l1_avg": 0.0005821262796719869, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 2.4045152763106703, |
| "l1_avg": 2.0074358603395064, |
| "l0_avg": 0.8699720519266011 |
| }, |
| "merged": { |
| "l2_avg": 2.404280542715977, |
| "l1_avg": 2.0075828269675924, |
| "l0_avg": 0.8555212543628834 |
| }, |
| "diff": { |
| "l2_avg": 0.14973108133912885, |
| "l1_avg": 0.029946942741488233, |
| "l0_avg": 0.05061127179934655 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 53733137, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 69025875, |
| 91153369, |
| 111387359, |
| 58057493, |
| 84642296, |
| 58861441, |
| 42260480, |
| 15567076, |
| 69022613, |
| 91118135, |
| 111349414, |
| 58052920, |
| 84602068, |
| 58809090, |
| 42224037, |
| 15549534 |
| ], |
| "fp4_dist_after": [ |
| 76696287, |
| 127916526, |
| 110072071, |
| 69596079, |
| 70302264, |
| 47448640, |
| 24975967, |
| 3946200, |
| 76694370, |
| 127871045, |
| 110042576, |
| 69570864, |
| 70255069, |
| 47406744, |
| 24951880, |
| 3936618 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.21.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020244170066477255, |
| "l1_avg": 0.001720605625046624, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007659171385225286, |
| "l1_avg": 0.0006136181867784924, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 2.8834401435751977, |
| "l1_avg": 2.415666232638889, |
| "l0_avg": 0.8729031155433183 |
| }, |
| "merged": { |
| "l2_avg": 2.8833010777070447, |
| "l1_avg": 2.415836226851852, |
| "l0_avg": 0.8597204768804856 |
| }, |
| "diff": { |
| "l2_avg": 0.17085154522390056, |
| "l1_avg": 0.03273703492717978, |
| "l0_avg": 0.04641446148907697 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 49277454, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 67461442, |
| 90088993, |
| 110579461, |
| 58503509, |
| 85544861, |
| 59386540, |
| 43146535, |
| 16241616, |
| 67475185, |
| 90089663, |
| 110547151, |
| 58481367, |
| 85490866, |
| 59340315, |
| 43084399, |
| 16221297 |
| ], |
| "fp4_dist_after": [ |
| 74459322, |
| 126155441, |
| 109737288, |
| 71473646, |
| 72599817, |
| 48313271, |
| 24485597, |
| 3727863, |
| 74473091, |
| 126169367, |
| 109681480, |
| 71435828, |
| 72535181, |
| 48272626, |
| 24444969, |
| 3718413 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.21.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010719845584892378, |
| "l1_avg": 0.009263752566443548, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008575750984630596, |
| "l1_avg": 0.0007037789457374149, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019388441244761148, |
| "l1_avg": 0.018510713930483216, |
| "l0_avg": 0.8803407565458321 |
| }, |
| "merged": { |
| "l2_avg": 0.019390572441948785, |
| "l1_avg": 0.018510719581886576, |
| "l0_avg": 0.8662978457227165 |
| }, |
| "diff": { |
| "l2_avg": 0.0013935221566094293, |
| "l1_avg": 0.00028024879502661434, |
| "l0_avg": 0.049886514169198495 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 105927348, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 127060646, |
| 173731316, |
| 210794589, |
| 122023649, |
| 177038021, |
| 130139070, |
| 89754173, |
| 30300610, |
| 127019771, |
| 173831259, |
| 211019942, |
| 122206626, |
| 177405368, |
| 130503804, |
| 90094291, |
| 30443265 |
| ], |
| "fp4_dist_after": [ |
| 141953493, |
| 240937244, |
| 212668221, |
| 145324261, |
| 151713497, |
| 106970185, |
| 54217982, |
| 7043869, |
| 141945169, |
| 241087923, |
| 212964686, |
| 145587090, |
| 152092374, |
| 107322815, |
| 54449945, |
| 7087646 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.22.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020199546243924297, |
| "l1_avg": 0.0017175926102532281, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000768431567220983, |
| "l1_avg": 0.0006156521124972237, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 3.130351112943868, |
| "l1_avg": 2.7752736786265433, |
| "l0_avg": 0.874039623119213 |
| }, |
| "merged": { |
| "l2_avg": 3.1302674336542378, |
| "l1_avg": 2.7755164930555556, |
| "l0_avg": 0.8609791715645496 |
| }, |
| "diff": { |
| "l2_avg": 0.19619268817821656, |
| "l1_avg": 0.038364721227575234, |
| "l0_avg": 0.045864537556966144 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 48693609, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 66858935, |
| 91135646, |
| 109806186, |
| 60003011, |
| 85749492, |
| 59755892, |
| 42189413, |
| 15455563, |
| 66871081, |
| 91104027, |
| 109771737, |
| 59970033, |
| 85701124, |
| 59710301, |
| 42158555, |
| 15442204 |
| ], |
| "fp4_dist_after": [ |
| 73801399, |
| 125295474, |
| 108996218, |
| 71667252, |
| 73185977, |
| 49467281, |
| 24914861, |
| 3632722, |
| 73794679, |
| 125257936, |
| 108952091, |
| 71626832, |
| 73147128, |
| 49423737, |
| 24890839, |
| 3628774 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.22.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010755526872263079, |
| "l1_avg": 0.009294064839680989, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008589188496840976, |
| "l1_avg": 0.000703904777765274, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019145807955000135, |
| "l1_avg": 0.018444686701268326, |
| "l0_avg": 0.8810588426943179 |
| }, |
| "merged": { |
| "l2_avg": 0.019146772225697835, |
| "l1_avg": 0.018444690468870564, |
| "l0_avg": 0.8671551504252869 |
| }, |
| "diff": { |
| "l2_avg": 0.0013634593122535282, |
| "l1_avg": 0.00027582053784970886, |
| "l0_avg": 0.0494805281839253 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 105065291, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 126255630, |
| 173030622, |
| 210470288, |
| 122158008, |
| 177476582, |
| 130580156, |
| 90172639, |
| 30522305, |
| 126300027, |
| 173168748, |
| 210714619, |
| 122391758, |
| 177849236, |
| 131010036, |
| 90570207, |
| 30695539 |
| ], |
| "fp4_dist_after": [ |
| 141046908, |
| 240231131, |
| 212548588, |
| 145884898, |
| 152409717, |
| 107391090, |
| 54192151, |
| 6996050, |
| 141031382, |
| 240444550, |
| 212861253, |
| 146213500, |
| 152790172, |
| 107811373, |
| 54468590, |
| 7045047 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.24.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010819099471644836, |
| "l1_avg": 0.009368165996339587, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008699767058715224, |
| "l1_avg": 0.0007025087252259254, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.004480044188176106, |
| "l1_avg": 0.0029819644159740873, |
| "l0_avg": 0.9999918619791667 |
| }, |
| "merged": { |
| "l2_avg": 0.004480137253354184, |
| "l1_avg": 0.002982382641898261, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3591565616980094e-05, |
| "l1_avg": 4.174705698258347e-05, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474558, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9453, |
| 28858, |
| 7473, |
| 0, |
| 0, |
| 0, |
| 0, |
| 44, |
| 9535, |
| 29180, |
| 7584, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 126, |
| 8111, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 123, |
| 8024, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3056, |
| 548800, |
| 178586, |
| 6773, |
| 53, |
| 0, |
| 0, |
| 0, |
| 3167, |
| 548833, |
| 178549, |
| 6684, |
| 59, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3129, |
| 547571, |
| 179682, |
| 6859, |
| 53, |
| 0, |
| 0, |
| 0, |
| 3097, |
| 547498, |
| 179857, |
| 6755, |
| 59, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.24.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009028850670329571, |
| "l1_avg": 0.00781028438359499, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008391099068960002, |
| "l1_avg": 0.0006794935713211695, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.64126058691858, |
| "l1_avg": 0.47400021023220484, |
| "l0_avg": 0.9999998304578993 |
| }, |
| "merged": { |
| "l2_avg": 0.6412482896137874, |
| "l1_avg": 0.47400025261773004, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.324666491555505e-05, |
| "l1_avg": 3.3978688427143626e-05, |
| "l0_avg": 0.9996036105685764 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11791804, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 57, |
| 16549, |
| 48088, |
| 1079, |
| 0, |
| 0, |
| 0, |
| 0, |
| 67, |
| 16241, |
| 47868, |
| 1123, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 628, |
| 45482, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 641, |
| 45409, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 148, |
| 35798, |
| 107259, |
| 427080, |
| 4724309, |
| 562138, |
| 39971, |
| 0, |
| 126, |
| 35494, |
| 107339, |
| 426547, |
| 4727643, |
| 562594, |
| 40034, |
| 0 |
| ], |
| "merged": [ |
| 144, |
| 35578, |
| 106953, |
| 425685, |
| 4720793, |
| 566924, |
| 40585, |
| 0, |
| 142, |
| 35354, |
| 107026, |
| 425167, |
| 4724099, |
| 567396, |
| 40634, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.24.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010786800750010466, |
| "l1_avg": 0.009334439039230346, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008458328373736711, |
| "l1_avg": 0.0006751060718670487, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01720783228866994, |
| "l1_avg": 0.012095442083146836, |
| "l0_avg": 0.9999974568684896 |
| }, |
| "merged": { |
| "l2_avg": 0.017204519169071272, |
| "l1_avg": 0.01209552420510186, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2252130662390074e-05, |
| "l1_avg": 4.017627539320125e-05, |
| "l0_avg": 0.9999899970160591 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796362, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 37, |
| 9548, |
| 28896, |
| 7276, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9630, |
| 29223, |
| 7517, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 919, |
| 64654, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 974, |
| 64525, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 6348, |
| 1518333, |
| 2782556, |
| 1550571, |
| 38346, |
| 0, |
| 0, |
| 0, |
| 6276, |
| 1518947, |
| 2782292, |
| 1554702, |
| 38109, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 6381, |
| 1512144, |
| 2780437, |
| 1558274, |
| 38842, |
| 0, |
| 0, |
| 0, |
| 6332, |
| 1512651, |
| 2780509, |
| 1562307, |
| 38603, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.24.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010778310221401091, |
| "l1_avg": 0.009323243962393866, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008590805809944868, |
| "l1_avg": 0.0006984431529417634, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.07881692287582331, |
| "l1_avg": 0.05986563894483778, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.07881124943408119, |
| "l1_avg": 0.059865575366550024, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3353423557593474e-05, |
| "l1_avg": 4.149440210312605e-05, |
| "l0_avg": 0.9999525282118056 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474490, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 41, |
| 9621, |
| 28897, |
| 7421, |
| 0, |
| 0, |
| 0, |
| 0, |
| 28, |
| 9668, |
| 29100, |
| 7384, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 106, |
| 8114, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 116, |
| 8048, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 131, |
| 34121, |
| 100706, |
| 321973, |
| 281355, |
| 3, |
| 0, |
| 0, |
| 123, |
| 34380, |
| 100260, |
| 320729, |
| 280777, |
| 2, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 130, |
| 33935, |
| 100367, |
| 321372, |
| 282457, |
| 3, |
| 0, |
| 0, |
| 136, |
| 34205, |
| 100012, |
| 320093, |
| 281848, |
| 2, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.25.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010779846385859507, |
| "l1_avg": 0.0093255877494812, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007874944130890071, |
| "l1_avg": 0.0006364521686919034, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.029491209878100347, |
| "l1_avg": 0.011532900068495009, |
| "l0_avg": 0.9999857584635417 |
| }, |
| "merged": { |
| "l2_avg": 0.02949079520793979, |
| "l1_avg": 0.011533357037438286, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.864190074733019e-05, |
| "l1_avg": 3.7881144736376074e-05, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474549, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 47, |
| 9568, |
| 28982, |
| 7466, |
| 0, |
| 0, |
| 0, |
| 0, |
| 49, |
| 9712, |
| 28986, |
| 7350, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 127, |
| 8116, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 121, |
| 8020, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4528, |
| 401078, |
| 199214, |
| 105009, |
| 26824, |
| 17, |
| 0, |
| 0, |
| 4679, |
| 401594, |
| 199290, |
| 105455, |
| 26860, |
| 12, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4356, |
| 400521, |
| 199604, |
| 105230, |
| 26956, |
| 17, |
| 0, |
| 0, |
| 4394, |
| 401128, |
| 199676, |
| 105632, |
| 27034, |
| 12, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.25.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009032682740821522, |
| "l1_avg": 0.007818262092769146, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008361176847147294, |
| "l1_avg": 0.0006787609722879198, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.8117830475169391, |
| "l1_avg": 0.5716776106092665, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.8117789247211127, |
| "l1_avg": 0.5716776106092665, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.299274735454313e-05, |
| "l1_avg": 3.368981803456942e-05, |
| "l0_avg": 0.9995328267415364 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11790969, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 67, |
| 16268, |
| 48228, |
| 1075, |
| 0, |
| 0, |
| 0, |
| 0, |
| 61, |
| 16273, |
| 48055, |
| 1045, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 643, |
| 45208, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 626, |
| 45683, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 137, |
| 32221, |
| 97154, |
| 386218, |
| 4419807, |
| 825612, |
| 137463, |
| 0, |
| 122, |
| 32142, |
| 97522, |
| 386448, |
| 4418142, |
| 825185, |
| 138307, |
| 0 |
| ], |
| "merged": [ |
| 124, |
| 32070, |
| 96902, |
| 384943, |
| 4415067, |
| 830423, |
| 139079, |
| 0, |
| 108, |
| 32008, |
| 97230, |
| 385220, |
| 4413547, |
| 829771, |
| 139988, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.25.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010795721655983448, |
| "l1_avg": 0.009343740675184462, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008427170986702779, |
| "l1_avg": 0.0006810717168264091, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.014872922635845375, |
| "l1_avg": 0.009592245684729682, |
| "l0_avg": 0.9999960157606337 |
| }, |
| "merged": { |
| "l2_avg": 0.014870393647133955, |
| "l1_avg": 0.009592440393235948, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3259080435234624e-05, |
| "l1_avg": 4.0632194011575644e-05, |
| "l0_avg": 0.9999921162923177 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796387, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 39, |
| 9529, |
| 28952, |
| 7406, |
| 0, |
| 0, |
| 0, |
| 0, |
| 49, |
| 9662, |
| 29082, |
| 7441, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 961, |
| 64449, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 950, |
| 64712, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 13231, |
| 2358957, |
| 2352431, |
| 1148268, |
| 28244, |
| 0, |
| 0, |
| 0, |
| 13422, |
| 2357763, |
| 2349286, |
| 1146287, |
| 28591, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 13303, |
| 2352283, |
| 2353138, |
| 1153798, |
| 28615, |
| 0, |
| 0, |
| 0, |
| 13322, |
| 2351109, |
| 2350045, |
| 1151893, |
| 28974, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.25.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010797007290553195, |
| "l1_avg": 0.009336858325534396, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000804371724370867, |
| "l1_avg": 0.0006448579370044172, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0981751207701186, |
| "l1_avg": 0.07090628412034776, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.0981684169358563, |
| "l1_avg": 0.070906310611301, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.112263565455176e-05, |
| "l1_avg": 3.87831823900342e-05, |
| "l0_avg": 0.9999423556857638 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474475, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 49, |
| 9550, |
| 28953, |
| 7373, |
| 0, |
| 0, |
| 0, |
| 0, |
| 35, |
| 9737, |
| 28901, |
| 7562, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 107, |
| 8131, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 119, |
| 8027, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 97, |
| 29859, |
| 88874, |
| 294969, |
| 323442, |
| 129, |
| 0, |
| 0, |
| 104, |
| 30424, |
| 88683, |
| 294333, |
| 323525, |
| 117, |
| 4, |
| 0 |
| ], |
| "merged": [ |
| 119, |
| 29719, |
| 88580, |
| 294425, |
| 324419, |
| 130, |
| 0, |
| 0, |
| 101, |
| 30254, |
| 88414, |
| 293660, |
| 324616, |
| 119, |
| 4, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.4.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010698106500110372, |
| "l1_avg": 0.009250824981265598, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008520695274535441, |
| "l1_avg": 0.0006950206226772732, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018361657195621068, |
| "l1_avg": 0.019673968656563463, |
| "l0_avg": 0.8730530420939128 |
| }, |
| "merged": { |
| "l2_avg": 0.01836204926172892, |
| "l1_avg": 0.019674011983989197, |
| "l0_avg": 0.8583575453581633 |
| }, |
| "diff": { |
| "l2_avg": 0.0013679358694288467, |
| "l1_avg": 0.00030545063960699386, |
| "l0_avg": 0.0517106600160952 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 109800678, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 134746115, |
| 173786862, |
| 208954748, |
| 120467095, |
| 174941392, |
| 129211867, |
| 89239649, |
| 29692360, |
| 134808790, |
| 173928620, |
| 209198230, |
| 120636129, |
| 175183344, |
| 129419362, |
| 89397777, |
| 29754060 |
| ], |
| "fp4_dist_after": [ |
| 150382730, |
| 240676499, |
| 210272048, |
| 142197683, |
| 148956676, |
| 106231642, |
| 55099365, |
| 7262012, |
| 150376099, |
| 240903601, |
| 210542298, |
| 142412652, |
| 149164837, |
| 106404647, |
| 55202514, |
| 7281097 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.23.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020084456154882075, |
| "l1_avg": 0.001710525651772817, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007948405294762041, |
| "l1_avg": 0.0006400759021441142, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 3.2615487301845296, |
| "l1_avg": 3.0401063368055556, |
| "l0_avg": 0.8769792128197941 |
| }, |
| "merged": { |
| "l2_avg": 3.2615412973536455, |
| "l1_avg": 3.040370611496914, |
| "l0_avg": 0.863701432781455 |
| }, |
| "diff": { |
| "l2_avg": 0.20961200497534702, |
| "l1_avg": 0.042712794174382715, |
| "l0_avg": 0.04691104370870708 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 49804667, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65306874, |
| 89870303, |
| 108599758, |
| 60715197, |
| 86883361, |
| 61441356, |
| 42854979, |
| 15253611, |
| 65302229, |
| 89855639, |
| 108591982, |
| 60677162, |
| 86865998, |
| 61415796, |
| 42817728, |
| 15231227 |
| ], |
| "fp4_dist_after": [ |
| 72339490, |
| 123453660, |
| 108421436, |
| 72051315, |
| 74275641, |
| 50906422, |
| 25784624, |
| 3677324, |
| 72366409, |
| 123427329, |
| 108425306, |
| 72013078, |
| 74239467, |
| 50864048, |
| 25764683, |
| 3672968 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.23.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010744322463777876, |
| "l1_avg": 0.009288077221976386, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008544443583848801, |
| "l1_avg": 0.0007008546756373512, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019668798976474337, |
| "l1_avg": 0.019160295651282792, |
| "l0_avg": 0.8816407719364873 |
| }, |
| "merged": { |
| "l2_avg": 0.019669885105556912, |
| "l1_avg": 0.019160282464674962, |
| "l0_avg": 0.8672053725631149 |
| }, |
| "diff": { |
| "l2_avg": 0.001398082905345493, |
| "l1_avg": 0.000294346927124777, |
| "l0_avg": 0.05148156578158155 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 109314227, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125658923, |
| 170924926, |
| 210117193, |
| 121268157, |
| 178292705, |
| 132174931, |
| 91754092, |
| 30600924, |
| 125661085, |
| 171001664, |
| 210346415, |
| 121421779, |
| 178638493, |
| 132563404, |
| 92158828, |
| 30782881 |
| ], |
| "fp4_dist_after": [ |
| 140982415, |
| 239576920, |
| 212657899, |
| 145069813, |
| 152106653, |
| 107705067, |
| 55418673, |
| 7277347, |
| 140989235, |
| 239738600, |
| 212919056, |
| 145311221, |
| 152478545, |
| 108102331, |
| 55700389, |
| 7332236 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.24.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.002013011983939422, |
| "l1_avg": 0.001713620788521237, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007806835782810662, |
| "l1_avg": 0.0006285067233774397, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 3.573751124651164, |
| "l1_avg": 3.338803047839506, |
| "l0_avg": 0.8758075412703149 |
| }, |
| "merged": { |
| "l2_avg": 3.5737295454647264, |
| "l1_avg": 3.3391372492283953, |
| "l0_avg": 0.8620955073980637 |
| }, |
| "diff": { |
| "l2_avg": 0.2315679580139064, |
| "l1_avg": 0.04794540593653549, |
| "l0_avg": 0.048482144202715086 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 51472678, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65922572, |
| 89681799, |
| 108941894, |
| 60132987, |
| 86545875, |
| 61438476, |
| 42913399, |
| 15271440, |
| 65930475, |
| 89673427, |
| 108943354, |
| 60141935, |
| 86539633, |
| 61422031, |
| 42921754, |
| 15262149 |
| ], |
| "fp4_dist_after": [ |
| 73202656, |
| 123916385, |
| 108698915, |
| 71376591, |
| 73368837, |
| 50493703, |
| 25954451, |
| 3838285, |
| 73208227, |
| 123919050, |
| 108685871, |
| 71367475, |
| 73374475, |
| 50487982, |
| 25956557, |
| 3833740 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.24.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010761263467210153, |
| "l1_avg": 0.009320998191833496, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008564608687466087, |
| "l1_avg": 0.0007027107808325026, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01953603161705865, |
| "l1_avg": 0.01901042608567226, |
| "l0_avg": 0.8821097790753399 |
| }, |
| "merged": { |
| "l2_avg": 0.019537376032935247, |
| "l1_avg": 0.019010456226490162, |
| "l0_avg": 0.8678220767739379 |
| }, |
| "diff": { |
| "l2_avg": 0.0013940725061628554, |
| "l1_avg": 0.00029004514953236523, |
| "l0_avg": 0.050966728116259165 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 108221038, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125161636, |
| 171272210, |
| 210042987, |
| 121712205, |
| 178386487, |
| 132188528, |
| 91590181, |
| 30588126, |
| 125162498, |
| 171356984, |
| 210221640, |
| 121883112, |
| 178693095, |
| 132492108, |
| 91898292, |
| 30716311 |
| ], |
| "fp4_dist_after": [ |
| 140339326, |
| 239447011, |
| 212570648, |
| 145505670, |
| 152558498, |
| 108025866, |
| 55305514, |
| 7203157, |
| 140322835, |
| 239587837, |
| 212819785, |
| 145726454, |
| 152858080, |
| 108334155, |
| 55523192, |
| 7238372 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.5.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010772961133401975, |
| "l1_avg": 0.00931628942489624, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008557713008485734, |
| "l1_avg": 0.0006940073799341917, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01818343104656436, |
| "l1_avg": 0.012079694535997179, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "merged": { |
| "l2_avg": 0.018182711656664612, |
| "l1_avg": 0.012079849508073595, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.297912581738216e-05, |
| "l1_avg": 4.13146439111895e-05, |
| "l0_avg": 0.9999959309895833 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474554, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9733, |
| 28980, |
| 7307, |
| 0, |
| 0, |
| 0, |
| 0, |
| 35, |
| 9605, |
| 28917, |
| 7550, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 94, |
| 8079, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 117, |
| 8094, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 928, |
| 216397, |
| 330194, |
| 181223, |
| 7779, |
| 0, |
| 0, |
| 0, |
| 945, |
| 216012, |
| 332320, |
| 180912, |
| 7850, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 930, |
| 215574, |
| 330281, |
| 181905, |
| 7881, |
| 0, |
| 0, |
| 0, |
| 940, |
| 215062, |
| 332426, |
| 181603, |
| 7958, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.5.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009034230320415282, |
| "l1_avg": 0.00781539548188448, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008243071442304066, |
| "l1_avg": 0.0006668103237946828, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.055528324297795334, |
| "l1_avg": 0.035152064429389104, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.055524441406036386, |
| "l1_avg": 0.03515208032396105, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.214325198015814e-05, |
| "l1_avg": 3.280039462778303e-05, |
| "l0_avg": 0.9999695671929254 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796121, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 62, |
| 16184, |
| 48135, |
| 1108, |
| 0, |
| 0, |
| 0, |
| 0, |
| 60, |
| 16562, |
| 47872, |
| 1089, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 666, |
| 45441, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 643, |
| 45410, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2262, |
| 566056, |
| 1552728, |
| 2919938, |
| 857681, |
| 257, |
| 20, |
| 0, |
| 2279, |
| 566163, |
| 1553850, |
| 2916014, |
| 858978, |
| 240, |
| 14, |
| 0 |
| ], |
| "merged": [ |
| 2246, |
| 563674, |
| 1548928, |
| 2921125, |
| 862757, |
| 263, |
| 20, |
| 0, |
| 2260, |
| 563687, |
| 1549955, |
| 2917089, |
| 864218, |
| 244, |
| 14, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.5.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01078247184484953, |
| "l1_avg": 0.009334115187327068, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008375995492987441, |
| "l1_avg": 0.0006764185964129865, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.016304067018435165, |
| "l1_avg": 0.011417382293277316, |
| "l0_avg": 0.9999970330132378 |
| }, |
| "merged": { |
| "l2_avg": 0.01630116262428481, |
| "l1_avg": 0.011417514748043485, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2757045945050874e-05, |
| "l1_avg": 4.037901655667358e-05, |
| "l0_avg": 0.9999915228949653 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796380, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 30, |
| 9672, |
| 29137, |
| 7409, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9500, |
| 28950, |
| 7423, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 959, |
| 64830, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 867, |
| 64416, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7319, |
| 1667380, |
| 2749498, |
| 1447883, |
| 27246, |
| 0, |
| 0, |
| 0, |
| 7142, |
| 1664817, |
| 2748608, |
| 1449325, |
| 27262, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7216, |
| 1660681, |
| 2748577, |
| 1454993, |
| 27651, |
| 0, |
| 0, |
| 0, |
| 7228, |
| 1658503, |
| 2747368, |
| 1456601, |
| 27662, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.5.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010798238735272416, |
| "l1_avg": 0.009350393878089057, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008171678055077791, |
| "l1_avg": 0.0006584478542208672, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.034579665115546755, |
| "l1_avg": 0.026100669966803658, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.03457720222610831, |
| "l1_avg": 0.026100709703233506, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1156933804964536e-05, |
| "l1_avg": 3.880331189268165e-05, |
| "l0_avg": 0.9999762641059028 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474525, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 34, |
| 9575, |
| 29244, |
| 7435, |
| 0, |
| 0, |
| 0, |
| 0, |
| 34, |
| 9538, |
| 28762, |
| 7538, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 99, |
| 8019, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 122, |
| 8144, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 310, |
| 82438, |
| 218614, |
| 379322, |
| 55991, |
| 0, |
| 0, |
| 0, |
| 333, |
| 82834, |
| 218164, |
| 380475, |
| 56079, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 333, |
| 82024, |
| 218037, |
| 379684, |
| 56561, |
| 0, |
| 0, |
| 0, |
| 317, |
| 82537, |
| 217645, |
| 380787, |
| 56635, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.6.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01078786491301719, |
| "l1_avg": 0.00933735900455051, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008475874783471227, |
| "l1_avg": 0.0006894966936670244, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.013932914253176387, |
| "l1_avg": 0.009702063931359185, |
| "l0_avg": 0.9999952528211805 |
| }, |
| "merged": { |
| "l2_avg": 0.013932265545690369, |
| "l1_avg": 0.009702152676052518, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.239760957379147e-05, |
| "l1_avg": 4.095471878018644e-05, |
| "l0_avg": 0.9999905056423611 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474546, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 46, |
| 9716, |
| 28998, |
| 7467, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9579, |
| 29051, |
| 7264, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 106, |
| 8126, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 109, |
| 8043, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1019, |
| 228131, |
| 369319, |
| 136403, |
| 2537, |
| 0, |
| 0, |
| 0, |
| 972, |
| 228052, |
| 369627, |
| 136194, |
| 2306, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 1006, |
| 227271, |
| 369252, |
| 137325, |
| 2557, |
| 0, |
| 0, |
| 0, |
| 996, |
| 227181, |
| 369567, |
| 137079, |
| 2326, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.6.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009038333052700014, |
| "l1_avg": 0.007824741303920746, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008314129847207915, |
| "l1_avg": 0.0006738426370753182, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.042997481780791616, |
| "l1_avg": 0.028255184491475422, |
| "l0_avg": 0.9999994913736979 |
| }, |
| "merged": { |
| "l2_avg": 0.042993119080897695, |
| "l1_avg": 0.028255216280619302, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.298137169509308e-05, |
| "l1_avg": 3.362005938672357e-05, |
| "l0_avg": 0.9999747382269966 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796182, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 52, |
| 16294, |
| 48205, |
| 1131, |
| 0, |
| 0, |
| 0, |
| 0, |
| 62, |
| 16297, |
| 47884, |
| 1147, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 613, |
| 45274, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 613, |
| 45660, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2460, |
| 643759, |
| 1752393, |
| 2977926, |
| 522307, |
| 163, |
| 15, |
| 0, |
| 2552, |
| 643891, |
| 1749930, |
| 2978672, |
| 522256, |
| 143, |
| 13, |
| 0 |
| ], |
| "merged": [ |
| 2569, |
| 640898, |
| 1748018, |
| 2981181, |
| 526208, |
| 164, |
| 15, |
| 0, |
| 2629, |
| 641077, |
| 1745499, |
| 2981873, |
| 526190, |
| 145, |
| 14, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.6.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01078013853983626, |
| "l1_avg": 0.009327860010994806, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008580438165948772, |
| "l1_avg": 0.0006974077550694346, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.014632543202891818, |
| "l1_avg": 0.01064317226409912, |
| "l0_avg": 0.9999967787000869 |
| }, |
| "merged": { |
| "l2_avg": 0.014629896483480611, |
| "l1_avg": 0.010643288824293348, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.293156383268551e-05, |
| "l1_avg": 4.125621376766099e-05, |
| "l0_avg": 0.9999927096896701 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796394, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9671, |
| 28884, |
| 7435, |
| 0, |
| 0, |
| 0, |
| 0, |
| 48, |
| 9613, |
| 29016, |
| 7457, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 817, |
| 64739, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 857, |
| 64659, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7553, |
| 1694106, |
| 2836149, |
| 1350586, |
| 10707, |
| 0, |
| 0, |
| 0, |
| 7653, |
| 1694423, |
| 2835491, |
| 1349123, |
| 10689, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7655, |
| 1687523, |
| 2834732, |
| 1358312, |
| 10910, |
| 0, |
| 0, |
| 0, |
| 7631, |
| 1687866, |
| 2834246, |
| 1356677, |
| 10928, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.6.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01077620624155237, |
| "l1_avg": 0.009323281712002224, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008347448310814798, |
| "l1_avg": 0.0006749710300937295, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.04069512640017998, |
| "l1_avg": 0.03152393500010173, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.04069237135676478, |
| "l1_avg": 0.03152395354376899, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.181884822636093e-05, |
| "l1_avg": 3.9833618534935845e-05, |
| "l0_avg": 0.999981689453125 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474533, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 44, |
| 9703, |
| 29011, |
| 7342, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9663, |
| 28960, |
| 7401, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 107, |
| 8102, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 101, |
| 8074, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 232, |
| 64683, |
| 178903, |
| 402958, |
| 89095, |
| 0, |
| 0, |
| 0, |
| 249, |
| 64275, |
| 180610, |
| 403782, |
| 89773, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 228, |
| 64366, |
| 178447, |
| 402860, |
| 89939, |
| 0, |
| 0, |
| 0, |
| 245, |
| 64040, |
| 180092, |
| 403722, |
| 90621, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.7.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010770072578760842, |
| "l1_avg": 0.009317102697160509, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008315400336869061, |
| "l1_avg": 0.0006750680622644722, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.016864327568554518, |
| "l1_avg": 0.010634958744049072, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "merged": { |
| "l2_avg": 0.016863634880899956, |
| "l1_avg": 0.010635082589255438, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.115281986760286e-05, |
| "l1_avg": 3.961154984103309e-05, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474549, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9660, |
| 28864, |
| 7485, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9668, |
| 29091, |
| 7323, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 98, |
| 8078, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 99, |
| 8109, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1306, |
| 269723, |
| 302990, |
| 157445, |
| 5569, |
| 1, |
| 1, |
| 0, |
| 1310, |
| 269006, |
| 303638, |
| 157877, |
| 5693, |
| 1, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 1323, |
| 268868, |
| 303049, |
| 158167, |
| 5634, |
| 1, |
| 1, |
| 0, |
| 1367, |
| 268008, |
| 303819, |
| 158561, |
| 5761, |
| 1, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.7.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009038340296689602, |
| "l1_avg": 0.007817857898771763, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008301829143818372, |
| "l1_avg": 0.0006727387507756551, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.060754043533155895, |
| "l1_avg": 0.0396838903427124, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.060748921137048266, |
| "l1_avg": 0.039683898289998375, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3314272429365124e-05, |
| "l1_avg": 3.361018736743265e-05, |
| "l0_avg": 0.9999666002061632 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796086, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 79, |
| 16534, |
| 47602, |
| 1411, |
| 0, |
| 0, |
| 0, |
| 0, |
| 55, |
| 16145, |
| 47800, |
| 1446, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 600, |
| 45538, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 649, |
| 45373, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1839, |
| 473183, |
| 1331765, |
| 3013416, |
| 1075628, |
| 368, |
| 34, |
| 0, |
| 1876, |
| 473981, |
| 1330237, |
| 3019408, |
| 1074333, |
| 382, |
| 30, |
| 0 |
| ], |
| "merged": [ |
| 1823, |
| 471181, |
| 1328012, |
| 3012941, |
| 1081870, |
| 375, |
| 34, |
| 0, |
| 1874, |
| 471932, |
| 1326675, |
| 3018709, |
| 1080636, |
| 388, |
| 30, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.7.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010777694499041481, |
| "l1_avg": 0.009327399730682372, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008313311933995106, |
| "l1_avg": 0.0006735086208209395, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.014769896056196693, |
| "l1_avg": 0.010669123464160496, |
| "l0_avg": 0.9999972873263889 |
| }, |
| "merged": { |
| "l2_avg": 0.014767160483427159, |
| "l1_avg": 0.010669224129782783, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.249858159109325e-05, |
| "l1_avg": 3.975418706734975e-05, |
| "l0_avg": 0.999991946750217 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796385, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 49, |
| 9527, |
| 29170, |
| 7256, |
| 0, |
| 0, |
| 0, |
| 0, |
| 37, |
| 9632, |
| 29190, |
| 7299, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 945, |
| 64448, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 895, |
| 64784, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8132, |
| 1703870, |
| 2830265, |
| 1343477, |
| 12182, |
| 0, |
| 0, |
| 0, |
| 7822, |
| 1706287, |
| 2827607, |
| 1344761, |
| 12077, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7834, |
| 1697664, |
| 2829108, |
| 1350788, |
| 12443, |
| 0, |
| 0, |
| 0, |
| 8007, |
| 1699708, |
| 2826497, |
| 1352105, |
| 12326, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.7.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01081958482421912, |
| "l1_avg": 0.009372735685772366, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008273483836092055, |
| "l1_avg": 0.0006685453117825091, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.037099895269448194, |
| "l1_avg": 0.02862698237101237, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.03709730986382595, |
| "l1_avg": 0.028626969125535754, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1544604142461115e-05, |
| "l1_avg": 3.934708527392811e-05, |
| "l0_avg": 0.9999742296006945 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474522, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9306, |
| 29029, |
| 7453, |
| 0, |
| 0, |
| 0, |
| 0, |
| 34, |
| 9671, |
| 29058, |
| 7578, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 120, |
| 8027, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 120, |
| 8117, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 299, |
| 70518, |
| 195966, |
| 403118, |
| 67195, |
| 0, |
| 0, |
| 0, |
| 271, |
| 70909, |
| 196039, |
| 402982, |
| 67263, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 248, |
| 70211, |
| 195470, |
| 403269, |
| 67877, |
| 0, |
| 0, |
| 0, |
| 269, |
| 70594, |
| 195557, |
| 403146, |
| 67919, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.4.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020404646173286848, |
| "l1_avg": 0.0017304119136598375, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007539397580907955, |
| "l1_avg": 0.0006055277254846361, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.1028455483853657, |
| "l1_avg": 0.08412422839506173, |
| "l0_avg": 0.8775528161319686 |
| }, |
| "merged": { |
| "l2_avg": 0.10285520656985113, |
| "l1_avg": 0.08412446952160493, |
| "l0_avg": 0.8637402795862269 |
| }, |
| "diff": { |
| "l2_avg": 0.006441677028275048, |
| "l1_avg": 0.0012037416152012202, |
| "l0_avg": 0.049160565034842785 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 52192946, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65005712, |
| 87549511, |
| 108937004, |
| 59473771, |
| 87420821, |
| 62149225, |
| 44306515, |
| 16089606, |
| 64994406, |
| 87542158, |
| 108895637, |
| 59452136, |
| 87382108, |
| 62140549, |
| 44273977, |
| 16070064 |
| ], |
| "fp4_dist_after": [ |
| 72333669, |
| 123954563, |
| 109090314, |
| 72506531, |
| 73904624, |
| 50137580, |
| 25360502, |
| 3637083, |
| 72330987, |
| 123932682, |
| 109060389, |
| 72475643, |
| 73855364, |
| 50127848, |
| 25347879, |
| 3627542 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.5.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020420263539346773, |
| "l1_avg": 0.0017315245336956449, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007704136689910274, |
| "l1_avg": 0.0006190724670886993, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.13272244151676074, |
| "l1_avg": 0.11090077341338735, |
| "l0_avg": 0.8797932547110099 |
| }, |
| "merged": { |
| "l2_avg": 0.13274240226421552, |
| "l1_avg": 0.11090159475067515, |
| "l0_avg": 0.8664019992027754 |
| }, |
| "diff": { |
| "l2_avg": 0.008254620438882938, |
| "l1_avg": 0.001547347410225574, |
| "l0_avg": 0.04779431660970052 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 50742423, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 63815425, |
| 87166244, |
| 107880223, |
| 60328240, |
| 88097326, |
| 63073430, |
| 44533538, |
| 16015264, |
| 63806057, |
| 87161268, |
| 107869781, |
| 60296843, |
| 88067120, |
| 63050069, |
| 44509020, |
| 16013352 |
| ], |
| "fp4_dist_after": [ |
| 70919074, |
| 122494812, |
| 108372830, |
| 73344027, |
| 75270185, |
| 51407656, |
| 25585338, |
| 3510729, |
| 70919679, |
| 122487789, |
| 108349235, |
| 73322949, |
| 75239682, |
| 51382414, |
| 25566544, |
| 3510257 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.5.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010741260354916486, |
| "l1_avg": 0.009292061461342706, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008539641106515869, |
| "l1_avg": 0.0006964626411596934, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018294099966684976, |
| "l1_avg": 0.01948886259102527, |
| "l0_avg": 0.8760210192645037 |
| }, |
| "merged": { |
| "l2_avg": 0.018294578128390842, |
| "l1_avg": 0.019488853172019677, |
| "l0_avg": 0.8612912175684799 |
| }, |
| "diff": { |
| "l2_avg": 0.0013610189159711202, |
| "l1_avg": 0.00030463280501189053, |
| "l0_avg": 0.05205302721188392 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 110527649, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 131631832, |
| 172796340, |
| 208177975, |
| 121602685, |
| 176487118, |
| 131146175, |
| 89976493, |
| 29436485, |
| 131620970, |
| 172896771, |
| 208387989, |
| 121718061, |
| 176646594, |
| 131293169, |
| 90086898, |
| 29460845 |
| ], |
| "fp4_dist_after": [ |
| 147270366, |
| 239084680, |
| 210231209, |
| 143074946, |
| 150435834, |
| 107874461, |
| 56022787, |
| 7265362, |
| 147259202, |
| 239264164, |
| 210427490, |
| 143212687, |
| 150591408, |
| 107985875, |
| 56090914, |
| 7275015 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.6.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.002043166453588728, |
| "l1_avg": 0.001732067929373847, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007627212626366959, |
| "l1_avg": 0.0006127946078777313, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.17607023169509053, |
| "l1_avg": 0.13871083200713735, |
| "l0_avg": 0.8795213082395954 |
| }, |
| "merged": { |
| "l2_avg": 0.17600171777815124, |
| "l1_avg": 0.13871148003472222, |
| "l0_avg": 0.8656621014630353 |
| }, |
| "diff": { |
| "l2_avg": 0.010531183397795511, |
| "l1_avg": 0.001972601855242694, |
| "l0_avg": 0.04945856447573061 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 52509327, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 63963793, |
| 86830572, |
| 107903538, |
| 59952604, |
| 88198366, |
| 63369227, |
| 44863086, |
| 15928345, |
| 63946410, |
| 86797865, |
| 107869608, |
| 59912762, |
| 88148379, |
| 63315974, |
| 44777077, |
| 15905594 |
| ], |
| "fp4_dist_after": [ |
| 71316557, |
| 122610051, |
| 108482804, |
| 72769607, |
| 74863022, |
| 51305187, |
| 26022601, |
| 3632277, |
| 71307733, |
| 122568460, |
| 108438984, |
| 72717973, |
| 74796796, |
| 51258728, |
| 25968935, |
| 3623485 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.6.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010734328682299897, |
| "l1_avg": 0.009283513493008084, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008575196324022847, |
| "l1_avg": 0.0007001336250040266, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01838554408815172, |
| "l1_avg": 0.019190099268783758, |
| "l0_avg": 0.8756417568819023 |
| }, |
| "merged": { |
| "l2_avg": 0.01838574806849162, |
| "l1_avg": 0.019190082314573687, |
| "l0_avg": 0.8609388511563525 |
| }, |
| "diff": { |
| "l2_avg": 0.0013549397389094034, |
| "l1_avg": 0.00029968202849965035, |
| "l0_avg": 0.051994682123631605 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 110403761, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 132002410, |
| 172129850, |
| 208812735, |
| 120598571, |
| 176337901, |
| 130830325, |
| 90594718, |
| 30000841, |
| 132055705, |
| 172223533, |
| 208961793, |
| 120683230, |
| 176459655, |
| 130948725, |
| 90689615, |
| 30036793 |
| ], |
| "fp4_dist_after": [ |
| 147648638, |
| 239649127, |
| 210760018, |
| 143008312, |
| 150184919, |
| 107120315, |
| 55654386, |
| 7325342, |
| 147629133, |
| 239793190, |
| 210907561, |
| 143118241, |
| 150278122, |
| 107226476, |
| 55727807, |
| 7334813 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.8.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010781431242647382, |
| "l1_avg": 0.009331438276502822, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008318949257954955, |
| "l1_avg": 0.000674999610055238, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.013458143621587156, |
| "l1_avg": 0.009617459774017335, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "merged": { |
| "l2_avg": 0.013457375539357948, |
| "l1_avg": 0.009617517391840616, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.11229823174316e-05, |
| "l1_avg": 3.997757772190703e-05, |
| "l0_avg": 0.9999952528211805 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474553, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9616, |
| 29204, |
| 7481, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9666, |
| 28749, |
| 7380, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 109, |
| 8047, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 106, |
| 8122, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 973, |
| 228361, |
| 370697, |
| 136769, |
| 1460, |
| 0, |
| 0, |
| 0, |
| 956, |
| 227933, |
| 369430, |
| 136602, |
| 1379, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 959, |
| 227594, |
| 370678, |
| 137622, |
| 1480, |
| 0, |
| 0, |
| 0, |
| 931, |
| 226940, |
| 369439, |
| 137513, |
| 1404, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.8.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009026369933168, |
| "l1_avg": 0.007807998917996883, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000823530030359444, |
| "l1_avg": 0.0006667687661117978, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0722211869562678, |
| "l1_avg": 0.05007237328423394, |
| "l0_avg": 0.9999997456868489 |
| }, |
| "merged": { |
| "l2_avg": 0.07221287028192826, |
| "l1_avg": 0.050072367986043295, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.262449617282679e-05, |
| "l1_avg": 3.323618374350998e-05, |
| "l0_avg": 0.9999551561143664 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11795951, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 52, |
| 16185, |
| 48073, |
| 1144, |
| 0, |
| 0, |
| 0, |
| 0, |
| 75, |
| 16568, |
| 47864, |
| 1111, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 662, |
| 45554, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 691, |
| 45253, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1449, |
| 358975, |
| 1025245, |
| 2860808, |
| 1652885, |
| 571, |
| 68, |
| 0, |
| 1352, |
| 358448, |
| 1026447, |
| 2856508, |
| 1653114, |
| 536, |
| 74, |
| 0 |
| ], |
| "merged": [ |
| 1397, |
| 357476, |
| 1022260, |
| 2857298, |
| 1660893, |
| 579, |
| 69, |
| 0, |
| 1400, |
| 356881, |
| 1023498, |
| 2853060, |
| 1661055, |
| 539, |
| 75, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.8.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010776133988418335, |
| "l1_avg": 0.009314872158898247, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008363525952728789, |
| "l1_avg": 0.000680675613693893, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0142865049095491, |
| "l1_avg": 0.010290181636810303, |
| "l0_avg": 0.9999975416395399 |
| }, |
| "merged": { |
| "l2_avg": 0.014284056999527154, |
| "l1_avg": 0.010290318065219455, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.274482426973784e-05, |
| "l1_avg": 4.0372279990050526e-05, |
| "l0_avg": 0.9999910142686632 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796374, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 34, |
| 9681, |
| 28610, |
| 7410, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9730, |
| 29234, |
| 7416, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 891, |
| 64639, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 867, |
| 64675, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8315, |
| 1775398, |
| 2832803, |
| 1272740, |
| 10332, |
| 0, |
| 0, |
| 0, |
| 8258, |
| 1775777, |
| 2831332, |
| 1271121, |
| 10404, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8331, |
| 1768437, |
| 2832300, |
| 1280024, |
| 10560, |
| 0, |
| 0, |
| 0, |
| 8253, |
| 1768864, |
| 2830626, |
| 1278474, |
| 10611, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.8.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010814242804461376, |
| "l1_avg": 0.009356847074296739, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008226762292906642, |
| "l1_avg": 0.0006694967742078006, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.045009329619059714, |
| "l1_avg": 0.03469284905327691, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.0450062949874302, |
| "l1_avg": 0.03469282256232368, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.284949844282567e-05, |
| "l1_avg": 4.049704617096318e-05, |
| "l0_avg": 0.9999708387586805 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474517, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9560, |
| 28947, |
| 7415, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9491, |
| 29175, |
| 7503, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 107, |
| 8064, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 108, |
| 8105, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 244, |
| 59510, |
| 166504, |
| 396452, |
| 115004, |
| 0, |
| 0, |
| 0, |
| 228, |
| 59545, |
| 166727, |
| 395518, |
| 114828, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 237, |
| 59170, |
| 166142, |
| 396268, |
| 115878, |
| 0, |
| 0, |
| 0, |
| 244, |
| 59281, |
| 166240, |
| 395399, |
| 115701, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.9.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010794954359114392, |
| "l1_avg": 0.009343953927357991, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008009890443645418, |
| "l1_avg": 0.0006431396468542516, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.021762318832521277, |
| "l1_avg": 0.012549552652570937, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "merged": { |
| "l2_avg": 0.021761736095288073, |
| "l1_avg": 0.012549648020002578, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.921143411296809e-05, |
| "l1_avg": 3.81711068459683e-05, |
| "l0_avg": 0.9999884711371527 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474543, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 42, |
| 9634, |
| 28868, |
| 7439, |
| 0, |
| 0, |
| 0, |
| 0, |
| 27, |
| 9598, |
| 28931, |
| 7621, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 134, |
| 8123, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 122, |
| 8005, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1122, |
| 254094, |
| 300735, |
| 164258, |
| 17049, |
| 0, |
| 0, |
| 0, |
| 1246, |
| 254283, |
| 300459, |
| 164499, |
| 16815, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 1182, |
| 253225, |
| 300908, |
| 164838, |
| 17177, |
| 0, |
| 0, |
| 0, |
| 1218, |
| 253397, |
| 300559, |
| 165133, |
| 16923, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.9.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009037366309362296, |
| "l1_avg": 0.007817314937710762, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008382669209422412, |
| "l1_avg": 0.0006795832680331337, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.16035449099671278, |
| "l1_avg": 0.11237069235907661, |
| "l0_avg": 0.9999996609157986 |
| }, |
| "merged": { |
| "l2_avg": 0.16034452164990837, |
| "l1_avg": 0.11237069235907661, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3380938479368695e-05, |
| "l1_avg": 3.411272644168801e-05, |
| "l0_avg": 0.999906243218316 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11795374, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 66, |
| 16343, |
| 48019, |
| 1153, |
| 0, |
| 0, |
| 0, |
| 0, |
| 67, |
| 16303, |
| 47850, |
| 1271, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 661, |
| 45475, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 596, |
| 45428, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 694, |
| 168120, |
| 483178, |
| 1703422, |
| 3538113, |
| 4967, |
| 379, |
| 0, |
| 659, |
| 167906, |
| 482479, |
| 1701481, |
| 3539697, |
| 5030, |
| 355, |
| 0 |
| ], |
| "merged": [ |
| 636, |
| 167491, |
| 481722, |
| 1699091, |
| 3544534, |
| 5015, |
| 386, |
| 0, |
| 674, |
| 167159, |
| 481019, |
| 1697101, |
| 3546219, |
| 5074, |
| 359, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.9.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01081409986891361, |
| "l1_avg": 0.009356022543377347, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008433460086754014, |
| "l1_avg": 0.0006851624348200858, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.013875715841991586, |
| "l1_avg": 0.009847031699286567, |
| "l0_avg": 0.9999978807237413 |
| }, |
| "merged": { |
| "l2_avg": 0.01387366666141514, |
| "l1_avg": 0.009847160180409749, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.367309456732437e-05, |
| "l1_avg": 4.136027095632421e-05, |
| "l0_avg": 0.999991946750217 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796385, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9528, |
| 28927, |
| 7474, |
| 0, |
| 0, |
| 0, |
| 0, |
| 40, |
| 9676, |
| 28970, |
| 7510, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 881, |
| 64865, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 890, |
| 64436, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8018, |
| 1868935, |
| 2845896, |
| 1162408, |
| 11975, |
| 0, |
| 0, |
| 0, |
| 8317, |
| 1871880, |
| 2846253, |
| 1160855, |
| 11943, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8007, |
| 1861764, |
| 2846119, |
| 1169198, |
| 12210, |
| 0, |
| 0, |
| 0, |
| 8106, |
| 1865069, |
| 2846058, |
| 1167784, |
| 12165, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.9.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01086135263321296, |
| "l1_avg": 0.009388493167029487, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000781990063842386, |
| "l1_avg": 0.000628103909548372, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.04543239999180562, |
| "l1_avg": 0.03402172724405925, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "merged": { |
| "l2_avg": 0.0454295444222909, |
| "l1_avg": 0.03402175108591716, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.202647168106899e-05, |
| "l1_avg": 3.926820225185818e-05, |
| "l0_avg": 0.9999721950954861 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474519, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 45, |
| 9615, |
| 28886, |
| 7506, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9486, |
| 29018, |
| 7568, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 128, |
| 8060, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 126, |
| 8070, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 257, |
| 63799, |
| 174747, |
| 391162, |
| 107767, |
| 0, |
| 0, |
| 0, |
| 257, |
| 63558, |
| 174856, |
| 390152, |
| 108005, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 266, |
| 63495, |
| 174256, |
| 391101, |
| 108587, |
| 0, |
| 0, |
| 0, |
| 251, |
| 63275, |
| 174414, |
| 390100, |
| 108815, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.7.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020420574526100925, |
| "l1_avg": 0.001731216079658932, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000766474057040716, |
| "l1_avg": 0.0006162363621923659, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.23229763422641928, |
| "l1_avg": 0.19562062958140433, |
| "l0_avg": 0.8773027594295549 |
| }, |
| "merged": { |
| "l2_avg": 0.2323404928883717, |
| "l1_avg": 0.19562183521412038, |
| "l0_avg": 0.8637842729356554 |
| }, |
| "diff": { |
| "l2_avg": 0.014872433361769863, |
| "l1_avg": 0.0027718885445300444, |
| "l0_avg": 0.047947522387092496 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 50905079, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65134183, |
| 88292541, |
| 108879905, |
| 59758330, |
| 87227183, |
| 61767423, |
| 43964503, |
| 16030232, |
| 65131416, |
| 88257135, |
| 108817267, |
| 59714956, |
| 87140744, |
| 61701648, |
| 43879758, |
| 15985976 |
| ], |
| "fp4_dist_after": [ |
| 72309496, |
| 124037911, |
| 108896720, |
| 72644102, |
| 74176580, |
| 50249439, |
| 25182240, |
| 3554866, |
| 72308453, |
| 123994636, |
| 108805915, |
| 72591828, |
| 74091895, |
| 50172636, |
| 25121442, |
| 3545041 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.7.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.01074353722222365, |
| "l1_avg": 0.0092946277724372, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008572226680945963, |
| "l1_avg": 0.0006999401582611931, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01874063809712728, |
| "l1_avg": 0.019104623676818096, |
| "l0_avg": 0.8777095333146461 |
| }, |
| "merged": { |
| "l2_avg": 0.018741254011789957, |
| "l1_avg": 0.019104676423249423, |
| "l0_avg": 0.8630692733953028 |
| }, |
| "diff": { |
| "l2_avg": 0.0013544018897745345, |
| "l1_avg": 0.0002954027093487021, |
| "l0_avg": 0.05197732713487413 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 110366910, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 129802701, |
| 171236183, |
| 209976059, |
| 119996925, |
| 176968510, |
| 131038757, |
| 91572054, |
| 30680170, |
| 129864767, |
| 171271913, |
| 210085142, |
| 120099045, |
| 177123335, |
| 131181666, |
| 91715945, |
| 30753228 |
| ], |
| "fp4_dist_after": [ |
| 145391619, |
| 240378705, |
| 212020382, |
| 143621625, |
| 150533347, |
| 106629131, |
| 55355674, |
| 7384824, |
| 145362485, |
| 240451381, |
| 212167336, |
| 143744519, |
| 150686780, |
| 106788216, |
| 55447121, |
| 7403255 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.8.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.002038389891411718, |
| "l1_avg": 0.0017289607061280144, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007532092258938817, |
| "l1_avg": 0.0006054655959208806, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.32512811734509767, |
| "l1_avg": 0.2641814959490741, |
| "l0_avg": 0.8758150538691768 |
| }, |
| "merged": { |
| "l2_avg": 0.3251259294553616, |
| "l1_avg": 0.2641846908757716, |
| "l0_avg": 0.861651968308437 |
| }, |
| "diff": { |
| "l2_avg": 0.020269479676278148, |
| "l1_avg": 0.0038632910928608458, |
| "l0_avg": 0.05012097770785108 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 53212600, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65925505, |
| 88681253, |
| 109386701, |
| 59410625, |
| 86863084, |
| 61463569, |
| 43651460, |
| 15702457, |
| 65919566, |
| 88671638, |
| 109323029, |
| 59356734, |
| 86742924, |
| 61383633, |
| 43549254, |
| 15651768 |
| ], |
| "fp4_dist_after": [ |
| 73437029, |
| 124906222, |
| 109185812, |
| 71631995, |
| 73041291, |
| 49684266, |
| 25498829, |
| 3692368, |
| 73444752, |
| 124857226, |
| 109112684, |
| 71558350, |
| 72924754, |
| 49597843, |
| 25432772, |
| 3677007 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.8.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010746242806984748, |
| "l1_avg": 0.009301769733428954, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008488715409123012, |
| "l1_avg": 0.0006911588625775443, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01871579753028022, |
| "l1_avg": 0.018708349157262733, |
| "l0_avg": 0.8782272809817467 |
| }, |
| "merged": { |
| "l2_avg": 0.01871590084499783, |
| "l1_avg": 0.018708369879075037, |
| "l0_avg": 0.8637494150797526 |
| }, |
| "diff": { |
| "l2_avg": 0.0013391941785812378, |
| "l1_avg": 0.0002860362735795386, |
| "l0_avg": 0.05137036782723886 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 109078113, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 129269819, |
| 172032754, |
| 211167121, |
| 119759389, |
| 176579158, |
| 130117824, |
| 91285916, |
| 31000497, |
| 129298281, |
| 172091556, |
| 211284824, |
| 119866806, |
| 176775177, |
| 130320571, |
| 91434230, |
| 31082477 |
| ], |
| "fp4_dist_after": [ |
| 144661088, |
| 241742972, |
| 212780974, |
| 143892326, |
| 150326053, |
| 105865183, |
| 54624192, |
| 7342058, |
| 144648826, |
| 241817515, |
| 212968877, |
| 144035765, |
| 150493127, |
| 106079200, |
| 54736479, |
| 7351765 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.9.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010720297626352874, |
| "l1_avg": 0.009270293182796902, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008420474483766488, |
| "l1_avg": 0.0006846715178754595, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018744774659474692, |
| "l1_avg": 0.018565378071349344, |
| "l0_avg": 0.8811745716613016 |
| }, |
| "merged": { |
| "l2_avg": 0.018744806448618572, |
| "l1_avg": 0.018565317789713542, |
| "l0_avg": 0.8669643063015408 |
| }, |
| "diff": { |
| "l2_avg": 0.0013312463959058126, |
| "l1_avg": 0.0002782252688466767, |
| "l0_avg": 0.05073527771749614 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 107729584, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 126131758, |
| 170355606, |
| 210872534, |
| 120449202, |
| 178091814, |
| 131536033, |
| 92284402, |
| 31415664, |
| 126178164, |
| 170453308, |
| 211005387, |
| 120580958, |
| 178303455, |
| 131747170, |
| 92466192, |
| 31494753 |
| ], |
| "fp4_dist_after": [ |
| 141217810, |
| 240152508, |
| 213212543, |
| 145455366, |
| 152127417, |
| 106981078, |
| 54722002, |
| 7272918, |
| 141265712, |
| 240276533, |
| 213373214, |
| 145645679, |
| 152351429, |
| 107175617, |
| 54838706, |
| 7297868 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.15.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010781049557613236, |
| "l1_avg": 0.009338645140329997, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007807556539773941, |
| "l1_avg": 0.0006248349091038108, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.016609328980618336, |
| "l1_avg": 0.008417503039042155, |
| "l0_avg": 0.9999932183159722 |
| }, |
| "merged": { |
| "l2_avg": 0.016608835774442526, |
| "l1_avg": 0.008417747418085734, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.784002042124393e-05, |
| "l1_avg": 3.73062522461017e-05, |
| "l0_avg": 0.9999959309895833 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474554, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 34, |
| 9545, |
| 29314, |
| 7464, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9561, |
| 28919, |
| 7285, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 146, |
| 8075, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 137, |
| 8026, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2547, |
| 378893, |
| 251180, |
| 94893, |
| 9457, |
| 0, |
| 0, |
| 0, |
| 2491, |
| 378431, |
| 251040, |
| 95947, |
| 9681, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 2522, |
| 378000, |
| 251641, |
| 95275, |
| 9533, |
| 0, |
| 0, |
| 0, |
| 2548, |
| 377427, |
| 251488, |
| 96375, |
| 9751, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.15.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00903176604686641, |
| "l1_avg": 0.00780939357355237, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008258814968262413, |
| "l1_avg": 0.0006675987607902951, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.3481244013748864, |
| "l1_avg": 0.25645147959391273, |
| "l0_avg": 0.9999998304578993 |
| }, |
| "merged": { |
| "l2_avg": 0.34811366788920034, |
| "l1_avg": 0.25645154317220054, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.28374361962698e-05, |
| "l1_avg": 3.352399087614483e-05, |
| "l0_avg": 0.9997828165690105 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11793918, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 64, |
| 16268, |
| 47956, |
| 1238, |
| 0, |
| 0, |
| 0, |
| 0, |
| 75, |
| 16399, |
| 47857, |
| 1215, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 620, |
| 45536, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 659, |
| 45345, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 408, |
| 96496, |
| 235600, |
| 792637, |
| 4700494, |
| 69155, |
| 2813, |
| 0, |
| 399, |
| 96581, |
| 236488, |
| 790331, |
| 4703473, |
| 68838, |
| 2767, |
| 0 |
| ], |
| "merged": [ |
| 379, |
| 96096, |
| 234956, |
| 790582, |
| 4702606, |
| 70088, |
| 2843, |
| 0, |
| 435, |
| 96176, |
| 235884, |
| 788117, |
| 4705691, |
| 69827, |
| 2800, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.15.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010814174478128104, |
| "l1_avg": 0.009357402059766982, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008208470001962987, |
| "l1_avg": 0.0006627262337133288, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0110323284074624, |
| "l1_avg": 0.007643702295091417, |
| "l0_avg": 0.9999959309895833 |
| }, |
| "merged": { |
| "l2_avg": 0.011030626865650412, |
| "l1_avg": 0.007643884420394898, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2032222939070334e-05, |
| "l1_avg": 3.9965669727987716e-05, |
| "l0_avg": 0.9999943203396268 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796413, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9527, |
| 28741, |
| 7605, |
| 0, |
| 0, |
| 0, |
| 0, |
| 47, |
| 9626, |
| 29302, |
| 7279, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 975, |
| 64591, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 913, |
| 64593, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 11499, |
| 2387674, |
| 2750803, |
| 741423, |
| 4765, |
| 0, |
| 0, |
| 0, |
| 11319, |
| 2385860, |
| 2755880, |
| 742616, |
| 4641, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 11385, |
| 2379671, |
| 2753860, |
| 746778, |
| 4838, |
| 0, |
| 0, |
| 0, |
| 11127, |
| 2377324, |
| 2758800, |
| 747991, |
| 4706, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.15.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010789261283368452, |
| "l1_avg": 0.009316364924112955, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007767592323943973, |
| "l1_avg": 0.0006252398015931249, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.052994720861186666, |
| "l1_avg": 0.040967122713724775, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "merged": { |
| "l2_avg": 0.05299125899363416, |
| "l1_avg": 0.04096713331010607, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.953583387317368e-05, |
| "l1_avg": 3.7917078265713324e-05, |
| "l0_avg": 0.9999674479166667 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474512, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9771, |
| 28747, |
| 7405, |
| 0, |
| 0, |
| 0, |
| 0, |
| 27, |
| 9762, |
| 28970, |
| 7446, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 113, |
| 8074, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 133, |
| 8064, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 206, |
| 49331, |
| 139991, |
| 386294, |
| 160322, |
| 0, |
| 0, |
| 0, |
| 185, |
| 49985, |
| 139895, |
| 387499, |
| 160852, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 194, |
| 49089, |
| 139591, |
| 385961, |
| 161310, |
| 0, |
| 0, |
| 0, |
| 197, |
| 49760, |
| 139466, |
| 387107, |
| 161885, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.16.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010780118120472295, |
| "l1_avg": 0.009321108791563245, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008109426125884056, |
| "l1_avg": 0.0006516865105368197, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.005960511689932621, |
| "l1_avg": 0.003513976600435045, |
| "l0_avg": 0.9999918619791667 |
| }, |
| "merged": { |
| "l2_avg": 0.005960453965961408, |
| "l1_avg": 0.003514283233218723, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.988887473644311e-05, |
| "l1_avg": 3.928144772847493e-05, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474557, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 46, |
| 9715, |
| 28876, |
| 7466, |
| 0, |
| 0, |
| 0, |
| 0, |
| 23, |
| 9623, |
| 28931, |
| 7480, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 124, |
| 8220, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 120, |
| 7920, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3032, |
| 516607, |
| 203311, |
| 14238, |
| 342, |
| 0, |
| 0, |
| 0, |
| 3081, |
| 517272, |
| 202106, |
| 14200, |
| 371, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3057, |
| 515539, |
| 204348, |
| 14362, |
| 346, |
| 0, |
| 0, |
| 0, |
| 3018, |
| 515932, |
| 203269, |
| 14313, |
| 376, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.16.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.0090279043418716, |
| "l1_avg": 0.007811993360519409, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008174613561205396, |
| "l1_avg": 0.0006614904436800215, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.2907728994602199, |
| "l1_avg": 0.21354526943630642, |
| "l0_avg": 0.9999996609157986 |
| }, |
| "merged": { |
| "l2_avg": 0.2907589494829624, |
| "l1_avg": 0.21354526943630642, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.189761235897486e-05, |
| "l1_avg": 3.254670235845778e-05, |
| "l0_avg": 0.9998148600260417 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11794296, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 64, |
| 16247, |
| 48033, |
| 1163, |
| 0, |
| 0, |
| 0, |
| 0, |
| 88, |
| 16359, |
| 47978, |
| 1140, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 667, |
| 45466, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 697, |
| 45330, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 497, |
| 116246, |
| 281826, |
| 946267, |
| 4521942, |
| 32711, |
| 796, |
| 0, |
| 507, |
| 116017, |
| 281188, |
| 946579, |
| 4518556, |
| 32605, |
| 743, |
| 0 |
| ], |
| "merged": [ |
| 463, |
| 115795, |
| 281090, |
| 943643, |
| 4525205, |
| 33234, |
| 805, |
| 0, |
| 501, |
| 115632, |
| 280450, |
| 943803, |
| 4521956, |
| 33146, |
| 757, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.16.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010754222440161785, |
| "l1_avg": 0.009295562903086344, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008380707378942058, |
| "l1_avg": 0.0006784809520468116, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.012426008882180027, |
| "l1_avg": 0.00884959962632921, |
| "l0_avg": 0.9999972025553385 |
| }, |
| "merged": { |
| "l2_avg": 0.012424376201720745, |
| "l1_avg": 0.008849745988845826, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.19389572826258e-05, |
| "l1_avg": 4.063158865190214e-05, |
| "l0_avg": 0.9999949137369791 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796420, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 39, |
| 9825, |
| 28946, |
| 7396, |
| 0, |
| 0, |
| 0, |
| 0, |
| 34, |
| 9637, |
| 28986, |
| 7297, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 899, |
| 64615, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 918, |
| 64640, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 11428, |
| 2057904, |
| 2835067, |
| 987541, |
| 5696, |
| 0, |
| 0, |
| 0, |
| 11391, |
| 2055752, |
| 2836589, |
| 989255, |
| 5857, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 11387, |
| 2050620, |
| 2835680, |
| 994143, |
| 5814, |
| 0, |
| 0, |
| 0, |
| 11240, |
| 2048636, |
| 2836989, |
| 996015, |
| 5956, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.16.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010783799888867518, |
| "l1_avg": 0.009323357211218939, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008077243692241609, |
| "l1_avg": 0.0006530964747071266, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.07024229790131147, |
| "l1_avg": 0.05422021018134223, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.07023685064329327, |
| "l1_avg": 0.05422021547953288, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.036328135360163e-05, |
| "l1_avg": 3.9342237222525806e-05, |
| "l0_avg": 0.9999565972222222 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474496, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 24, |
| 9689, |
| 29038, |
| 7440, |
| 0, |
| 0, |
| 0, |
| 0, |
| 30, |
| 9684, |
| 28888, |
| 7367, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 119, |
| 8149, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 127, |
| 7989, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 137, |
| 36697, |
| 107493, |
| 340996, |
| 252477, |
| 7, |
| 0, |
| 0, |
| 181, |
| 36822, |
| 107414, |
| 339674, |
| 252658, |
| 4, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 146, |
| 36560, |
| 107116, |
| 340408, |
| 253588, |
| 7, |
| 0, |
| 0, |
| 163, |
| 36658, |
| 107096, |
| 339054, |
| 253759, |
| 5, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.9.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020345050559855563, |
| "l1_avg": 0.0017267492082383897, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007400667145908095, |
| "l1_avg": 0.0005938712507486344, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.4148553036564064, |
| "l1_avg": 0.36437964168595677, |
| "l0_avg": 0.8745363956027561 |
| }, |
| "merged": { |
| "l2_avg": 0.4149421299107254, |
| "l1_avg": 0.3643848861882716, |
| "l0_avg": 0.860901528817636 |
| }, |
| "diff": { |
| "l2_avg": 0.02772940068136224, |
| "l1_avg": 0.005290178840543017, |
| "l0_avg": 0.0479514444610219 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 50909243, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 66607189, |
| 90297190, |
| 109991496, |
| 59727031, |
| 86120660, |
| 60029185, |
| 42636497, |
| 15684275, |
| 66595412, |
| 90260335, |
| 109936286, |
| 59677256, |
| 85989572, |
| 59950352, |
| 42550165, |
| 15630299 |
| ], |
| "fp4_dist_after": [ |
| 73837658, |
| 125838515, |
| 109352768, |
| 71864192, |
| 72897421, |
| 49050536, |
| 24693390, |
| 3548804, |
| 73840852, |
| 125789959, |
| 109279706, |
| 71790832, |
| 72775488, |
| 48953950, |
| 24631267, |
| 3537862 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.14.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020342067030682943, |
| "l1_avg": 0.0017263652549849616, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0006835913187648649, |
| "l1_avg": 0.0005444025413857566, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.9330830025437243, |
| "l1_avg": 0.8644075520833333, |
| "l0_avg": 0.8731042970257041 |
| }, |
| "merged": { |
| "l2_avg": 0.9332293813583925, |
| "l1_avg": 0.8644338348765432, |
| "l0_avg": 0.8582823868739752 |
| }, |
| "diff": { |
| "l2_avg": 0.06390168802751904, |
| "l1_avg": 0.01347087530442226, |
| "l0_avg": 0.0520440975236304 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 55254344, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 67360310, |
| 90401748, |
| 110219947, |
| 59343140, |
| 85925499, |
| 60359081, |
| 42283133, |
| 14932280, |
| 67362726, |
| 90395052, |
| 110233833, |
| 59346468, |
| 85921188, |
| 60364266, |
| 42297586, |
| 14936943 |
| ], |
| "fp4_dist_after": [ |
| 75241485, |
| 126285790, |
| 109439730, |
| 69974127, |
| 71347410, |
| 48939414, |
| 25768279, |
| 3841336, |
| 75217724, |
| 126273802, |
| 109450853, |
| 69980775, |
| 71369026, |
| 48948179, |
| 25767998, |
| 3837272 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.14.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010751217484384155, |
| "l1_avg": 0.009306601683298747, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008445731666272964, |
| "l1_avg": 0.0006879296153783799, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018561837408277722, |
| "l1_avg": 0.016982670536747686, |
| "l0_avg": 0.8819712344511056 |
| }, |
| "merged": { |
| "l2_avg": 0.01856282154719035, |
| "l1_avg": 0.016982640395929782, |
| "l0_avg": 0.8683067806856132 |
| }, |
| "diff": { |
| "l2_avg": 0.0012622382077905868, |
| "l1_avg": 0.0002427173838203336, |
| "l0_avg": 0.04881870787820698 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 103660004, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125306520, |
| 171613091, |
| 211204028, |
| 121311484, |
| 177977949, |
| 130831830, |
| 91561109, |
| 31604574, |
| 125311795, |
| 171650705, |
| 211275525, |
| 121395091, |
| 178100111, |
| 130921446, |
| 91658733, |
| 31642409 |
| ], |
| "fp4_dist_after": [ |
| 139816223, |
| 240412978, |
| 213397694, |
| 146814059, |
| 153130714, |
| 107092617, |
| 53726776, |
| 7022226, |
| 139816734, |
| 240466115, |
| 213499197, |
| 146916221, |
| 153247704, |
| 107186603, |
| 53790683, |
| 7029856 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.15.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020353762353972913, |
| "l1_avg": 0.001727180348502265, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007042398115560536, |
| "l1_avg": 0.0005618224127425088, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.0966820679313316, |
| "l1_avg": 0.9744214771412038, |
| "l0_avg": 0.8714152253704307 |
| }, |
| "merged": { |
| "l2_avg": 1.0966710385693745, |
| "l1_avg": 0.9744592737268518, |
| "l0_avg": 0.8566949029616368 |
| }, |
| "diff": { |
| "l2_avg": 0.0715931169884903, |
| "l1_avg": 0.014769524468315972, |
| "l0_avg": 0.05173106158221209 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 54921999, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 68258298, |
| 89918373, |
| 111056508, |
| 57915237, |
| 85425974, |
| 59722299, |
| 42976867, |
| 15588521, |
| 68257997, |
| 89897166, |
| 111038858, |
| 57925363, |
| 85407605, |
| 59720191, |
| 42982014, |
| 15591929 |
| ], |
| "fp4_dist_after": [ |
| 76080984, |
| 127273097, |
| 110092815, |
| 69698273, |
| 70682613, |
| 47789672, |
| 25335209, |
| 3919669, |
| 76063630, |
| 127239443, |
| 110081876, |
| 69716131, |
| 70674099, |
| 47781610, |
| 25332304, |
| 3921775 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.15.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010794971099358985, |
| "l1_avg": 0.009353300597932603, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008491722356807143, |
| "l1_avg": 0.0006916301118002997, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018389564090304903, |
| "l1_avg": 0.01674142531406732, |
| "l0_avg": 0.8816441100320699 |
| }, |
| "merged": { |
| "l2_avg": 0.018391214476691353, |
| "l1_avg": 0.016741393289448302, |
| "l0_avg": 0.868256911760495 |
| }, |
| "diff": { |
| "l2_avg": 0.0012457914650440215, |
| "l1_avg": 0.00023464929910353672, |
| "l0_avg": 0.04782234380274643 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 101544358, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125635633, |
| 172189629, |
| 211656416, |
| 121395968, |
| 177632445, |
| 129954763, |
| 91104664, |
| 31838432, |
| 125677287, |
| 172271558, |
| 211762524, |
| 121438577, |
| 177726620, |
| 130049056, |
| 91173535, |
| 31859293 |
| ], |
| "fp4_dist_after": [ |
| 139856160, |
| 240817435, |
| 213659509, |
| 147295484, |
| 153262267, |
| 106628000, |
| 52981426, |
| 6920735, |
| 139882687, |
| 240918083, |
| 213792463, |
| 147377247, |
| 153339529, |
| 106691697, |
| 53016977, |
| 6926701 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.17.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010781254536613055, |
| "l1_avg": 0.00932963424258762, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008015065104700625, |
| "l1_avg": 0.0006461135344579816, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.030082485546881917, |
| "l1_avg": 0.012234352694617378, |
| "l0_avg": 0.9999857584635417 |
| }, |
| "merged": { |
| "l2_avg": 0.030081982916384274, |
| "l1_avg": 0.012234886487325033, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.949356861215071e-05, |
| "l1_avg": 3.851168375048373e-05, |
| "l0_avg": 0.9999918619791667 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474548, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 38, |
| 9491, |
| 28909, |
| 7456, |
| 0, |
| 0, |
| 0, |
| 0, |
| 32, |
| 9732, |
| 29044, |
| 7458, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 119, |
| 8135, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 128, |
| 8002, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4306, |
| 416520, |
| 186243, |
| 95394, |
| 35195, |
| 0, |
| 0, |
| 0, |
| 4175, |
| 416123, |
| 186018, |
| 95222, |
| 35364, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4240, |
| 415855, |
| 186590, |
| 95597, |
| 35344, |
| 0, |
| 0, |
| 0, |
| 4195, |
| 415401, |
| 186418, |
| 95409, |
| 35511, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.17.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009044566835012579, |
| "l1_avg": 0.007825983688235283, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000827565799843435, |
| "l1_avg": 0.0006702174743016561, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.5381780029981432, |
| "l1_avg": 0.3927080790201823, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 0.5381647105357373, |
| "l1_avg": 0.3927080790201823, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.270639918544995e-05, |
| "l1_avg": 3.361788888772329e-05, |
| "l0_avg": 0.9996735466851129 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11792629, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 58, |
| 16211, |
| 47819, |
| 1190, |
| 0, |
| 0, |
| 0, |
| 0, |
| 66, |
| 16320, |
| 48204, |
| 1204, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 670, |
| 45300, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 643, |
| 45547, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 306, |
| 71713, |
| 170323, |
| 550972, |
| 4734174, |
| 345332, |
| 24966, |
| 0, |
| 289, |
| 71458, |
| 170337, |
| 550862, |
| 4735987, |
| 345149, |
| 24612, |
| 0 |
| ], |
| "merged": [ |
| 283, |
| 71440, |
| 169931, |
| 549437, |
| 4732792, |
| 348581, |
| 25326, |
| 0, |
| 299, |
| 71185, |
| 169868, |
| 549294, |
| 4734494, |
| 348561, |
| 24989, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.17.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010812328881769577, |
| "l1_avg": 0.009353110525343154, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008217066477334067, |
| "l1_avg": 0.0006615967722609639, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01324439273906315, |
| "l1_avg": 0.00882129669189453, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "merged": { |
| "l2_avg": 0.013242600122558875, |
| "l1_avg": 0.008821503321329752, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.301265553778877e-05, |
| "l1_avg": 4.052312837706672e-05, |
| "l0_avg": 0.9999947441948784 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796418, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 26, |
| 9382, |
| 29173, |
| 7383, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9710, |
| 29033, |
| 7414, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 969, |
| 64575, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 956, |
| 64572, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 13041, |
| 2393904, |
| 2451004, |
| 1028707, |
| 13474, |
| 0, |
| 0, |
| 0, |
| 13071, |
| 2392054, |
| 2450078, |
| 1027616, |
| 13531, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 13035, |
| 2387178, |
| 2451784, |
| 1034517, |
| 13734, |
| 0, |
| 0, |
| 0, |
| 12978, |
| 2385328, |
| 2450725, |
| 1033442, |
| 13759, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.17.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010807801280490041, |
| "l1_avg": 0.009336581495073106, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007837545708753169, |
| "l1_avg": 0.0006331523763947189, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.05940683466552779, |
| "l1_avg": 0.04598369068569607, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "merged": { |
| "l2_avg": 0.059403033522389376, |
| "l1_avg": 0.04598373836941189, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1365453061096615e-05, |
| "l1_avg": 3.875713381502363e-05, |
| "l0_avg": 0.9999735514322917 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474521, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 40, |
| 9689, |
| 28804, |
| 7440, |
| 0, |
| 0, |
| 0, |
| 0, |
| 35, |
| 9667, |
| 29146, |
| 7339, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 119, |
| 8076, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 133, |
| 8056, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 179, |
| 42896, |
| 125131, |
| 372468, |
| 197351, |
| 0, |
| 0, |
| 0, |
| 167, |
| 43028, |
| 124970, |
| 371552, |
| 196818, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 166, |
| 42726, |
| 124757, |
| 371917, |
| 198448, |
| 0, |
| 0, |
| 0, |
| 166, |
| 42857, |
| 124578, |
| 371047, |
| 197898, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.18.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010788691897257842, |
| "l1_avg": 0.00934318568971422, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008293594000861049, |
| "l1_avg": 0.0006731941248290241, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.007072280480345502, |
| "l1_avg": 0.00353697935740153, |
| "l0_avg": 0.9999898274739584 |
| }, |
| "merged": { |
| "l2_avg": 0.007072274197464282, |
| "l1_avg": 0.0035372985733879937, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.11475555003302e-05, |
| "l1_avg": 4.0268683288660314e-05, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474557, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 37, |
| 9553, |
| 29357, |
| 7493, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9533, |
| 28834, |
| 7317, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 125, |
| 7979, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 92, |
| 8188, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3152, |
| 524365, |
| 194848, |
| 14208, |
| 1002, |
| 0, |
| 0, |
| 0, |
| 3137, |
| 524427, |
| 194342, |
| 14032, |
| 1047, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3103, |
| 523034, |
| 195967, |
| 14314, |
| 1011, |
| 0, |
| 0, |
| 0, |
| 3314, |
| 523125, |
| 195502, |
| 14132, |
| 1058, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.18.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009025835195027518, |
| "l1_avg": 0.007814658805727959, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008121130043965656, |
| "l1_avg": 0.0006571923693021139, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.361479594251785, |
| "l1_avg": 0.2647099812825521, |
| "l0_avg": 0.9999998304578993 |
| }, |
| "merged": { |
| "l2_avg": 0.36146964267565224, |
| "l1_avg": 0.2647099600897895, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.203388599105948e-05, |
| "l1_avg": 3.243654241992367e-05, |
| "l0_avg": 0.9997639973958333 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11793696, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 64, |
| 16298, |
| 48046, |
| 939, |
| 0, |
| 0, |
| 0, |
| 0, |
| 71, |
| 16184, |
| 48563, |
| 907, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 615, |
| 45394, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 670, |
| 45481, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 390, |
| 97881, |
| 236637, |
| 781864, |
| 4688614, |
| 87762, |
| 3358, |
| 0, |
| 385, |
| 97598, |
| 236245, |
| 782360, |
| 4692084, |
| 88020, |
| 3282, |
| 0 |
| ], |
| "merged": [ |
| 380, |
| 97511, |
| 236027, |
| 779758, |
| 4690525, |
| 88891, |
| 3413, |
| 0, |
| 402, |
| 97218, |
| 235653, |
| 780200, |
| 4693984, |
| 89160, |
| 3358, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.18.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010797414107112224, |
| "l1_avg": 0.009344889058007134, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000841873832427806, |
| "l1_avg": 0.000681223114952445, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.014272201740192196, |
| "l1_avg": 0.010075535376866658, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "merged": { |
| "l2_avg": 0.014269686079484525, |
| "l1_avg": 0.010075675116644965, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1928210711894386e-05, |
| "l1_avg": 4.086023869199885e-05, |
| "l0_avg": 0.9999935574001736 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796404, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 38, |
| 9570, |
| 28801, |
| 7413, |
| 0, |
| 0, |
| 0, |
| 0, |
| 42, |
| 9575, |
| 29143, |
| 7578, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 901, |
| 64499, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 923, |
| 64749, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 10608, |
| 1886063, |
| 2750879, |
| 1240891, |
| 12533, |
| 0, |
| 0, |
| 0, |
| 10547, |
| 1884531, |
| 2749336, |
| 1238256, |
| 12836, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 10544, |
| 1879586, |
| 2750261, |
| 1247868, |
| 12731, |
| 0, |
| 0, |
| 0, |
| 10346, |
| 1878072, |
| 2748541, |
| 1245464, |
| 13067, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.18.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010781033850410186, |
| "l1_avg": 0.009321710798475477, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.00081983907148242, |
| "l1_avg": 0.0006643868982791901, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.07312905563841478, |
| "l1_avg": 0.05378440221150716, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.07312435604326183, |
| "l1_avg": 0.05378443929884169, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1688950884252e-05, |
| "l1_avg": 4.0249060839414595e-05, |
| "l0_avg": 0.9999559190538194 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474495, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 51, |
| 9559, |
| 29133, |
| 7398, |
| 0, |
| 0, |
| 0, |
| 0, |
| 31, |
| 9711, |
| 28955, |
| 7322, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 117, |
| 8045, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 106, |
| 8116, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 133, |
| 36863, |
| 107838, |
| 345870, |
| 246860, |
| 113, |
| 10, |
| 0, |
| 155, |
| 36876, |
| 107874, |
| 345474, |
| 246374, |
| 113, |
| 7, |
| 0 |
| ], |
| "merged": [ |
| 141, |
| 36707, |
| 107504, |
| 345243, |
| 247985, |
| 113, |
| 10, |
| 0, |
| 137, |
| 36742, |
| 107493, |
| 344844, |
| 247521, |
| 113, |
| 7, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.16.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020295996564389704, |
| "l1_avg": 0.0017235984404881796, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000659751416823711, |
| "l1_avg": 0.0005228545102808211, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.3198592889810008, |
| "l1_avg": 1.0984167631172839, |
| "l0_avg": 0.8711057338008175 |
| }, |
| "merged": { |
| "l2_avg": 1.3196884537550368, |
| "l1_avg": 1.0984665557484568, |
| "l0_avg": 0.8571233386757933 |
| }, |
| "diff": { |
| "l2_avg": 0.08011782473519402, |
| "l1_avg": 0.015762171804169077, |
| "l0_avg": 0.049155537169656635 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 52187608, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 68420399, |
| 89990065, |
| 111405288, |
| 57619710, |
| 85013275, |
| 59052316, |
| 43114652, |
| 16204592, |
| 68424478, |
| 89983239, |
| 111408582, |
| 57629111, |
| 85012867, |
| 59065512, |
| 43125672, |
| 16213442 |
| ], |
| "fp4_dist_after": [ |
| 75845032, |
| 127548840, |
| 110329097, |
| 70534357, |
| 70965847, |
| 47293305, |
| 24480939, |
| 3821395, |
| 75844719, |
| 127561810, |
| 110322145, |
| 70543267, |
| 70970411, |
| 47306331, |
| 24491966, |
| 3823739 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.16.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010712245290754369, |
| "l1_avg": 0.009262936645083958, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008458043168364604, |
| "l1_avg": 0.0006850313809182908, |
| "l0_avg": 0.984375 |
| }, |
| "original": { |
| "l2_avg": 0.01815070178773668, |
| "l1_avg": 0.01661700025016879, |
| "l0_avg": 0.8815845654334551 |
| }, |
| "merged": { |
| "l2_avg": 0.01815212302737766, |
| "l1_avg": 0.01661699836636767, |
| "l0_avg": 0.8683593778257017 |
| }, |
| "diff": { |
| "l2_avg": 0.0012279050217734442, |
| "l1_avg": 0.00023090762856565875, |
| "l0_avg": 0.04724370038067853 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 100315686, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125730596, |
| 171992278, |
| 211473485, |
| 121331114, |
| 177709804, |
| 130184270, |
| 91234280, |
| 31794566, |
| 125708759, |
| 172029160, |
| 211547056, |
| 121373456, |
| 177827209, |
| 130278634, |
| 91306109, |
| 31845624 |
| ], |
| "fp4_dist_after": [ |
| 141665625, |
| 239545996, |
| 213517519, |
| 146798636, |
| 153641965, |
| 107090361, |
| 53740133, |
| 7343460, |
| 137855649, |
| 239606022, |
| 213616701, |
| 146847108, |
| 153737373, |
| 107214939, |
| 53792572, |
| 7352341 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.17.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020334435360202774, |
| "l1_avg": 0.0017258319589826797, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007654136735797253, |
| "l1_avg": 0.0006141725099749035, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.4744728410781853, |
| "l1_avg": 1.234739703896605, |
| "l0_avg": 0.8705659126941069 |
| }, |
| "merged": { |
| "l2_avg": 1.474410980743731, |
| "l1_avg": 1.2347837094907408, |
| "l0_avg": 0.8569933752366055 |
| }, |
| "diff": { |
| "l2_avg": 0.09136422235688337, |
| "l1_avg": 0.017614512502411264, |
| "l0_avg": 0.047382826628508394 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 50305551, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 68719342, |
| 91680849, |
| 111432087, |
| 58491550, |
| 84470320, |
| 58058938, |
| 42054794, |
| 15933286, |
| 68698654, |
| 91688818, |
| 111457007, |
| 58487814, |
| 84477738, |
| 58043059, |
| 42057664, |
| 15931280 |
| ], |
| "fp4_dist_after": [ |
| 75909353, |
| 128010369, |
| 110030724, |
| 70901430, |
| 71153892, |
| 47201423, |
| 23972664, |
| 3645896, |
| 75918378, |
| 128028839, |
| 110043926, |
| 70895428, |
| 71162038, |
| 47194441, |
| 23972482, |
| 3641917 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.17.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010775115705773539, |
| "l1_avg": 0.009321137269337972, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008544744965807348, |
| "l1_avg": 0.0006991783777872721, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "original": { |
| "l2_avg": 0.019258979956309, |
| "l1_avg": 0.01748260498046875, |
| "l0_avg": 0.8783673764452522 |
| }, |
| "merged": { |
| "l2_avg": 0.019262904591030545, |
| "l1_avg": 0.017482627586082176, |
| "l0_avg": 0.8647023533008716 |
| }, |
| "diff": { |
| "l2_avg": 0.0013654618627495236, |
| "l1_avg": 0.00025507093947610735, |
| "l0_avg": 0.04844469235267168 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 102865832, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 129145097, |
| 174731831, |
| 212797318, |
| 120437417, |
| 175655317, |
| 127677667, |
| 89512625, |
| 31400883, |
| 129125529, |
| 174769810, |
| 212892374, |
| 120512799, |
| 175778616, |
| 127814407, |
| 89665837, |
| 31448873 |
| ], |
| "fp4_dist_after": [ |
| 143639987, |
| 243501426, |
| 213941359, |
| 145489939, |
| 150807107, |
| 104625741, |
| 52351790, |
| 6992314, |
| 143646490, |
| 243566769, |
| 214034833, |
| 145593481, |
| 150966056, |
| 104763414, |
| 52444755, |
| 7000939 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.18.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010756706400594887, |
| "l1_avg": 0.009315861596001519, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008594910827252599, |
| "l1_avg": 0.000702807969517178, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "original": { |
| "l2_avg": 0.019086371527777778, |
| "l1_avg": 0.01711915381160783, |
| "l0_avg": 0.8772385142761984 |
| }, |
| "merged": { |
| "l2_avg": 0.01909152931637234, |
| "l1_avg": 0.01711909352997203, |
| "l0_avg": 0.8637074482293776 |
| }, |
| "diff": { |
| "l2_avg": 0.0013843056228425767, |
| "l1_avg": 0.00025027781356999905, |
| "l0_avg": 0.047845385516131364 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 101593284, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 130335980, |
| 175821079, |
| 213378146, |
| 120116975, |
| 174846146, |
| 126491522, |
| 88880063, |
| 31511443, |
| 130331634, |
| 175850136, |
| 213466302, |
| 120201354, |
| 174964504, |
| 126631423, |
| 88994147, |
| 31545546 |
| ], |
| "fp4_dist_after": [ |
| 144681951, |
| 244524862, |
| 214137973, |
| 145341273, |
| 150291586, |
| 103779213, |
| 51664333, |
| 6942806, |
| 144717074, |
| 244573429, |
| 214252526, |
| 145440297, |
| 150422525, |
| 103921373, |
| 51733299, |
| 6941880 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.0.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010798477484758796, |
| "l1_avg": 0.009353333049350314, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008446850115433335, |
| "l1_avg": 0.000685103761497885, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.059868456797442786, |
| "l1_avg": 0.037969263394673665, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.05986558552072501, |
| "l1_avg": 0.03796927928924561, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.221639385421339e-05, |
| "l1_avg": 4.114976101037529e-05, |
| "l0_avg": 0.9999796549479166 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474530, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9514, |
| 29022, |
| 7496, |
| 0, |
| 0, |
| 0, |
| 0, |
| 53, |
| 9484, |
| 29118, |
| 7442, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 110, |
| 8080, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 102, |
| 8092, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 323, |
| 83050, |
| 201999, |
| 316920, |
| 134946, |
| 7, |
| 0, |
| 0, |
| 332, |
| 83148, |
| 201113, |
| 317945, |
| 134768, |
| 9, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 319, |
| 82673, |
| 201545, |
| 317131, |
| 135550, |
| 7, |
| 0, |
| 0, |
| 300, |
| 82852, |
| 200611, |
| 318183, |
| 135380, |
| 9, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.0.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00903690137693966, |
| "l1_avg": 0.00781862810254097, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008328965300489859, |
| "l1_avg": 0.0006743582172526254, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.009141611686925394, |
| "l1_avg": 0.005614105198118422, |
| "l0_avg": 0.9999962700737848 |
| }, |
| "merged": { |
| "l2_avg": 0.009140441043929418, |
| "l1_avg": 0.00561422242058648, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.286590571469512e-05, |
| "l1_avg": 3.393135137028164e-05, |
| "l0_avg": 0.9999964396158855 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796438, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 57, |
| 16072, |
| 47879, |
| 1069, |
| 0, |
| 0, |
| 0, |
| 0, |
| 66, |
| 16516, |
| 48292, |
| 1121, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 636, |
| 45595, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 652, |
| 45277, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 13215, |
| 2960866, |
| 2621722, |
| 296628, |
| 8369, |
| 5, |
| 0, |
| 0, |
| 13105, |
| 2955695, |
| 2621019, |
| 297443, |
| 8408, |
| 5, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 13147, |
| 2950970, |
| 2629087, |
| 299133, |
| 8457, |
| 5, |
| 0, |
| 0, |
| 13271, |
| 2945341, |
| 2628539, |
| 300044, |
| 8481, |
| 5, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.0.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010821576497566024, |
| "l1_avg": 0.009367282523049249, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008571080248490337, |
| "l1_avg": 0.0006965264910832047, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.034717892611691836, |
| "l1_avg": 0.02165618207719591, |
| "l0_avg": 0.9999969482421875 |
| }, |
| "merged": { |
| "l2_avg": 0.034712899052953786, |
| "l1_avg": 0.021656303935580785, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3364710938907475e-05, |
| "l1_avg": 4.182119543353716e-05, |
| "l0_avg": 0.9999865214029948 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796321, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 34, |
| 9577, |
| 28882, |
| 7537, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9524, |
| 29010, |
| 7551, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 871, |
| 64593, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 900, |
| 64708, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7134, |
| 1342605, |
| 2070068, |
| 2029911, |
| 448233, |
| 0, |
| 0, |
| 0, |
| 7071, |
| 1342191, |
| 2070960, |
| 2028873, |
| 449434, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 6963, |
| 1338021, |
| 2068240, |
| 2033713, |
| 451151, |
| 0, |
| 0, |
| 0, |
| 6914, |
| 1337339, |
| 2069299, |
| 2032381, |
| 452459, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.0.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01078317317146577, |
| "l1_avg": 0.009338992171817356, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008247842197306454, |
| "l1_avg": 0.0006674939068034291, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.05638893434870864, |
| "l1_avg": 0.03865005175272624, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "merged": { |
| "l2_avg": 0.056385899717079135, |
| "l1_avg": 0.038650112681918675, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.0548770536822374e-05, |
| "l1_avg": 3.948757787131601e-05, |
| "l0_avg": 0.9999708387586805 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474517, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9548, |
| 28960, |
| 7354, |
| 0, |
| 0, |
| 0, |
| 0, |
| 37, |
| 9573, |
| 29063, |
| 7590, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 117, |
| 8110, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 120, |
| 8037, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 284, |
| 69216, |
| 186648, |
| 339684, |
| 142623, |
| 0, |
| 0, |
| 0, |
| 268, |
| 69231, |
| 185226, |
| 339473, |
| 141907, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 287, |
| 68867, |
| 186205, |
| 339763, |
| 143321, |
| 0, |
| 0, |
| 0, |
| 282, |
| 68922, |
| 184728, |
| 339604, |
| 142581, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.1.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01076959115298732, |
| "l1_avg": 0.009315343697865804, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008497001836076379, |
| "l1_avg": 0.0006915585254319012, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.03044991472353893, |
| "l1_avg": 0.01929946608013577, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.030448488509501874, |
| "l1_avg": 0.019299515088399253, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.228140694934287e-05, |
| "l1_avg": 4.098148395617803e-05, |
| "l0_avg": 0.9999857584635417 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474539, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 50, |
| 9655, |
| 29102, |
| 7362, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9708, |
| 28817, |
| 7427, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 104, |
| 8162, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 109, |
| 8009, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 596, |
| 142274, |
| 286225, |
| 275904, |
| 32022, |
| 10, |
| 0, |
| 0, |
| 577, |
| 142552, |
| 286365, |
| 276159, |
| 31873, |
| 3, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 572, |
| 141723, |
| 285812, |
| 276589, |
| 32316, |
| 11, |
| 0, |
| 0, |
| 579, |
| 141979, |
| 285994, |
| 276839, |
| 32143, |
| 3, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.1.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009022557619011287, |
| "l1_avg": 0.007807549089193344, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008140446467768076, |
| "l1_avg": 0.0006586556633313497, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01851023148704291, |
| "l1_avg": 0.01091019180085924, |
| "l0_avg": 0.9999983045789931 |
| }, |
| "merged": { |
| "l2_avg": 0.018508231175813548, |
| "l1_avg": 0.010910280545552571, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.186353744313938e-05, |
| "l1_avg": 3.289390200128158e-05, |
| "l0_avg": 0.9999905056423611 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796368, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 68, |
| 16272, |
| 48095, |
| 971, |
| 0, |
| 0, |
| 0, |
| 0, |
| 67, |
| 16428, |
| 48170, |
| 1001, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 689, |
| 45246, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 652, |
| 45573, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7342, |
| 1782836, |
| 2919966, |
| 1115463, |
| 69215, |
| 9, |
| 0, |
| 0, |
| 7292, |
| 1786070, |
| 2921362, |
| 1117816, |
| 69099, |
| 10, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7339, |
| 1775789, |
| 2920455, |
| 1121574, |
| 69747, |
| 9, |
| 0, |
| 0, |
| 7290, |
| 1778842, |
| 2921817, |
| 1124045, |
| 69563, |
| 10, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.1.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010765749956481127, |
| "l1_avg": 0.009311509132385255, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008839111155961843, |
| "l1_avg": 0.000722558528650552, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01937174032139069, |
| "l1_avg": 0.013153917259640165, |
| "l0_avg": 0.9999970330132378 |
| }, |
| "merged": { |
| "l2_avg": 0.019368510501815456, |
| "l1_avg": 0.01315411196814643, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.586461839735528e-05, |
| "l1_avg": 4.227905948128965e-05, |
| "l0_avg": 0.9999905904134114 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796369, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 37, |
| 9755, |
| 29072, |
| 7391, |
| 0, |
| 0, |
| 0, |
| 0, |
| 25, |
| 9689, |
| 28823, |
| 7368, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 821, |
| 65251, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 810, |
| 64190, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8421, |
| 1632174, |
| 2533661, |
| 1646550, |
| 75263, |
| 0, |
| 0, |
| 0, |
| 8200, |
| 1632601, |
| 2533152, |
| 1650806, |
| 75652, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7821, |
| 1626545, |
| 2532398, |
| 1652824, |
| 76210, |
| 0, |
| 0, |
| 0, |
| 7896, |
| 1627075, |
| 2532017, |
| 1657058, |
| 76636, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.1.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010780751120755262, |
| "l1_avg": 0.00933414101600647, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008545542368665338, |
| "l1_avg": 0.0006911020027473569, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.03547329816018445, |
| "l1_avg": 0.026410606172349717, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.03547099548421713, |
| "l1_avg": 0.026410653856065537, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.266296313533921e-05, |
| "l1_avg": 3.9556312064329786e-05, |
| "l0_avg": 0.9999837239583333 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474536, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 43, |
| 9471, |
| 29022, |
| 7499, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9627, |
| 29137, |
| 7316, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 113, |
| 8048, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 117, |
| 8106, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 353, |
| 84628, |
| 221687, |
| 371164, |
| 60560, |
| 0, |
| 0, |
| 0, |
| 327, |
| 83788, |
| 221694, |
| 369593, |
| 60766, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 362, |
| 84218, |
| 221138, |
| 371539, |
| 61100, |
| 0, |
| 0, |
| 0, |
| 344, |
| 83431, |
| 221144, |
| 369978, |
| 61306, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.19.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010771741469085041, |
| "l1_avg": 0.009318746460808647, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008334285812452435, |
| "l1_avg": 0.0006744927377440035, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.047589872291237505, |
| "l1_avg": 0.019360958205329046, |
| "l0_avg": 0.9999891493055556 |
| }, |
| "merged": { |
| "l2_avg": 0.04758900525362907, |
| "l1_avg": 0.019361480077107748, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.107678412001961e-05, |
| "l1_avg": 4.0284583034614724e-05, |
| "l0_avg": 0.9999864366319444 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474540, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 38, |
| 9583, |
| 28813, |
| 7395, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9641, |
| 29237, |
| 7415, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 119, |
| 8023, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 108, |
| 8134, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4086, |
| 369143, |
| 170869, |
| 129671, |
| 63607, |
| 37, |
| 0, |
| 0, |
| 4037, |
| 369107, |
| 170832, |
| 129681, |
| 63448, |
| 42, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3751, |
| 368796, |
| 171085, |
| 129829, |
| 63840, |
| 37, |
| 0, |
| 0, |
| 3839, |
| 368794, |
| 170997, |
| 129851, |
| 63699, |
| 42, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.19.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009030801279162216, |
| "l1_avg": 0.007816169410943985, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008281975239161541, |
| "l1_avg": 0.00067057932416598, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.7311691677304214, |
| "l1_avg": 0.5213995191786024, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.7311629835366817, |
| "l1_avg": 0.5213994344075521, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.2464876841466547e-05, |
| "l1_avg": 3.348399833258655e-05, |
| "l0_avg": 0.9995762295193142 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11791481, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 58, |
| 16337, |
| 48220, |
| 1147, |
| 0, |
| 0, |
| 0, |
| 0, |
| 60, |
| 16334, |
| 47854, |
| 1062, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 615, |
| 45317, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 652, |
| 45576, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 187, |
| 48075, |
| 130528, |
| 436156, |
| 4502758, |
| 685366, |
| 94651, |
| 0, |
| 184, |
| 48004, |
| 130417, |
| 437192, |
| 4501584, |
| 686436, |
| 94942, |
| 0 |
| ], |
| "merged": [ |
| 187, |
| 47885, |
| 130178, |
| 434859, |
| 4498588, |
| 690182, |
| 95839, |
| 0, |
| 196, |
| 47794, |
| 130033, |
| 436027, |
| 4497338, |
| 691245, |
| 96129, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.19.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010805943118369075, |
| "l1_avg": 0.009346950716442532, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008489577133821126, |
| "l1_avg": 0.0006884735776111484, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.011698729705591885, |
| "l1_avg": 0.007001764244503445, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "merged": { |
| "l2_avg": 0.01169727362118228, |
| "l1_avg": 0.0070021079646216495, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.377887258145538e-05, |
| "l1_avg": 4.1669896907276575e-05, |
| "l0_avg": 0.9999945746527777 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796416, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9625, |
| 28769, |
| 7490, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9663, |
| 29138, |
| 7401, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 865, |
| 64573, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 922, |
| 64712, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 22158, |
| 3078070, |
| 2042020, |
| 740699, |
| 12011, |
| 1, |
| 0, |
| 0, |
| 22114, |
| 3077252, |
| 2050319, |
| 740043, |
| 11790, |
| 3, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 22221, |
| 3070856, |
| 2044654, |
| 744949, |
| 12173, |
| 1, |
| 0, |
| 0, |
| 22390, |
| 3070168, |
| 2052765, |
| 744339, |
| 11961, |
| 3, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.19.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010792192247457822, |
| "l1_avg": 0.009332721100913153, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008080072002485394, |
| "l1_avg": 0.0006506533827632666, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.08165449193585121, |
| "l1_avg": 0.05492275026109483, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "merged": { |
| "l2_avg": 0.08164868027072222, |
| "l1_avg": 0.05492277675204807, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.056056934598942e-05, |
| "l1_avg": 3.880917922490173e-05, |
| "l0_avg": 0.9999525282118056 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474490, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 30, |
| 9736, |
| 28784, |
| 7382, |
| 0, |
| 0, |
| 0, |
| 0, |
| 40, |
| 9540, |
| 29241, |
| 7407, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 112, |
| 8063, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 122, |
| 8087, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 161, |
| 38664, |
| 112157, |
| 344442, |
| 242247, |
| 205, |
| 50, |
| 0, |
| 143, |
| 38823, |
| 111909, |
| 345011, |
| 240504, |
| 193, |
| 51, |
| 0 |
| ], |
| "merged": [ |
| 155, |
| 38460, |
| 111797, |
| 343970, |
| 243264, |
| 206, |
| 50, |
| 0, |
| 137, |
| 38723, |
| 111554, |
| 344399, |
| 241600, |
| 192, |
| 53, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.2.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010807868821463163, |
| "l1_avg": 0.009362379047605727, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000862315995618701, |
| "l1_avg": 0.0006977969314903021, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.020890688437917828, |
| "l1_avg": 0.015108088652292887, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "merged": { |
| "l2_avg": 0.02088960307018698, |
| "l1_avg": 0.015108156204223632, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.30222592695113e-05, |
| "l1_avg": 4.152200288242764e-05, |
| "l0_avg": 0.9999857584635417 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474539, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 29, |
| 9425, |
| 28849, |
| 7490, |
| 0, |
| 0, |
| 0, |
| 0, |
| 42, |
| 9632, |
| 29197, |
| 7496, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 93, |
| 8143, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 108, |
| 8040, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 595, |
| 148291, |
| 321288, |
| 258567, |
| 9109, |
| 0, |
| 0, |
| 0, |
| 591, |
| 147147, |
| 321436, |
| 258426, |
| 9110, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 572, |
| 147624, |
| 320949, |
| 259468, |
| 9225, |
| 0, |
| 0, |
| 0, |
| 543, |
| 146520, |
| 321045, |
| 259388, |
| 9226, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.2.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009052571443507108, |
| "l1_avg": 0.007835904136300087, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008091770336362221, |
| "l1_avg": 0.0006512484616703457, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.023347590463821993, |
| "l1_avg": 0.01422597434785631, |
| "l0_avg": 0.9999983045789931 |
| }, |
| "merged": { |
| "l2_avg": 0.023345855602000632, |
| "l1_avg": 0.014226052496168349, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.201076589601399e-05, |
| "l1_avg": 3.29847266483638e-05, |
| "l0_avg": 0.9999884711371527 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796344, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 62, |
| 16290, |
| 48420, |
| 913, |
| 0, |
| 0, |
| 0, |
| 0, |
| 79, |
| 16336, |
| 48010, |
| 962, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 700, |
| 45278, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 703, |
| 45479, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5129, |
| 1295939, |
| 2795480, |
| 1697419, |
| 105329, |
| 38, |
| 2, |
| 0, |
| 5194, |
| 1296784, |
| 2793843, |
| 1695274, |
| 106024, |
| 21, |
| 4, |
| 0 |
| ], |
| "merged": [ |
| 5238, |
| 1290443, |
| 2792705, |
| 1704793, |
| 106142, |
| 38, |
| 2, |
| 0, |
| 5152, |
| 1291185, |
| 2791308, |
| 1702605, |
| 106844, |
| 21, |
| 4, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.2.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010786100994114532, |
| "l1_avg": 0.00933652851316664, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008615142638338825, |
| "l1_avg": 0.0006994551513344049, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.017610372432573646, |
| "l1_avg": 0.012524637911054824, |
| "l0_avg": 0.9999979654947917 |
| }, |
| "merged": { |
| "l2_avg": 0.017606794974233952, |
| "l1_avg": 0.012524739901224772, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.355273471054973e-05, |
| "l1_avg": 4.136233797503842e-05, |
| "l0_avg": 0.9999916076660156 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796381, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 30, |
| 9647, |
| 29131, |
| 7284, |
| 0, |
| 0, |
| 0, |
| 0, |
| 31, |
| 9450, |
| 29169, |
| 7418, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 868, |
| 64836, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 907, |
| 64461, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 6894, |
| 1566063, |
| 2628654, |
| 1662959, |
| 36419, |
| 0, |
| 0, |
| 0, |
| 6984, |
| 1566508, |
| 2625887, |
| 1659589, |
| 36523, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7088, |
| 1559741, |
| 2626954, |
| 1670139, |
| 37005, |
| 0, |
| 0, |
| 0, |
| 7034, |
| 1560659, |
| 2623747, |
| 1667021, |
| 37092, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.2.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010801776782759719, |
| "l1_avg": 0.009354958269331191, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008587405900470912, |
| "l1_avg": 0.0006984120118431747, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.040144252799086934, |
| "l1_avg": 0.03073741594950358, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.04014148833134991, |
| "l1_avg": 0.030737471580505372, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3371517519233415e-05, |
| "l1_avg": 4.1608077784379326e-05, |
| "l0_avg": 0.9999776204427083 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474527, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 39, |
| 9561, |
| 29098, |
| 7329, |
| 0, |
| 0, |
| 0, |
| 0, |
| 46, |
| 9441, |
| 29115, |
| 7531, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 101, |
| 8104, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 98, |
| 8081, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 263, |
| 68588, |
| 187304, |
| 395072, |
| 85581, |
| 0, |
| 0, |
| 0, |
| 281, |
| 68581, |
| 187478, |
| 395537, |
| 85875, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 262, |
| 68330, |
| 186730, |
| 395143, |
| 86355, |
| 0, |
| 0, |
| 0, |
| 251, |
| 68262, |
| 186987, |
| 395601, |
| 86639, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.0.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020434163536590263, |
| "l1_avg": 0.0017323258850309583, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007847534618466842, |
| "l1_avg": 0.0006307920647992028, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.02562208822690981, |
| "l1_avg": 0.01792925610954379, |
| "l0_avg": 0.8817025851025994 |
| }, |
| "merged": { |
| "l2_avg": 0.025623601767069668, |
| "l1_avg": 0.017929197711709106, |
| "l0_avg": 0.8683371979513286 |
| }, |
| "diff": { |
| "l2_avg": 0.0014872800975948542, |
| "l1_avg": 0.0002452447090619876, |
| "l0_avg": 0.04798251493477527 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 50942230, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 62803656, |
| 85527349, |
| 107102535, |
| 60101440, |
| 88946557, |
| 64407995, |
| 45759626, |
| 16371796, |
| 62790722, |
| 85509837, |
| 107030932, |
| 60048466, |
| 88899364, |
| 64338850, |
| 45697037, |
| 16347038 |
| ], |
| "fp4_dist_after": [ |
| 69902776, |
| 121217066, |
| 108097698, |
| 73838855, |
| 76226563, |
| 52212063, |
| 25984885, |
| 3545865, |
| 69881409, |
| 121188151, |
| 108022973, |
| 73761002, |
| 76163216, |
| 52147888, |
| 25951364, |
| 3541426 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.0.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010735510431965663, |
| "l1_avg": 0.00929771794213189, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008559440035962021, |
| "l1_avg": 0.0006972139080365498, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.024459375275505915, |
| "l1_avg": 0.022107131392867477, |
| "l0_avg": 0.7638229308893651 |
| }, |
| "merged": { |
| "l2_avg": 0.024462191263834636, |
| "l1_avg": 0.022107153998480903, |
| "l0_avg": 0.7490986463758681 |
| }, |
| "diff": { |
| "l2_avg": 0.0016972454057799446, |
| "l1_avg": 0.00034829157370108146, |
| "l0_avg": 0.046622724650818625 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 98997127, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 250577062, |
| 181662495, |
| 195614547, |
| 93108623, |
| 137137778, |
| 96395041, |
| 76011042, |
| 31030367, |
| 250913391, |
| 181781448, |
| 195652756, |
| 93087531, |
| 137062370, |
| 96352623, |
| 75977201, |
| 31002125 |
| ], |
| "fp4_dist_after": [ |
| 266371084, |
| 247404620, |
| 185170421, |
| 113537281, |
| 115783205, |
| 81303367, |
| 44676309, |
| 7459471, |
| 266384420, |
| 247529138, |
| 185164900, |
| 113498264, |
| 115722811, |
| 81264596, |
| 44648261, |
| 7448252 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.18.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020337355026024094, |
| "l1_avg": 0.0017263147566053602, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007730728493826013, |
| "l1_avg": 0.0006208479404449463, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.6197555529597611, |
| "l1_avg": 1.3785572193287037, |
| "l0_avg": 0.8703648687292028 |
| }, |
| "merged": { |
| "l2_avg": 1.6197987113326364, |
| "l1_avg": 1.3786207561728394, |
| "l0_avg": 0.8566580077748239 |
| }, |
| "diff": { |
| "l2_avg": 0.10484845653988022, |
| "l1_avg": 0.020308334915726273, |
| "l0_avg": 0.04766754527150849 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 50607832, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 68817216, |
| 92768879, |
| 111281098, |
| 59354446, |
| 84297054, |
| 57864172, |
| 41148504, |
| 15403846, |
| 68814225, |
| 92774908, |
| 111237382, |
| 59325838, |
| 84254229, |
| 57831750, |
| 41117489, |
| 15392164 |
| ], |
| "fp4_dist_after": [ |
| 76083938, |
| 128045280, |
| 109755004, |
| 70733720, |
| 71067542, |
| 47547063, |
| 24092076, |
| 3602854, |
| 76099847, |
| 128036885, |
| 109702831, |
| 70701751, |
| 71023133, |
| 47518089, |
| 24071957, |
| 3601230 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.19.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020343403996059043, |
| "l1_avg": 0.001726949049366845, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008067386867025474, |
| "l1_avg": 0.0006500302917427487, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 2.1149439337360763, |
| "l1_avg": 1.8078583140432098, |
| "l0_avg": 0.8678013422459732 |
| }, |
| "merged": { |
| "l2_avg": 2.1152376504403656, |
| "l1_avg": 1.8079705584490742, |
| "l0_avg": 0.8531920746226369 |
| }, |
| "diff": { |
| "l2_avg": 0.13997731904043598, |
| "l1_avg": 0.028094525749300735, |
| "l0_avg": 0.050642707730516975 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 53766512, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 70175799, |
| 93286448, |
| 111885747, |
| 58693001, |
| 83760075, |
| 57667424, |
| 40637343, |
| 14862850, |
| 70177295, |
| 93265899, |
| 111839588, |
| 58648589, |
| 83730406, |
| 57622631, |
| 40585390, |
| 14844715 |
| ], |
| "fp4_dist_after": [ |
| 77929117, |
| 129034136, |
| 109976540, |
| 68950587, |
| 69441585, |
| 47079330, |
| 24716247, |
| 3839629, |
| 77934391, |
| 129003357, |
| 109931760, |
| 68896437, |
| 69398087, |
| 47035526, |
| 24681727, |
| 3834744 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.19.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010753939729149939, |
| "l1_avg": 0.009303849273257786, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008609100321809058, |
| "l1_avg": 0.0007055915892124176, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019795747598012288, |
| "l1_avg": 0.01771171287254051, |
| "l0_avg": 0.8752191218623409 |
| }, |
| "merged": { |
| "l2_avg": 0.01980352799097697, |
| "l1_avg": 0.017711733594352817, |
| "l0_avg": 0.8614886625313465 |
| }, |
| "diff": { |
| "l2_avg": 0.0014699013696776496, |
| "l1_avg": 0.0002659203682416751, |
| "l0_avg": 0.048263445253725404 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 102480978, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 132464471, |
| 177746538, |
| 213528794, |
| 119948282, |
| 173575667, |
| 125280163, |
| 87577241, |
| 30902589, |
| 132491053, |
| 177848387, |
| 213689448, |
| 120119886, |
| 173828897, |
| 125524217, |
| 87833441, |
| 31007326 |
| ], |
| "fp4_dist_after": [ |
| 147053610, |
| 245780799, |
| 213762671, |
| 144099112, |
| 148873534, |
| 103020221, |
| 51506316, |
| 6944096, |
| 147056710, |
| 245934201, |
| 213957736, |
| 144319539, |
| 149147096, |
| 103260461, |
| 51677967, |
| 6972331 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.10.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010779273072948136, |
| "l1_avg": 0.009317547082901001, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008070803014561534, |
| "l1_avg": 0.0006529029924422503, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.012064083014536094, |
| "l1_avg": 0.008005122343699137, |
| "l0_avg": 0.9999952528211805 |
| }, |
| "merged": { |
| "l2_avg": 0.012063501848023198, |
| "l1_avg": 0.008005173338784113, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.981275309009318e-05, |
| "l1_avg": 3.933426406648424e-05, |
| "l0_avg": 0.9999959309895833 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474554, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 24, |
| 9621, |
| 29104, |
| 7426, |
| 0, |
| 0, |
| 0, |
| 0, |
| 44, |
| 9861, |
| 28592, |
| 7488, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 106, |
| 8023, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 124, |
| 8131, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1212, |
| 278210, |
| 365599, |
| 90652, |
| 2017, |
| 0, |
| 0, |
| 0, |
| 1214, |
| 278001, |
| 364742, |
| 90883, |
| 2030, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 1212, |
| 277131, |
| 365974, |
| 91355, |
| 2026, |
| 0, |
| 0, |
| 0, |
| 1258, |
| 276950, |
| 364976, |
| 91635, |
| 2043, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.10.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00904432449063364, |
| "l1_avg": 0.007823658175766468, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008342810218279381, |
| "l1_avg": 0.0006761181271738476, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.12095067440817482, |
| "l1_avg": 0.08636985884772407, |
| "l0_avg": 0.9999997456868489 |
| }, |
| "merged": { |
| "l2_avg": 0.12094122929618452, |
| "l1_avg": 0.086369874742296, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3402644209432986e-05, |
| "l1_avg": 3.4016924392845894e-05, |
| "l0_avg": 0.999927266438802 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11795622, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 54, |
| 16255, |
| 47906, |
| 1233, |
| 0, |
| 0, |
| 0, |
| 0, |
| 66, |
| 16209, |
| 48124, |
| 1225, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 668, |
| 45553, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 641, |
| 45298, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 838, |
| 215640, |
| 592731, |
| 2007095, |
| 3082792, |
| 1515, |
| 374, |
| 0, |
| 826, |
| 215191, |
| 593440, |
| 2005673, |
| 3078432, |
| 1537, |
| 396, |
| 0 |
| ], |
| "merged": [ |
| 898, |
| 214707, |
| 590985, |
| 2002282, |
| 3090204, |
| 1528, |
| 376, |
| 0, |
| 856, |
| 214219, |
| 591704, |
| 2000858, |
| 3085919, |
| 1546, |
| 398, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.10.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010758329873759697, |
| "l1_avg": 0.009296438429090711, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008466332982231241, |
| "l1_avg": 0.0006857975386083126, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.013016171777529844, |
| "l1_avg": 0.009479468398623996, |
| "l0_avg": 0.9999971177842882 |
| }, |
| "merged": { |
| "l2_avg": 0.01301415702762975, |
| "l1_avg": 0.009479599528842503, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2514200345486903e-05, |
| "l1_avg": 4.087995168649488e-05, |
| "l0_avg": 0.9999943203396268 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796413, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9684, |
| 29003, |
| 7391, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9810, |
| 28846, |
| 7356, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 888, |
| 64605, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 895, |
| 64684, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7979, |
| 1860782, |
| 2917748, |
| 1109205, |
| 4978, |
| 0, |
| 0, |
| 0, |
| 7976, |
| 1860701, |
| 2913019, |
| 1108987, |
| 5105, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8021, |
| 1853508, |
| 2917625, |
| 1116408, |
| 5083, |
| 0, |
| 0, |
| 0, |
| 8107, |
| 1853602, |
| 2912957, |
| 1115955, |
| 5214, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.10.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01079725075220049, |
| "l1_avg": 0.00933397478527493, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008210999076254666, |
| "l1_avg": 0.000668276334181428, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.049986656394918234, |
| "l1_avg": 0.038393433888753256, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.049983430135411504, |
| "l1_avg": 0.038393492168850366, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.105461610258824e-05, |
| "l1_avg": 3.9556211171050866e-05, |
| "l0_avg": 0.9999708387586805 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474517, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 40, |
| 9671, |
| 28908, |
| 7486, |
| 0, |
| 0, |
| 0, |
| 0, |
| 43, |
| 9671, |
| 28989, |
| 7352, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 101, |
| 8096, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 118, |
| 8069, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 225, |
| 55135, |
| 152903, |
| 384648, |
| 143898, |
| 0, |
| 0, |
| 0, |
| 226, |
| 55881, |
| 153264, |
| 384769, |
| 143611, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 192, |
| 54922, |
| 152469, |
| 384312, |
| 144908, |
| 0, |
| 0, |
| 0, |
| 218, |
| 55679, |
| 152799, |
| 384521, |
| 144540, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.11.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010787378775082753, |
| "l1_avg": 0.009335876835717096, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007994427578523755, |
| "l1_avg": 0.000646946660708636, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01841359654647645, |
| "l1_avg": 0.009716233279969956, |
| "l0_avg": 0.9999945746527777 |
| }, |
| "merged": { |
| "l2_avg": 0.018413178734875288, |
| "l1_avg": 0.00971643262439304, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.939174789549595e-05, |
| "l1_avg": 3.825840540230274e-05, |
| "l0_avg": 0.9999918619791667 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474548, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 50, |
| 9639, |
| 28940, |
| 7374, |
| 0, |
| 0, |
| 0, |
| 0, |
| 49, |
| 9572, |
| 29039, |
| 7497, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 143, |
| 8073, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 107, |
| 8061, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1887, |
| 344171, |
| 260175, |
| 119003, |
| 11945, |
| 0, |
| 0, |
| 0, |
| 1939, |
| 344128, |
| 259813, |
| 119472, |
| 12027, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 1960, |
| 343250, |
| 260548, |
| 119494, |
| 12041, |
| 0, |
| 0, |
| 0, |
| 1875, |
| 343234, |
| 260116, |
| 119943, |
| 12099, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.11.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00903718191690006, |
| "l1_avg": 0.00781302060931921, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008365556211698032, |
| "l1_avg": 0.0006785504933860567, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.16030825170903873, |
| "l1_avg": 0.11654889848497178, |
| "l0_avg": 0.9999995761447482 |
| }, |
| "merged": { |
| "l2_avg": 0.16029712726857603, |
| "l1_avg": 0.11654891967773437, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3470190983642647e-05, |
| "l1_avg": 3.4000062280231055e-05, |
| "l0_avg": 0.9999059889051649 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11795371, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 78, |
| 16392, |
| 47920, |
| 1315, |
| 0, |
| 0, |
| 0, |
| 0, |
| 65, |
| 16401, |
| 47564, |
| 1337, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 581, |
| 45531, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 637, |
| 45411, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 734, |
| 176143, |
| 459375, |
| 1574681, |
| 3683229, |
| 2390, |
| 375, |
| 0, |
| 725, |
| 175595, |
| 458943, |
| 1578533, |
| 3683069, |
| 2354, |
| 334, |
| 0 |
| ], |
| "merged": [ |
| 700, |
| 175442, |
| 458051, |
| 1570518, |
| 3689433, |
| 2416, |
| 379, |
| 0, |
| 761, |
| 174800, |
| 457479, |
| 1574568, |
| 3689220, |
| 2377, |
| 336, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.11.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010813868973028756, |
| "l1_avg": 0.009337700737847222, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008170174815643157, |
| "l1_avg": 0.0006605401868000627, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.012771576252723606, |
| "l1_avg": 0.00921734439002143, |
| "l0_avg": 0.9999966939290365 |
| }, |
| "merged": { |
| "l2_avg": 0.012769648134847881, |
| "l1_avg": 0.009217487441168891, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.506130247346086e-05, |
| "l1_avg": 4.1455035615298486e-05, |
| "l0_avg": 0.9999941507975261 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796411, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 46, |
| 9704, |
| 28916, |
| 7394, |
| 0, |
| 0, |
| 0, |
| 0, |
| 35, |
| 9733, |
| 28816, |
| 7516, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 927, |
| 64575, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 923, |
| 64647, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8938, |
| 1961599, |
| 2856199, |
| 1067518, |
| 4836, |
| 0, |
| 0, |
| 0, |
| 8747, |
| 1959846, |
| 2856608, |
| 1067513, |
| 4676, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8723, |
| 1954401, |
| 2856170, |
| 1074487, |
| 4952, |
| 0, |
| 0, |
| 0, |
| 8962, |
| 1952634, |
| 2856869, |
| 1074503, |
| 4779, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.11.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010804884452883417, |
| "l1_avg": 0.009337617953618368, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007872685673646629, |
| "l1_avg": 0.0006376968813128769, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.053487512366836645, |
| "l1_avg": 0.04031961229112413, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.053484213854195876, |
| "l1_avg": 0.04031960434383816, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1978227252322005e-05, |
| "l1_avg": 3.881175588402483e-05, |
| "l0_avg": 0.9999708387586805 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474517, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9734, |
| 29057, |
| 7331, |
| 0, |
| 0, |
| 0, |
| 0, |
| 29, |
| 9638, |
| 28893, |
| 7446, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 148, |
| 8099, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 108, |
| 8029, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 205, |
| 55437, |
| 149798, |
| 377681, |
| 154944, |
| 0, |
| 0, |
| 0, |
| 209, |
| 55468, |
| 148863, |
| 377327, |
| 154628, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 216, |
| 55171, |
| 149424, |
| 377400, |
| 155859, |
| 0, |
| 0, |
| 0, |
| 230, |
| 55185, |
| 148479, |
| 377032, |
| 155564, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.1.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020387636308501866, |
| "l1_avg": 0.0017293812500105963, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007648730021996941, |
| "l1_avg": 0.0006142784323957232, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.03980980014138515, |
| "l1_avg": 0.028812521475332753, |
| "l0_avg": 0.880399670071072 |
| }, |
| "merged": { |
| "l2_avg": 0.03979328981592849, |
| "l1_avg": 0.028812553499951776, |
| "l0_avg": 0.8669218576690297 |
| }, |
| "diff": { |
| "l2_avg": 0.002350308470497878, |
| "l1_avg": 0.00040283144256215037, |
| "l0_avg": 0.048275773790147566 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 51253578, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 63488584, |
| 86154927, |
| 107716122, |
| 59887260, |
| 88503963, |
| 63677140, |
| 45309743, |
| 16278299, |
| 63489077, |
| 86123054, |
| 107652801, |
| 59838302, |
| 88430729, |
| 63603164, |
| 45271655, |
| 16258380 |
| ], |
| "fp4_dist_after": [ |
| 70626843, |
| 122082333, |
| 108454327, |
| 73416001, |
| 75522060, |
| 51535282, |
| 25792617, |
| 3571732, |
| 70659985, |
| 122031836, |
| 108381766, |
| 73348074, |
| 75455105, |
| 51471101, |
| 25767838, |
| 3566300 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.1.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010748641847659612, |
| "l1_avg": 0.00931148264143202, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008507342188541462, |
| "l1_avg": 0.0006923016574647691, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.021090655856662328, |
| "l1_avg": 0.021456119867018713, |
| "l0_avg": 0.8330497633380655 |
| }, |
| "merged": { |
| "l2_avg": 0.02109139495425754, |
| "l1_avg": 0.021456114215615354, |
| "l0_avg": 0.8174360774475851 |
| }, |
| "diff": { |
| "l2_avg": 0.0015295493933889601, |
| "l1_avg": 0.00034264935387505425, |
| "l0_avg": 0.05272385632550275 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 111952065, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 177160775, |
| 176388912, |
| 209630823, |
| 105633445, |
| 159683395, |
| 114860949, |
| 86130618, |
| 31922925, |
| 177335748, |
| 176522966, |
| 209761436, |
| 105672324, |
| 159724402, |
| 114895944, |
| 86125132, |
| 31916606 |
| ], |
| "fp4_dist_after": [ |
| 193826371, |
| 249846620, |
| 204827739, |
| 129058570, |
| 132908404, |
| 92600476, |
| 50499045, |
| 7943179, |
| 193823728, |
| 250040933, |
| 204933148, |
| 129098918, |
| 132938462, |
| 92611265, |
| 50472044, |
| 7937498 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.10.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020326530187979, |
| "l1_avg": 0.0017256683773464628, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007463855751233152, |
| "l1_avg": 0.0005977995693683624, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.5303421942111894, |
| "l1_avg": 0.4746394555362654, |
| "l0_avg": 0.8745899586618683 |
| }, |
| "merged": { |
| "l2_avg": 0.5305382051546641, |
| "l1_avg": 0.47464687017746915, |
| "l0_avg": 0.860041717717677 |
| }, |
| "diff": { |
| "l2_avg": 0.03547607385816832, |
| "l1_avg": 0.007141131177360629, |
| "l0_avg": 0.051418373202100215 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 54590023, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 66580100, |
| 88917455, |
| 110003766, |
| 59005671, |
| 86527424, |
| 61115771, |
| 43348436, |
| 15566643, |
| 66565634, |
| 88888760, |
| 109947127, |
| 58939522, |
| 86426900, |
| 61028062, |
| 43283962, |
| 15537967 |
| ], |
| "fp4_dist_after": [ |
| 74290715, |
| 125661655, |
| 109601594, |
| 70914209, |
| 72097185, |
| 49118497, |
| 25562006, |
| 3806367, |
| 74300642, |
| 125614182, |
| 109530524, |
| 70827951, |
| 72001894, |
| 49042961, |
| 25516113, |
| 3796705 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.10.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010712012050688757, |
| "l1_avg": 0.009258027209175958, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008434071031407762, |
| "l1_avg": 0.0006874440444840325, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018296873569488524, |
| "l1_avg": 0.01789375587745949, |
| "l0_avg": 0.8813605734742718 |
| }, |
| "merged": { |
| "l2_avg": 0.018297459019554985, |
| "l1_avg": 0.017893729504243828, |
| "l0_avg": 0.8673347374244973 |
| }, |
| "diff": { |
| "l2_avg": 0.00129761869708697, |
| "l1_avg": 0.0002660214165110647, |
| "l0_avg": 0.049955613878038194 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 106074072, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 125949943, |
| 171950205, |
| 210901961, |
| 121273068, |
| 177728646, |
| 131026752, |
| 91429407, |
| 31172188, |
| 125965029, |
| 172001482, |
| 210984769, |
| 121348654, |
| 177819013, |
| 131118883, |
| 91496463, |
| 31199937 |
| ], |
| "fp4_dist_after": [ |
| 140847445, |
| 240632231, |
| 212961949, |
| 145852700, |
| 152374372, |
| 107249024, |
| 54402841, |
| 7118598, |
| 140849516, |
| 240733089, |
| 213036509, |
| 145939028, |
| 152458853, |
| 107339867, |
| 54446260, |
| 7124118 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.11.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010740929376156713, |
| "l1_avg": 0.009287484486897786, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008428609833246866, |
| "l1_avg": 0.0006888140406873492, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018851494789123534, |
| "l1_avg": 0.01823229000892168, |
| "l0_avg": 0.8828186605006089 |
| }, |
| "merged": { |
| "l2_avg": 0.018852443165249293, |
| "l1_avg": 0.01823229000892168, |
| "l0_avg": 0.8687712506894713 |
| }, |
| "diff": { |
| "l2_avg": 0.0013244017130798764, |
| "l1_avg": 0.0002702366864239728, |
| "l0_avg": 0.05028547263439791 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 106774483, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 124395601, |
| 170167168, |
| 210675186, |
| 121114215, |
| 178756785, |
| 132214505, |
| 92561824, |
| 31484206, |
| 124423318, |
| 170243618, |
| 210787480, |
| 121207533, |
| 178892630, |
| 132304647, |
| 92623537, |
| 31514147 |
| ], |
| "fp4_dist_after": [ |
| 139319371, |
| 239583699, |
| 213303175, |
| 146330175, |
| 153122867, |
| 107728914, |
| 54795051, |
| 7201253, |
| 139327346, |
| 239686293, |
| 213418198, |
| 146436620, |
| 153257449, |
| 107812479, |
| 54832516, |
| 7210994 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.12.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010785175054494658, |
| "l1_avg": 0.009332484006881714, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008065151050686836, |
| "l1_avg": 0.0006513990228995681, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.007440663084227731, |
| "l1_avg": 0.005181460248099433, |
| "l0_avg": 0.999993896484375 |
| }, |
| "merged": { |
| "l2_avg": 0.007440479309952031, |
| "l1_avg": 0.005181780126359728, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.984729052992744e-05, |
| "l1_avg": 3.8999034505751396e-05, |
| "l0_avg": 0.9999932183159722 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474550, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9573, |
| 28926, |
| 7357, |
| 0, |
| 0, |
| 0, |
| 0, |
| 32, |
| 9622, |
| 29136, |
| 7478, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 135, |
| 8184, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 133, |
| 7932, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 1848, |
| 389622, |
| 314115, |
| 33260, |
| 105, |
| 0, |
| 0, |
| 0, |
| 1795, |
| 388013, |
| 312737, |
| 32943, |
| 122, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 1858, |
| 388317, |
| 315028, |
| 33613, |
| 105, |
| 0, |
| 0, |
| 0, |
| 1849, |
| 386744, |
| 313626, |
| 33295, |
| 125, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.12.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009059107497748837, |
| "l1_avg": 0.00784157682210207, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008239601132129921, |
| "l1_avg": 0.0006685695714420743, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.1342254372251146, |
| "l1_avg": 0.10016854604085286, |
| "l0_avg": 0.9999995761447482 |
| }, |
| "merged": { |
| "l2_avg": 0.13421386851786024, |
| "l1_avg": 0.10016855663723416, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.3029794172256296e-05, |
| "l1_avg": 3.338173393987947e-05, |
| "l0_avg": 0.9999137878417969 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11795463, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 61, |
| 16211, |
| 48058, |
| 1267, |
| 0, |
| 0, |
| 0, |
| 0, |
| 73, |
| 16351, |
| 47841, |
| 1210, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 659, |
| 45305, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 661, |
| 45535, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 824, |
| 198232, |
| 510560, |
| 1737976, |
| 3448835, |
| 1083, |
| 121, |
| 0, |
| 821, |
| 198693, |
| 510480, |
| 1735664, |
| 3452020, |
| 1045, |
| 126, |
| 0 |
| ], |
| "merged": [ |
| 797, |
| 197455, |
| 509161, |
| 1733721, |
| 3455313, |
| 1095, |
| 122, |
| 0, |
| 836, |
| 197841, |
| 508930, |
| 1731462, |
| 3458562, |
| 1057, |
| 128, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.12.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010783331028856437, |
| "l1_avg": 0.009328770637512206, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008396149424473617, |
| "l1_avg": 0.0006794914370402694, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.014159537903165827, |
| "l1_avg": 0.010413003630108303, |
| "l0_avg": 0.9999974568684896 |
| }, |
| "merged": { |
| "l2_avg": 0.014157057783801486, |
| "l1_avg": 0.01041310297118293, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.229794567527812e-05, |
| "l1_avg": 4.083249045328961e-05, |
| "l0_avg": 0.9999946594238281 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796417, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 37, |
| 9545, |
| 28953, |
| 7482, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9730, |
| 29053, |
| 7324, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 876, |
| 64713, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 917, |
| 64566, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7031, |
| 1665906, |
| 2920340, |
| 1295678, |
| 7522, |
| 0, |
| 0, |
| 0, |
| 7054, |
| 1667178, |
| 2921642, |
| 1296799, |
| 7330, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7199, |
| 1659274, |
| 2918999, |
| 1303406, |
| 7713, |
| 0, |
| 0, |
| 0, |
| 6998, |
| 1660264, |
| 2920713, |
| 1304410, |
| 7504, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.12.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010789626475839394, |
| "l1_avg": 0.009342069096035428, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008268022793345153, |
| "l1_avg": 0.0006734909838996828, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.053733034799171984, |
| "l1_avg": 0.04156673749287923, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.05372940329382652, |
| "l1_avg": 0.04156669775644938, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.17193651832853e-05, |
| "l1_avg": 4.041468621128135e-05, |
| "l0_avg": 0.9999728732638888 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474520, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 53, |
| 9503, |
| 28893, |
| 7291, |
| 0, |
| 0, |
| 0, |
| 0, |
| 26, |
| 9685, |
| 29360, |
| 7349, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 93, |
| 8047, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 120, |
| 8124, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 185, |
| 49821, |
| 140330, |
| 378519, |
| 167553, |
| 0, |
| 0, |
| 0, |
| 210, |
| 49687, |
| 140560, |
| 379507, |
| 168188, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 205, |
| 49633, |
| 139913, |
| 378159, |
| 168552, |
| 0, |
| 0, |
| 0, |
| 172, |
| 49461, |
| 140120, |
| 379101, |
| 169244, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.13.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01075228574202556, |
| "l1_avg": 0.009297711981667412, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007913949084468186, |
| "l1_avg": 0.0006345683941617608, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.02083331316661207, |
| "l1_avg": 0.00994187593460083, |
| "l0_avg": 0.9999911838107639 |
| }, |
| "merged": { |
| "l2_avg": 0.020832766555945886, |
| "l1_avg": 0.00994218322965834, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.8746884379286875e-05, |
| "l1_avg": 3.787790839042929e-05, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474555, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 41, |
| 9704, |
| 29090, |
| 7354, |
| 0, |
| 0, |
| 0, |
| 0, |
| 34, |
| 9684, |
| 28884, |
| 7369, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 132, |
| 8048, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 125, |
| 8079, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 2392, |
| 361854, |
| 247135, |
| 110215, |
| 15429, |
| 0, |
| 0, |
| 0, |
| 2279, |
| 362526, |
| 246874, |
| 110567, |
| 15289, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 2341, |
| 360964, |
| 247502, |
| 110589, |
| 15532, |
| 0, |
| 0, |
| 0, |
| 2378, |
| 361572, |
| 247293, |
| 111014, |
| 15375, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.13.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.00902434293317243, |
| "l1_avg": 0.007801322732120752, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008300848425327858, |
| "l1_avg": 0.0006726636240879694, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.21920052416913355, |
| "l1_avg": 0.16342239379882811, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 0.21918899100322253, |
| "l1_avg": 0.16342239379882811, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.2693891166306364e-05, |
| "l1_avg": 3.346408096452554e-05, |
| "l0_avg": 0.9998597886827257 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11794826, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 64, |
| 16396, |
| 47712, |
| 1278, |
| 0, |
| 0, |
| 0, |
| 0, |
| 55, |
| 16459, |
| 47788, |
| 1320, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 642, |
| 45422, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 672, |
| 45424, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 600, |
| 138031, |
| 333884, |
| 1144309, |
| 4274093, |
| 5231, |
| 354, |
| 0, |
| 586, |
| 138199, |
| 334843, |
| 1144991, |
| 4275751, |
| 5283, |
| 325, |
| 0 |
| ], |
| "merged": [ |
| 647, |
| 137498, |
| 332979, |
| 1141129, |
| 4278569, |
| 5323, |
| 357, |
| 0, |
| 598, |
| 137670, |
| 333883, |
| 1141772, |
| 4280357, |
| 5368, |
| 330, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.13.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010813729964281753, |
| "l1_avg": 0.009351619084676107, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000827527028731085, |
| "l1_avg": 0.0006701566744595766, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.012413197338576062, |
| "l1_avg": 0.008824894163343642, |
| "l0_avg": 0.9999966939290365 |
| }, |
| "merged": { |
| "l2_avg": 0.012411189252677842, |
| "l1_avg": 0.008825052446789211, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2675854453461205e-05, |
| "l1_avg": 4.0475284266803e-05, |
| "l0_avg": 0.9999933878580729 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796402, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9597, |
| 29037, |
| 7422, |
| 0, |
| 0, |
| 0, |
| 0, |
| 25, |
| 9646, |
| 28956, |
| 7444, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 887, |
| 64534, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 942, |
| 64709, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 9740, |
| 2093791, |
| 2792497, |
| 993601, |
| 4858, |
| 0, |
| 0, |
| 0, |
| 9564, |
| 2099007, |
| 2794105, |
| 994507, |
| 4810, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 9675, |
| 2086063, |
| 2793829, |
| 999971, |
| 4955, |
| 0, |
| 0, |
| 0, |
| 9524, |
| 2091486, |
| 2795177, |
| 1000872, |
| 4928, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.13.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010802657171490744, |
| "l1_avg": 0.009328207042482163, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007840656908228993, |
| "l1_avg": 0.0006293227197602391, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.05370013763310136, |
| "l1_avg": 0.04143078327178955, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.05369663806826153, |
| "l1_avg": 0.04143076207902696, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.158326472309634e-05, |
| "l1_avg": 3.8968344839910665e-05, |
| "l0_avg": 0.9999742296006945 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474522, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9710, |
| 29192, |
| 7310, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9759, |
| 28839, |
| 7280, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 135, |
| 8023, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 125, |
| 8101, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 199, |
| 49957, |
| 140172, |
| 382326, |
| 165211, |
| 0, |
| 0, |
| 0, |
| 188, |
| 49827, |
| 139947, |
| 382031, |
| 164702, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 184, |
| 49771, |
| 139791, |
| 381838, |
| 166279, |
| 0, |
| 0, |
| 0, |
| 214, |
| 49614, |
| 139499, |
| 381647, |
| 165723, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.14.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01075974273667416, |
| "l1_avg": 0.009310655461417305, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008061928674578667, |
| "l1_avg": 0.0006542591727338731, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.006051247489799013, |
| "l1_avg": 0.003045191036330329, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "merged": { |
| "l2_avg": 0.006051332308695491, |
| "l1_avg": 0.0030455695258246526, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.96815212492875e-05, |
| "l1_avg": 3.904795739799738e-05, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474558, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 46, |
| 9515, |
| 28984, |
| 7321, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9698, |
| 29179, |
| 7384, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 112, |
| 8047, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 128, |
| 8097, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3542, |
| 554535, |
| 171084, |
| 7786, |
| 771, |
| 0, |
| 0, |
| 0, |
| 3434, |
| 553485, |
| 171372, |
| 7742, |
| 809, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 3432, |
| 553376, |
| 172226, |
| 7832, |
| 777, |
| 0, |
| 0, |
| 0, |
| 3536, |
| 552296, |
| 172488, |
| 7783, |
| 814, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.14.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009047505260607198, |
| "l1_avg": 0.007831534370779991, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008084627976624742, |
| "l1_avg": 0.000656282901763916, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.13928902129486553, |
| "l1_avg": 0.10397022035386827, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 0.13927742593160367, |
| "l1_avg": 0.10397023095024957, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.202917867202695e-05, |
| "l1_avg": 3.216808351377646e-05, |
| "l0_avg": 0.9999032762315538 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11795339, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 50, |
| 16161, |
| 48089, |
| 1100, |
| 0, |
| 0, |
| 0, |
| 0, |
| 88, |
| 16261, |
| 48139, |
| 1184, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 629, |
| 45577, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 608, |
| 45346, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 885, |
| 203609, |
| 505018, |
| 1685290, |
| 3504016, |
| 613, |
| 101, |
| 0, |
| 929, |
| 203478, |
| 503467, |
| 1687364, |
| 3500989, |
| 618, |
| 103, |
| 0 |
| ], |
| "merged": [ |
| 917, |
| 202871, |
| 503629, |
| 1680973, |
| 3510470, |
| 620, |
| 102, |
| 0, |
| 848, |
| 202752, |
| 502085, |
| 1683062, |
| 3507421, |
| 627, |
| 103, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.14.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010784638653510457, |
| "l1_avg": 0.009330671363406711, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008231350307712212, |
| "l1_avg": 0.0006652033771388233, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01564089220787998, |
| "l1_avg": 0.011252227094438341, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "merged": { |
| "l2_avg": 0.01563780455367807, |
| "l1_avg": 0.011252348952823215, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.099453892841339e-05, |
| "l1_avg": 3.965402849846416e-05, |
| "l0_avg": 0.9999921162923177 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796387, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 46, |
| 9661, |
| 28997, |
| 7443, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9590, |
| 29104, |
| 7286, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 967, |
| 64586, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 905, |
| 64614, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 9690, |
| 1684567, |
| 2713749, |
| 1478040, |
| 15545, |
| 0, |
| 0, |
| 0, |
| 9713, |
| 1684195, |
| 2710092, |
| 1475285, |
| 15604, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 9481, |
| 1678575, |
| 2712025, |
| 1485571, |
| 15903, |
| 0, |
| 0, |
| 0, |
| 9494, |
| 1678342, |
| 2708405, |
| 1482755, |
| 15929, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.14.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010781929160984109, |
| "l1_avg": 0.009325689739651151, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008154078968800604, |
| "l1_avg": 0.0006578554748557508, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0615715134297196, |
| "l1_avg": 0.04793897204928928, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.06156736044523284, |
| "l1_avg": 0.04793906211853027, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.097414122945514e-05, |
| "l1_avg": 3.9812539600663716e-05, |
| "l0_avg": 0.9999640570746527 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474507, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 36, |
| 9630, |
| 28991, |
| 7253, |
| 0, |
| 0, |
| 0, |
| 0, |
| 41, |
| 9585, |
| 29277, |
| 7347, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 118, |
| 8171, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 121, |
| 7974, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 155, |
| 40502, |
| 118852, |
| 365692, |
| 212266, |
| 0, |
| 0, |
| 0, |
| 171, |
| 40277, |
| 118357, |
| 365667, |
| 212621, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 177, |
| 40313, |
| 118468, |
| 365188, |
| 213336, |
| 0, |
| 0, |
| 0, |
| 160, |
| 40079, |
| 117988, |
| 365155, |
| 213696, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.11.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.002024305245782953, |
| "l1_avg": 0.0017203594247500102, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007313240362874516, |
| "l1_avg": 0.0005856654710239835, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.7130518470501002, |
| "l1_avg": 0.5938414472415123, |
| "l0_avg": 0.8731160990397135 |
| }, |
| "merged": { |
| "l2_avg": 0.7130943460589453, |
| "l1_avg": 0.5938510923032407, |
| "l0_avg": 0.8598320402922454 |
| }, |
| "diff": { |
| "l2_avg": 0.04384309619564327, |
| "l1_avg": 0.008234153088228202, |
| "l0_avg": 0.046735309553735054 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 49618093, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 67354822, |
| 90076986, |
| 110876133, |
| 58682143, |
| 85475524, |
| 59136015, |
| 42981405, |
| 16382538, |
| 67355684, |
| 90061235, |
| 110832799, |
| 58659470, |
| 85421372, |
| 59090852, |
| 42934979, |
| 16361243 |
| ], |
| "fp4_dist_after": [ |
| 74397548, |
| 126786248, |
| 109991751, |
| 72052135, |
| 72369243, |
| 47872286, |
| 23914076, |
| 3574146, |
| 74416420, |
| 126755117, |
| 109948153, |
| 72016448, |
| 72310015, |
| 47824657, |
| 23886021, |
| 3568936 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.12.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.00202713342041187, |
| "l1_avg": 0.001721850037574768, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.00070716596531449, |
| "l1_avg": 0.0005641731950971815, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.7570058924829568, |
| "l1_avg": 0.674957561728395, |
| "l0_avg": 0.8743379060721692 |
| }, |
| "merged": { |
| "l2_avg": 0.7570921492865226, |
| "l1_avg": 0.6749757065007717, |
| "l0_avg": 0.861181728221752 |
| }, |
| "diff": { |
| "l2_avg": 0.049241890199469805, |
| "l1_avg": 0.009626894350405093, |
| "l0_avg": 0.04605737662609712 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 48898343, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 66711584, |
| 91741404, |
| 110031038, |
| 60702711, |
| 85685605, |
| 59208496, |
| 41510979, |
| 15373182, |
| 66701750, |
| 91721495, |
| 109983003, |
| 60673174, |
| 85639698, |
| 59162040, |
| 41475977, |
| 15361064 |
| ], |
| "fp4_dist_after": [ |
| 73701594, |
| 125938209, |
| 109146589, |
| 72281552, |
| 73144095, |
| 49121540, |
| 24227537, |
| 3406116, |
| 73679433, |
| 125907444, |
| 109083436, |
| 72253772, |
| 73102051, |
| 49071847, |
| 24212266, |
| 3405719 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.12.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010748614080985134, |
| "l1_avg": 0.00930076175265842, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008391130483366105, |
| "l1_avg": 0.0006840712494320339, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018675684928894043, |
| "l1_avg": 0.017729194546923226, |
| "l0_avg": 0.8830257486414026 |
| }, |
| "merged": { |
| "l2_avg": 0.018676319387223986, |
| "l1_avg": 0.01772923410674672, |
| "l0_avg": 0.8690522808498806 |
| }, |
| "diff": { |
| "l2_avg": 0.001299555930826399, |
| "l1_avg": 0.00026091787550184463, |
| "l0_avg": 0.05005596255078728 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 106287149, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 124172755, |
| 169910347, |
| 210541673, |
| 121093092, |
| 178912435, |
| 132409500, |
| 92786719, |
| 31621844, |
| 124206440, |
| 169960184, |
| 210645347, |
| 121145992, |
| 179011555, |
| 132488155, |
| 92824097, |
| 31636265 |
| ], |
| "fp4_dist_after": [ |
| 139027071, |
| 239362251, |
| 213215753, |
| 146559608, |
| 153444239, |
| 107915770, |
| 54764046, |
| 7180457, |
| 139022916, |
| 239450870, |
| 213297430, |
| 146641216, |
| 153520285, |
| 107973112, |
| 54799997, |
| 7191379 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.13.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0020247836655842024, |
| "l1_avg": 0.0017208259966638353, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007133675617592159, |
| "l1_avg": 0.0005703679389423794, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.871462316943161, |
| "l1_avg": 0.8090717230902778, |
| "l0_avg": 0.8745524050865644 |
| }, |
| "merged": { |
| "l2_avg": 0.8716561699679919, |
| "l1_avg": 0.809095775462963, |
| "l0_avg": 0.8601233315173491 |
| }, |
| "diff": { |
| "l2_avg": 0.06074686341975666, |
| "l1_avg": 0.012524824731143904, |
| "l0_avg": 0.05065887545361931 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 53783677, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 66587105, |
| 90949885, |
| 109573846, |
| 60590829, |
| 86191484, |
| 60659517, |
| 41716250, |
| 14661694, |
| 66598499, |
| 90935449, |
| 109533964, |
| 60564720, |
| 86153794, |
| 60617152, |
| 41699232, |
| 14649780 |
| ], |
| "fp4_dist_after": [ |
| 74259700, |
| 125493224, |
| 108913366, |
| 70824826, |
| 72244618, |
| 49844895, |
| 25681087, |
| 3681757, |
| 74245009, |
| 125462129, |
| 108879482, |
| 70780335, |
| 72212335, |
| 49811675, |
| 25669080, |
| 3679682 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.13.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010742110015155501, |
| "l1_avg": 0.009299572308858235, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008369682297599567, |
| "l1_avg": 0.0006816579235924615, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.018734019332461885, |
| "l1_avg": 0.017442534646870178, |
| "l0_avg": 0.8832228521653164 |
| }, |
| "merged": { |
| "l2_avg": 0.018734350469377305, |
| "l1_avg": 0.017442523344063463, |
| "l0_avg": 0.8694649157112028 |
| }, |
| "diff": { |
| "l2_avg": 0.0012870906955666011, |
| "l1_avg": 0.00025190856721666124, |
| "l0_avg": 0.04927604015962577 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 104631088, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 123997893, |
| 170698831, |
| 210694167, |
| 121706376, |
| 178792662, |
| 131956763, |
| 92145394, |
| 31491707, |
| 123962779, |
| 170717752, |
| 210803635, |
| 121768412, |
| 178889961, |
| 132036171, |
| 92198658, |
| 31505239 |
| ], |
| "fp4_dist_after": [ |
| 138573157, |
| 239395467, |
| 213299890, |
| 147035741, |
| 153723383, |
| 107951691, |
| 54403046, |
| 7069356, |
| 138600655, |
| 239465977, |
| 213393473, |
| 147094950, |
| 153828475, |
| 108028737, |
| 54429022, |
| 7073380 |
| ], |
| "bf16_dists": null |
| }, |
| "lm_head.weight": { |
| "lora_A": { |
| "l2_avg": 0.010774496512500238, |
| "l1_avg": 0.009324126773410372, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0018356661821962352, |
| "l1_avg": 0.0015421853104948921, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.004370598861691551, |
| "l1_avg": 0.003714712968396368, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.004374095388715884, |
| "l1_avg": 0.003715515460478331, |
| "l0_avg": 0.9999999982732822 |
| }, |
| "diff": { |
| "l2_avg": 0.00010087524255007474, |
| "l1_avg": 8.52967752021883e-05, |
| "l0_avg": 0.9999986427998355 |
| }, |
| "num_elements": 579133440, |
| "num_changed": 579132654, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9575, |
| 29090, |
| 7401, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9622, |
| 28976, |
| 7426, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 16317, |
| 3958162, |
| 860, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 16068, |
| 2442171, |
| 1238, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 949797, |
| 178320676, |
| 108496229, |
| 1222691, |
| 0, |
| 0, |
| 0, |
| 0, |
| 952082, |
| 179783741, |
| 108088242, |
| 1319982, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 943554, |
| 177839921, |
| 108955645, |
| 1257836, |
| 0, |
| 0, |
| 0, |
| 0, |
| 942366, |
| 179317678, |
| 108522041, |
| 1354399, |
| 0, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.31.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.0107962737641707, |
| "l1_avg": 0.009351033634609646, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008272191043943167, |
| "l1_avg": 0.0006664739339612424, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01097583301193199, |
| "l1_avg": 0.004634654190805223, |
| "l0_avg": 0.9999844021267361 |
| }, |
| "merged": { |
| "l2_avg": 0.01097574740767536, |
| "l1_avg": 0.004635341962178548, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.0981200267388934e-05, |
| "l1_avg": 3.977606797383891e-05, |
| "l0_avg": 0.9999966091579862 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474555, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 32, |
| 9505, |
| 28948, |
| 7524, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9513, |
| 29097, |
| 7502, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 110, |
| 8027, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 130, |
| 8117, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5674, |
| 511843, |
| 173564, |
| 42437, |
| 3018, |
| 1, |
| 0, |
| 0, |
| 5665, |
| 513354, |
| 173635, |
| 42211, |
| 3158, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 5672, |
| 511330, |
| 174058, |
| 42659, |
| 3041, |
| 1, |
| 0, |
| 0, |
| 5600, |
| 512370, |
| 174220, |
| 42435, |
| 3174, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.31.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009036175660891863, |
| "l1_avg": 0.007821576669812202, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008362798615862339, |
| "l1_avg": 0.0006787369234694375, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.2089248612087276, |
| "l1_avg": 0.8242056104871962, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 1.2089193167591679, |
| "l1_avg": 0.8242056104871962, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.282270684316801e-05, |
| "l1_avg": 3.339341427716944e-05, |
| "l0_avg": 0.999298095703125 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11788200, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 68, |
| 16184, |
| 48084, |
| 1156, |
| 0, |
| 0, |
| 0, |
| 0, |
| 61, |
| 16310, |
| 48048, |
| 1161, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 636, |
| 45272, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 612, |
| 45640, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 100, |
| 25959, |
| 78012, |
| 311918, |
| 3781645, |
| 1181655, |
| 519406, |
| 0, |
| 94, |
| 25682, |
| 78175, |
| 310939, |
| 3780054, |
| 1182872, |
| 519969, |
| 0 |
| ], |
| "merged": [ |
| 86, |
| 25855, |
| 77748, |
| 310959, |
| 3776503, |
| 1184131, |
| 523409, |
| 0, |
| 80, |
| 25573, |
| 77930, |
| 309951, |
| 3774962, |
| 1185512, |
| 523781, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.31.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010776440278877836, |
| "l1_avg": 0.009310456779268053, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000859001340309484, |
| "l1_avg": 0.000699599040672183, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01581580226508326, |
| "l1_avg": 0.010725782977210151, |
| "l0_avg": 0.9999970330132378 |
| }, |
| "merged": { |
| "l2_avg": 0.015813172205676743, |
| "l1_avg": 0.010725926028357612, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.38942170826525e-05, |
| "l1_avg": 4.1367641339699425e-05, |
| "l0_avg": 0.9999929640028212 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796397, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 40, |
| 9803, |
| 28774, |
| 7359, |
| 0, |
| 0, |
| 0, |
| 0, |
| 33, |
| 9656, |
| 29068, |
| 7427, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 867, |
| 64662, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 843, |
| 64700, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 8466, |
| 1858905, |
| 2702978, |
| 1293001, |
| 31594, |
| 0, |
| 0, |
| 0, |
| 8566, |
| 1859516, |
| 2704645, |
| 1297051, |
| 31758, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8401, |
| 1852309, |
| 2702713, |
| 1299571, |
| 32010, |
| 0, |
| 0, |
| 0, |
| 8517, |
| 1852904, |
| 2704172, |
| 1303729, |
| 32154, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.31.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010789866010685926, |
| "l1_avg": 0.009329371982150607, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008245359640568495, |
| "l1_avg": 0.000667478539980948, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.10736698856149642, |
| "l1_avg": 0.08048310279846191, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.10736052975960172, |
| "l1_avg": 0.0804831345876058, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.3821050307809925e-05, |
| "l1_avg": 4.043812708308299e-05, |
| "l0_avg": 0.9999538845486111 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474492, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9618, |
| 29159, |
| 7403, |
| 0, |
| 0, |
| 0, |
| 0, |
| 40, |
| 9500, |
| 28966, |
| 7443, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 106, |
| 8086, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 112, |
| 8080, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 107, |
| 25664, |
| 76878, |
| 263909, |
| 369525, |
| 74, |
| 4, |
| 0, |
| 113, |
| 26166, |
| 76864, |
| 265058, |
| 370146, |
| 51, |
| 1, |
| 0 |
| ], |
| "merged": [ |
| 105, |
| 25539, |
| 76622, |
| 263360, |
| 370460, |
| 75, |
| 4, |
| 0, |
| 103, |
| 26062, |
| 76618, |
| 264440, |
| 371119, |
| 52, |
| 1, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.30.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001997454593896365, |
| "l1_avg": 0.0017040537463294135, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008147620732761753, |
| "l1_avg": 0.0006580723656548394, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 5.982629952228251, |
| "l1_avg": 5.487172067901234, |
| "l0_avg": 0.8769874403211806 |
| }, |
| "merged": { |
| "l2_avg": 5.982581998480612, |
| "l1_avg": 5.487965856481481, |
| "l0_avg": 0.8643562505274643 |
| }, |
| "diff": { |
| "l2_avg": 0.3703550050957563, |
| "l1_avg": 0.07409729757426697, |
| "l0_avg": 0.04461420694986979 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 47366154, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 65303786, |
| 90255777, |
| 107832976, |
| 61094094, |
| 86813474, |
| 61700215, |
| 42741819, |
| 15146706, |
| 65296582, |
| 90261498, |
| 107814180, |
| 61088590, |
| 86794574, |
| 61669154, |
| 42726764, |
| 15143011 |
| ], |
| "fp4_dist_after": [ |
| 72002347, |
| 122319642, |
| 107692832, |
| 72235146, |
| 75089705, |
| 51767532, |
| 26052511, |
| 3722728, |
| 72008343, |
| 122321077, |
| 107674938, |
| 72210543, |
| 75070504, |
| 51753877, |
| 26040894, |
| 3720581 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.30.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010748430820933583, |
| "l1_avg": 0.009293212493260702, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008565544247747829, |
| "l1_avg": 0.0007021759119298723, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "original": { |
| "l2_avg": 0.02025708489947849, |
| "l1_avg": 0.019797144760320216, |
| "l0_avg": 0.8825213373443227 |
| }, |
| "merged": { |
| "l2_avg": 0.020257809427049425, |
| "l1_avg": 0.01979714099271798, |
| "l0_avg": 0.8683337534209828 |
| }, |
| "diff": { |
| "l2_avg": 0.0014175714717970954, |
| "l1_avg": 0.00029840451699716074, |
| "l0_avg": 0.050911814371744794 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 108104436, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 124700093, |
| 168124275, |
| 210861667, |
| 119424255, |
| 178944094, |
| 132650731, |
| 93994156, |
| 32193299, |
| 124750152, |
| 168264393, |
| 211065377, |
| 119621612, |
| 179256293, |
| 132966409, |
| 94249561, |
| 32300033 |
| ], |
| "fp4_dist_after": [ |
| 139790402, |
| 239443356, |
| 213666764, |
| 145892223, |
| 152831077, |
| 107096739, |
| 54837819, |
| 7366566, |
| 139785282, |
| 239658698, |
| 213923200, |
| 146172135, |
| 153129193, |
| 107368409, |
| 55000452, |
| 7404085 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.32.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01081512554927286, |
| "l1_avg": 0.00936189889907837, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000739463372156024, |
| "l1_avg": 0.0005891390610486269, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0025733774388258645, |
| "l1_avg": 0.0018295894066492717, |
| "l0_avg": 0.99998779296875 |
| }, |
| "merged": { |
| "l2_avg": 0.0025736501551388426, |
| "l1_avg": 0.0018300498525301615, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.67885060079136e-05, |
| "l1_avg": 3.569606795079178e-05, |
| "l0_avg": 0.9999959309895833 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474554, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 48, |
| 9433, |
| 29025, |
| 7577, |
| 0, |
| 0, |
| 0, |
| 0, |
| 48, |
| 9622, |
| 28961, |
| 7446, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 156, |
| 8061, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 120, |
| 8047, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4707, |
| 656455, |
| 74181, |
| 657, |
| 2, |
| 0, |
| 0, |
| 0, |
| 4959, |
| 657918, |
| 75050, |
| 630, |
| 1, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4825, |
| 655772, |
| 74997, |
| 665, |
| 2, |
| 0, |
| 0, |
| 0, |
| 4845, |
| 656955, |
| 75859, |
| 638, |
| 2, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.32.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009035835193381235, |
| "l1_avg": 0.007818758487701416, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008180701084087962, |
| "l1_avg": 0.0006620415796836217, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.7776271057486608, |
| "l1_avg": 0.5782581753200955, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.7776204950588012, |
| "l1_avg": 0.5782581753200955, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.2162289506346854e-05, |
| "l1_avg": 3.293522944053014e-05, |
| "l0_avg": 0.9994960361056857 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11790535, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 65, |
| 16351, |
| 48002, |
| 1044, |
| 0, |
| 0, |
| 0, |
| 0, |
| 49, |
| 16404, |
| 48044, |
| 1113, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 651, |
| 45471, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 671, |
| 45367, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 92, |
| 28599, |
| 86756, |
| 343869, |
| 4460847, |
| 870931, |
| 105802, |
| 0, |
| 97, |
| 28644, |
| 86525, |
| 344361, |
| 4461950, |
| 872334, |
| 105673, |
| 0 |
| ], |
| "merged": [ |
| 104, |
| 28469, |
| 86453, |
| 342750, |
| 4455344, |
| 876362, |
| 107402, |
| 0, |
| 112, |
| 28505, |
| 86267, |
| 343270, |
| 4456442, |
| 877735, |
| 107265, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.32.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010798666756555565, |
| "l1_avg": 0.009340361754099528, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007903786976719068, |
| "l1_avg": 0.0006280205561779439, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01976570944821665, |
| "l1_avg": 0.01385699643029107, |
| "l0_avg": 0.9999978807237413 |
| }, |
| "merged": { |
| "l2_avg": 0.01976241076728871, |
| "l1_avg": 0.013857109016842312, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.988036206811353e-05, |
| "l1_avg": 3.797352676176363e-05, |
| "l0_avg": 0.999989488389757 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796356, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 39, |
| 9453, |
| 28728, |
| 7354, |
| 0, |
| 0, |
| 0, |
| 0, |
| 53, |
| 9683, |
| 29349, |
| 7501, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 1089, |
| 64543, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 1042, |
| 64398, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5735, |
| 1339848, |
| 2656147, |
| 1835987, |
| 61866, |
| 0, |
| 0, |
| 0, |
| 5642, |
| 1342679, |
| 2651910, |
| 1834252, |
| 62414, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 5615, |
| 1334675, |
| 2653086, |
| 1843567, |
| 62635, |
| 0, |
| 0, |
| 0, |
| 5678, |
| 1337340, |
| 2648955, |
| 1841734, |
| 63195, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.32.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010805636827909575, |
| "l1_avg": 0.009349693854649862, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000815343577414751, |
| "l1_avg": 0.0006574359722435474, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.10099424957376428, |
| "l1_avg": 0.07876850234137642, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.10098704310900435, |
| "l1_avg": 0.078768523534139, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.1420719714020246e-05, |
| "l1_avg": 3.9510687606202235e-05, |
| "l0_avg": 0.9999457465277778 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474480, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9695, |
| 29050, |
| 7465, |
| 0, |
| 0, |
| 0, |
| 0, |
| 30, |
| 9490, |
| 29013, |
| 7384, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 115, |
| 7977, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 112, |
| 8180, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 106, |
| 24852, |
| 73144, |
| 262523, |
| 376749, |
| 2, |
| 0, |
| 0, |
| 92, |
| 24644, |
| 73342, |
| 262013, |
| 377091, |
| 2, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 93, |
| 24709, |
| 72926, |
| 261883, |
| 377745, |
| 2, |
| 0, |
| 0, |
| 82, |
| 24574, |
| 73073, |
| 261364, |
| 378107, |
| 2, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.33.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010787403121247484, |
| "l1_avg": 0.009346520900726319, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008391176233999431, |
| "l1_avg": 0.000688590167555958, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.006643017542796623, |
| "l1_avg": 0.003543711370891995, |
| "l0_avg": 0.9999864366319444 |
| }, |
| "merged": { |
| "l2_avg": 0.006642917802057248, |
| "l1_avg": 0.0035442352294921876, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.212795493828319e-05, |
| "l1_avg": 4.0307108105884656e-05, |
| "l0_avg": 0.9999972873263889 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474556, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 30, |
| 9546, |
| 29138, |
| 7439, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9495, |
| 29070, |
| 7397, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 95, |
| 8061, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 106, |
| 8122, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5306, |
| 524539, |
| 187420, |
| 19758, |
| 578, |
| 0, |
| 0, |
| 0, |
| 5246, |
| 523738, |
| 187538, |
| 19865, |
| 572, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 5118, |
| 523556, |
| 188194, |
| 19917, |
| 581, |
| 0, |
| 0, |
| 0, |
| 5227, |
| 523048, |
| 188313, |
| 20030, |
| 576, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.33.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009044482541315556, |
| "l1_avg": 0.00782675202935934, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008246327741836636, |
| "l1_avg": 0.000667589075035519, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 1.25829391957655, |
| "l1_avg": 0.8519289652506511, |
| "l0_avg": 0.9999999152289496 |
| }, |
| "merged": { |
| "l2_avg": 1.2582907919383366, |
| "l1_avg": 0.8519289652506511, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.234911150160937e-05, |
| "l1_avg": 3.281609517418676e-05, |
| "l0_avg": 0.9992525736490886 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11787663, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 75, |
| 16157, |
| 47841, |
| 1108, |
| 0, |
| 0, |
| 0, |
| 0, |
| 65, |
| 16359, |
| 48347, |
| 1120, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 678, |
| 45363, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 615, |
| 45504, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 85, |
| 24403, |
| 73817, |
| 292476, |
| 3739610, |
| 1234255, |
| 532958, |
| 0, |
| 98, |
| 24310, |
| 73735, |
| 292931, |
| 3740149, |
| 1233353, |
| 534300, |
| 0 |
| ], |
| "merged": [ |
| 95, |
| 24314, |
| 73611, |
| 291509, |
| 3733289, |
| 1237471, |
| 537342, |
| 0, |
| 83, |
| 24174, |
| 73513, |
| 291986, |
| 3733955, |
| 1236453, |
| 538685, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.33.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010821549009960685, |
| "l1_avg": 0.00937444633907742, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008496529717464153, |
| "l1_avg": 0.0006912794196978211, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.017933322180754746, |
| "l1_avg": 0.012208890914916993, |
| "l0_avg": 0.999998050265842 |
| }, |
| "merged": { |
| "l2_avg": 0.017930101246515347, |
| "l1_avg": 0.01220902336968316, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.392641340941786e-05, |
| "l1_avg": 4.054985216094388e-05, |
| "l0_avg": 0.9999890645345052 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796351, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 49, |
| 9543, |
| 29086, |
| 7521, |
| 0, |
| 0, |
| 0, |
| 0, |
| 39, |
| 9436, |
| 29006, |
| 7480, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 889, |
| 64745, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 932, |
| 64506, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 6633, |
| 1570896, |
| 2750457, |
| 1524848, |
| 50170, |
| 0, |
| 0, |
| 0, |
| 6518, |
| 1569355, |
| 2744119, |
| 1523933, |
| 49551, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 6422, |
| 1564925, |
| 2748761, |
| 1532015, |
| 50784, |
| 0, |
| 0, |
| 0, |
| 6632, |
| 1563131, |
| 2742583, |
| 1531140, |
| 50087, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.33.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010857940243350067, |
| "l1_avg": 0.009389176633622912, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008081376436166465, |
| "l1_avg": 0.0006537153967656195, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0944943827674162, |
| "l1_avg": 0.07103213204277886, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "merged": { |
| "l2_avg": 0.09448796166280883, |
| "l1_avg": 0.07103219562106662, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.4687149165433816e-05, |
| "l1_avg": 4.057571188443237e-05, |
| "l0_avg": 0.999945068359375 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474479, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 49, |
| 9483, |
| 28982, |
| 7460, |
| 0, |
| 0, |
| 0, |
| 0, |
| 38, |
| 9630, |
| 28962, |
| 7556, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 121, |
| 7945, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 130, |
| 8188, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 112, |
| 29641, |
| 87210, |
| 290360, |
| 329566, |
| 5, |
| 0, |
| 0, |
| 95, |
| 29534, |
| 87033, |
| 291254, |
| 329728, |
| 22, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 108, |
| 29470, |
| 86951, |
| 289733, |
| 330602, |
| 5, |
| 0, |
| 0, |
| 132, |
| 29406, |
| 86745, |
| 290632, |
| 330754, |
| 22, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.31.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001988889963153748, |
| "l1_avg": 0.001698748270670573, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008083834762870652, |
| "l1_avg": 0.000652537163760927, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 6.107189811720714, |
| "l1_avg": 5.747880015432099, |
| "l0_avg": 0.8742351098708164 |
| }, |
| "merged": { |
| "l2_avg": 6.106912159521883, |
| "l1_avg": 5.748857060185185, |
| "l0_avg": 0.8611710508370105 |
| }, |
| "diff": { |
| "l2_avg": 0.3965698503465099, |
| "l1_avg": 0.08114018192997685, |
| "l0_avg": 0.0458572189896195 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 48685839, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 66752977, |
| 91544100, |
| 108352722, |
| 60762717, |
| 85724465, |
| 60946830, |
| 42024902, |
| 14836291, |
| 66769494, |
| 91517011, |
| 108301957, |
| 60720884, |
| 85681759, |
| 60919436, |
| 42005957, |
| 14821698 |
| ], |
| "fp4_dist_after": [ |
| 73707183, |
| 123576535, |
| 107798979, |
| 71172403, |
| 73630863, |
| 51129682, |
| 26110427, |
| 3837602, |
| 73685180, |
| 123552477, |
| 107724696, |
| 71131937, |
| 73595091, |
| 51098361, |
| 26096113, |
| 3835671 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.31.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010760634829699979, |
| "l1_avg": 0.009312187963061862, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008602648588155741, |
| "l1_avg": 0.0007066914604769813, |
| "l0_avg": 0.9999986436631945 |
| }, |
| "original": { |
| "l2_avg": 0.02053447167078654, |
| "l1_avg": 0.020060064998673802, |
| "l0_avg": 0.8808726722811475 |
| }, |
| "merged": { |
| "l2_avg": 0.020536143249935575, |
| "l1_avg": 0.02006005557966821, |
| "l0_avg": 0.8666638400230878 |
| }, |
| "diff": { |
| "l2_avg": 0.001436445116996765, |
| "l1_avg": 0.00029977680724344133, |
| "l0_avg": 0.05084360240418234 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 107959597, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 126442604, |
| 168866093, |
| 211736713, |
| 118568995, |
| 177982903, |
| 131300967, |
| 93489895, |
| 32384593, |
| 126508361, |
| 169009586, |
| 212013514, |
| 118786590, |
| 178315263, |
| 131616305, |
| 93819065, |
| 32524953 |
| ], |
| "fp4_dist_after": [ |
| 141553325, |
| 240901124, |
| 214046776, |
| 145338239, |
| 151717799, |
| 105730505, |
| 54123866, |
| 7394720, |
| 141568197, |
| 241130267, |
| 214356591, |
| 145629133, |
| 152068277, |
| 106031092, |
| 54337934, |
| 7438555 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.32.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001985016512064122, |
| "l1_avg": 0.0016967341303825378, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008232200094392191, |
| "l1_avg": 0.0006650563329458237, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 6.520237019322389, |
| "l1_avg": 6.109363908179012, |
| "l0_avg": 0.869192593421465 |
| }, |
| "merged": { |
| "l2_avg": 6.520384237327641, |
| "l1_avg": 6.110518904320988, |
| "l0_avg": 0.855664325290256 |
| }, |
| "diff": { |
| "l2_avg": 0.45216955330678943, |
| "l1_avg": 0.09141820083429784, |
| "l0_avg": 0.04696241119761526 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 49859203, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 69439729, |
| 93423060, |
| 108589388, |
| 60071334, |
| 84549596, |
| 59663299, |
| 41028451, |
| 14175227, |
| 69436297, |
| 93381853, |
| 108546750, |
| 60051339, |
| 84513693, |
| 59635406, |
| 41007576, |
| 14170202 |
| ], |
| "fp4_dist_after": [ |
| 76616952, |
| 124858590, |
| 107557197, |
| 69299336, |
| 72177396, |
| 50181890, |
| 26302241, |
| 3941843, |
| 76621809, |
| 124797745, |
| 107520394, |
| 69264959, |
| 72146156, |
| 50162280, |
| 26294075, |
| 3940337 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.32.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010762447438209878, |
| "l1_avg": 0.009318623277876113, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008541959882366316, |
| "l1_avg": 0.0007010212375058068, |
| "l0_avg": 0.9999959309895833 |
| }, |
| "original": { |
| "l2_avg": 0.020701531569163004, |
| "l1_avg": 0.02010703946337288, |
| "l0_avg": 0.8802869203355578 |
| }, |
| "merged": { |
| "l2_avg": 0.020705025725894503, |
| "l1_avg": 0.02010702062536169, |
| "l0_avg": 0.8658811503280828 |
| }, |
| "diff": { |
| "l2_avg": 0.0015063641799820794, |
| "l1_avg": 0.0003089064727594823, |
| "l0_avg": 0.05147551359953704 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 109301376, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 127066655, |
| 169323949, |
| 211389934, |
| 118621541, |
| 177790285, |
| 131415222, |
| 93242208, |
| 31952570, |
| 127128076, |
| 169466605, |
| 211640608, |
| 118819360, |
| 178129384, |
| 131701144, |
| 93554391, |
| 32124468 |
| ], |
| "fp4_dist_after": [ |
| 142396726, |
| 240910618, |
| 213752245, |
| 144679284, |
| 151293116, |
| 105807299, |
| 54544522, |
| 7456544, |
| 142386733, |
| 241134122, |
| 214075416, |
| 144930410, |
| 151599576, |
| 106116310, |
| 54771104, |
| 7512375 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.33.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010783754473537011, |
| "l1_avg": 0.009326023525661892, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008408062848255397, |
| "l1_avg": 0.0006893027987745073, |
| "l0_avg": 0.9998806423611111 |
| }, |
| "original": { |
| "l2_avg": 0.020483662022484674, |
| "l1_avg": 0.02007679503641011, |
| "l0_avg": 0.87761903974745 |
| }, |
| "merged": { |
| "l2_avg": 0.020484843518998888, |
| "l1_avg": 0.02007682517722801, |
| "l0_avg": 0.8632409922281901 |
| }, |
| "diff": { |
| "l2_avg": 0.0014924380514356825, |
| "l1_avg": 0.0003099478909998764, |
| "l0_avg": 0.05104699546908155 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 108391475, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 129911772, |
| 172176458, |
| 212438917, |
| 118519756, |
| 176102150, |
| 129143070, |
| 91476262, |
| 31629531, |
| 129947847, |
| 172230110, |
| 212540560, |
| 118571123, |
| 176204231, |
| 129190028, |
| 91574810, |
| 31709775 |
| ], |
| "fp4_dist_after": [ |
| 145198574, |
| 243231775, |
| 213901310, |
| 143849753, |
| 149741751, |
| 104414074, |
| 53705464, |
| 7380269, |
| 145190908, |
| 243314017, |
| 214036389, |
| 143896345, |
| 149809459, |
| 104485423, |
| 53792680, |
| 7418209 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.34.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010779065737867858, |
| "l1_avg": 0.009320512082841662, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.000791400671005249, |
| "l1_avg": 0.0006349437753669918, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.0029396325020547955, |
| "l1_avg": 0.001895280016793145, |
| "l0_avg": 0.9999911838107639 |
| }, |
| "merged": { |
| "l2_avg": 0.002939929760872541, |
| "l1_avg": 0.001895800232887268, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.9538901686269635e-05, |
| "l1_avg": 3.8713418568174045e-05, |
| "l0_avg": 1.0 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474560, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 37, |
| 9709, |
| 28922, |
| 7304, |
| 0, |
| 0, |
| 0, |
| 0, |
| 40, |
| 9654, |
| 29059, |
| 7435, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 112, |
| 8143, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 128, |
| 8001, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 5220, |
| 652733, |
| 77455, |
| 2468, |
| 15, |
| 0, |
| 0, |
| 0, |
| 5093, |
| 652441, |
| 76687, |
| 2437, |
| 11, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4990, |
| 652190, |
| 78218, |
| 2491, |
| 15, |
| 0, |
| 0, |
| 0, |
| 5142, |
| 651561, |
| 77477, |
| 2465, |
| 11, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.34.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009045210232996877, |
| "l1_avg": 0.007822664454579353, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008421524902970543, |
| "l1_avg": 0.0006830255190531413, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.9319992245238286, |
| "l1_avg": 0.6789000193277995, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.9319920451724756, |
| "l1_avg": 0.6788999769422743, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.380351255657474e-05, |
| "l1_avg": 3.443386457446549e-05, |
| "l0_avg": 0.9994411892361111 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11789888, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 61, |
| 16216, |
| 47416, |
| 1292, |
| 0, |
| 0, |
| 0, |
| 0, |
| 63, |
| 16453, |
| 48185, |
| 1386, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 628, |
| 45399, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 613, |
| 45520, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 97, |
| 26041, |
| 77854, |
| 310821, |
| 4154069, |
| 1089736, |
| 239401, |
| 0, |
| 100, |
| 25910, |
| 78312, |
| 311020, |
| 4154952, |
| 1088366, |
| 239801, |
| 0 |
| ], |
| "merged": [ |
| 92, |
| 25951, |
| 77593, |
| 309803, |
| 4148342, |
| 1094155, |
| 242085, |
| 0, |
| 104, |
| 25819, |
| 78047, |
| 310036, |
| 4149080, |
| 1092785, |
| 242588, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.34.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010838229274241003, |
| "l1_avg": 0.009379080931345622, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008230885210653449, |
| "l1_avg": 0.0006665411056019366, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.019128157724869397, |
| "l1_avg": 0.013301542070176866, |
| "l0_avg": 0.999998050265842 |
| }, |
| "merged": { |
| "l2_avg": 0.01912476796924917, |
| "l1_avg": 0.013301674524943035, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2201569283583514e-05, |
| "l1_avg": 4.025311985363563e-05, |
| "l0_avg": 0.9999910990397135 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796375, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 33, |
| 9573, |
| 28862, |
| 7645, |
| 0, |
| 0, |
| 0, |
| 0, |
| 45, |
| 9528, |
| 28912, |
| 7562, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 941, |
| 64314, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 886, |
| 64931, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 6055, |
| 1456198, |
| 2641082, |
| 1735813, |
| 59526, |
| 0, |
| 0, |
| 0, |
| 6254, |
| 1455849, |
| 2641737, |
| 1734397, |
| 59569, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 6063, |
| 1450278, |
| 2638923, |
| 1743033, |
| 60248, |
| 0, |
| 0, |
| 0, |
| 6197, |
| 1450287, |
| 2639339, |
| 1741756, |
| 60356, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.34.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01078094353399264, |
| "l1_avg": 0.009319373634126452, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008200621814467013, |
| "l1_avg": 0.0006626153481192887, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.1032949652821427, |
| "l1_avg": 0.0805940310160319, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.10328790960653207, |
| "l1_avg": 0.08059409459431967, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.23589796712872e-05, |
| "l1_avg": 3.993473429646757e-05, |
| "l0_avg": 0.9999471028645833 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474482, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 23, |
| 9627, |
| 29115, |
| 7475, |
| 0, |
| 0, |
| 0, |
| 0, |
| 30, |
| 9620, |
| 28926, |
| 7344, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 113, |
| 8016, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 116, |
| 8139, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 90, |
| 24414, |
| 71447, |
| 257596, |
| 383360, |
| 1, |
| 0, |
| 0, |
| 114, |
| 23950, |
| 71299, |
| 258395, |
| 383892, |
| 2, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 91, |
| 24350, |
| 71223, |
| 256920, |
| 384344, |
| 1, |
| 0, |
| 0, |
| 78, |
| 23841, |
| 71104, |
| 257720, |
| 384886, |
| 2, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.35.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01077102129382514, |
| "l1_avg": 0.009320253796047635, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007494922610931098, |
| "l1_avg": 0.0005988676566630602, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.011011218198965937, |
| "l1_avg": 0.005670623646842109, |
| "l0_avg": 0.9999905056423611 |
| }, |
| "merged": { |
| "l2_avg": 0.011010761119357143, |
| "l1_avg": 0.00567108326488071, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.681499964181027e-05, |
| "l1_avg": 3.547788494163089e-05, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474549, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 38, |
| 9570, |
| 29051, |
| 7272, |
| 0, |
| 0, |
| 0, |
| 0, |
| 37, |
| 9579, |
| 29142, |
| 7471, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 153, |
| 8047, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 141, |
| 8043, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 4414, |
| 424798, |
| 256519, |
| 47225, |
| 3408, |
| 0, |
| 0, |
| 0, |
| 4568, |
| 425047, |
| 257700, |
| 47471, |
| 3410, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 4289, |
| 424146, |
| 257213, |
| 47547, |
| 3435, |
| 0, |
| 0, |
| 0, |
| 4215, |
| 424118, |
| 258353, |
| 47814, |
| 3430, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.35.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009037544116379451, |
| "l1_avg": 0.007818716578185558, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008217659151167459, |
| "l1_avg": 0.0006655888424979316, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.8542163548990249, |
| "l1_avg": 0.5608454386393229, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.8542135826742451, |
| "l1_avg": 0.5608454386393229, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.272079880929232e-05, |
| "l1_avg": 3.320748089916176e-05, |
| "l0_avg": 0.9995221455891927 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11790843, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 69, |
| 16427, |
| 47795, |
| 1172, |
| 0, |
| 0, |
| 0, |
| 0, |
| 56, |
| 16321, |
| 48124, |
| 1108, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 632, |
| 45434, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 667, |
| 45427, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 171, |
| 36902, |
| 110377, |
| 436929, |
| 4394901, |
| 718165, |
| 203066, |
| 0, |
| 161, |
| 36828, |
| 109973, |
| 436381, |
| 4393465, |
| 716440, |
| 202721, |
| 0 |
| ], |
| "merged": [ |
| 144, |
| 36739, |
| 110082, |
| 435569, |
| 4391910, |
| 721211, |
| 204849, |
| 0, |
| 149, |
| 36707, |
| 109615, |
| 435027, |
| 4390381, |
| 719487, |
| 204610, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.35.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010812475744118106, |
| "l1_avg": 0.009351201852162679, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007864029821591516, |
| "l1_avg": 0.0006312452023848891, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.016963383371904946, |
| "l1_avg": 0.011783044868045383, |
| "l0_avg": 0.9999963548448351 |
| }, |
| "merged": { |
| "l2_avg": 0.01696012578565523, |
| "l1_avg": 0.011783211761050754, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.0845336438525136e-05, |
| "l1_avg": 3.782585780653689e-05, |
| "l0_avg": 0.9999903361002604 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796366, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 52, |
| 9599, |
| 28917, |
| 7488, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9639, |
| 28892, |
| 7537, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 1009, |
| 64506, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 1058, |
| 64499, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 10527, |
| 1679653, |
| 2651466, |
| 1520432, |
| 34936, |
| 0, |
| 0, |
| 0, |
| 10512, |
| 1679571, |
| 2652309, |
| 1521591, |
| 35483, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 8947, |
| 1675416, |
| 2649758, |
| 1527513, |
| 35440, |
| 0, |
| 0, |
| 0, |
| 9089, |
| 1674916, |
| 2650772, |
| 1528695, |
| 35934, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.35.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010808365954439736, |
| "l1_avg": 0.009352295928531222, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007836657459847629, |
| "l1_avg": 0.0006292859907262027, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.07677665772556017, |
| "l1_avg": 0.056597137451171876, |
| "l0_avg": 1.0 |
| }, |
| "merged": { |
| "l2_avg": 0.07677169424939596, |
| "l1_avg": 0.056597137451171876, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.2236665963151466e-05, |
| "l1_avg": 3.7552669851316344e-05, |
| "l0_avg": 0.9999484592013889 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474484, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 31, |
| 9563, |
| 29233, |
| 7467, |
| 0, |
| 0, |
| 0, |
| 0, |
| 36, |
| 9659, |
| 28864, |
| 7307, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 147, |
| 8040, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 148, |
| 8049, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 136, |
| 37843, |
| 111327, |
| 338331, |
| 249390, |
| 6, |
| 0, |
| 0, |
| 135, |
| 37619, |
| 111036, |
| 338426, |
| 250308, |
| 3, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 136, |
| 37698, |
| 110918, |
| 337941, |
| 250333, |
| 6, |
| 0, |
| 0, |
| 143, |
| 37451, |
| 110747, |
| 337887, |
| 251297, |
| 3, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.4.self_attn.k_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.01076591959427408, |
| "l1_avg": 0.009306605656941731, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008587538613937795, |
| "l1_avg": 0.0006952178664505482, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.01585797178121923, |
| "l1_avg": 0.011376127931806776, |
| "l0_avg": 0.9999972873263889 |
| }, |
| "merged": { |
| "l2_avg": 0.015857173855304225, |
| "l1_avg": 0.011376256412929959, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.275526749577031e-05, |
| "l1_avg": 4.112234390858147e-05, |
| "l0_avg": 0.9999925401475694 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474549, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9781, |
| 28858, |
| 7389, |
| 0, |
| 0, |
| 0, |
| 0, |
| 26, |
| 9739, |
| 28844, |
| 7488, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 107, |
| 8116, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 121, |
| 8040, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 819, |
| 192984, |
| 356122, |
| 183557, |
| 2433, |
| 0, |
| 0, |
| 0, |
| 749, |
| 194201, |
| 356916, |
| 184419, |
| 2359, |
| 1, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 813, |
| 192195, |
| 355941, |
| 184535, |
| 2472, |
| 0, |
| 0, |
| 0, |
| 816, |
| 193294, |
| 356703, |
| 185399, |
| 2391, |
| 1, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.4.self_attn.o_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.009024229663517055, |
| "l1_avg": 0.007800421677529812, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008284640555179307, |
| "l1_avg": 0.0006717250992854437, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.035922444270539686, |
| "l1_avg": 0.022439919577704536, |
| "l0_avg": 0.9999992370605468 |
| }, |
| "merged": { |
| "l2_avg": 0.03591821929335118, |
| "l1_avg": 0.02243996196322971, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 4.276477861853911e-05, |
| "l1_avg": 3.359156350294749e-05, |
| "l0_avg": 0.9999812655978733 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796259, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 62, |
| 16432, |
| 47829, |
| 1096, |
| 0, |
| 0, |
| 0, |
| 0, |
| 62, |
| 16517, |
| 48022, |
| 1052, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 651, |
| 45374, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 641, |
| 45494, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 3443, |
| 874311, |
| 2157680, |
| 2536737, |
| 326888, |
| 82, |
| 2, |
| 0, |
| 3430, |
| 872607, |
| 2158124, |
| 2536498, |
| 326585, |
| 92, |
| 1, |
| 0 |
| ], |
| "merged": [ |
| 3396, |
| 870574, |
| 2153544, |
| 2542219, |
| 329359, |
| 86, |
| 2, |
| 0, |
| 3445, |
| 868750, |
| 2154064, |
| 2541833, |
| 329114, |
| 93, |
| 1, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.4.self_attn.q_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010772516619555623, |
| "l1_avg": 0.009311723046832615, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008821631738358712, |
| "l1_avg": 0.000717909773811698, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.015796447782305392, |
| "l1_avg": 0.011326229572296143, |
| "l0_avg": 0.999997795952691 |
| }, |
| "merged": { |
| "l2_avg": 0.01579325350407349, |
| "l1_avg": 0.011326328913370768, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.46793762045643e-05, |
| "l1_avg": 4.188904487010505e-05, |
| "l0_avg": 0.9999920315212674 |
| }, |
| "num_elements": 11796480, |
| "num_changed": 11796386, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 35, |
| 9779, |
| 28805, |
| 7421, |
| 0, |
| 0, |
| 0, |
| 0, |
| 34, |
| 9610, |
| 28933, |
| 7543, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 836, |
| 64694, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 863, |
| 64679, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 7357, |
| 1643505, |
| 2764641, |
| 1459698, |
| 21786, |
| 0, |
| 0, |
| 0, |
| 7366, |
| 1643690, |
| 2764209, |
| 1462486, |
| 21742, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 7361, |
| 1637115, |
| 2763257, |
| 1467135, |
| 22136, |
| 0, |
| 0, |
| 0, |
| 7393, |
| 1637280, |
| 2762663, |
| 1470055, |
| 22085, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.4.self_attn.v_proj.weight": { |
| "lora_A": { |
| "l2_avg": 0.010777247629114672, |
| "l1_avg": 0.009313287999894884, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008274975698441267, |
| "l1_avg": 0.0006694136536680162, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 0.03744736687390824, |
| "l1_avg": 0.028611909018622504, |
| "l0_avg": 0.9999993218315972 |
| }, |
| "merged": { |
| "l2_avg": 0.03744461183049304, |
| "l1_avg": 0.02861188252766927, |
| "l0_avg": 1.0 |
| }, |
| "diff": { |
| "l2_avg": 5.13245069597049e-05, |
| "l1_avg": 3.997667485641109e-05, |
| "l0_avg": 0.9999776204427083 |
| }, |
| "num_elements": 1474560, |
| "num_changed": 1474527, |
| "precision": "bf16", |
| "fp4_dist_before": null, |
| "fp4_dist_after": null, |
| "bf16_dists": { |
| "lora_A": [ |
| 44, |
| 9786, |
| 28692, |
| 7595, |
| 0, |
| 0, |
| 0, |
| 0, |
| 29, |
| 9716, |
| 28989, |
| 7309, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "lora_B": [ |
| 124, |
| 8070, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 111, |
| 8079, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0 |
| ], |
| "original": [ |
| 293, |
| 73234, |
| 198463, |
| 396065, |
| 69494, |
| 0, |
| 0, |
| 0, |
| 289, |
| 73611, |
| 198850, |
| 394612, |
| 69649, |
| 0, |
| 0, |
| 0 |
| ], |
| "merged": [ |
| 308, |
| 72892, |
| 197923, |
| 396277, |
| 70169, |
| 0, |
| 0, |
| 0, |
| 281, |
| 73298, |
| 198267, |
| 394792, |
| 70353, |
| 0, |
| 0, |
| 0 |
| ] |
| } |
| }, |
| "model.layers.33.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.001967066051181213, |
| "l1_avg": 0.0016857585973209805, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008013494962506399, |
| "l1_avg": 0.0006469392942057715, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 6.791428409733145, |
| "l1_avg": 6.038957851080247, |
| "l0_avg": 0.8664285184130257 |
| }, |
| "merged": { |
| "l2_avg": 6.7914375209451965, |
| "l1_avg": 6.040175540123457, |
| "l0_avg": 0.8531173734311704 |
| }, |
| "diff": { |
| "l2_avg": 0.46055840209221327, |
| "l1_avg": 0.08964060088734568, |
| "l0_avg": 0.045917565616560574 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 48749908, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 70914420, |
| 94840699, |
| 109395747, |
| 59592082, |
| 83304942, |
| 58256644, |
| 40269888, |
| 14303634, |
| 70896178, |
| 94820455, |
| 109373773, |
| 59586321, |
| 83291014, |
| 58245810, |
| 40278115, |
| 14313478 |
| ], |
| "fp4_dist_after": [ |
| 77968047, |
| 126400688, |
| 107803847, |
| 68943435, |
| 71161555, |
| 49141838, |
| 25541364, |
| 3903093, |
| 77974770, |
| 126377503, |
| 107789166, |
| 68929267, |
| 71146897, |
| 49140531, |
| 25552538, |
| 3908661 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.34.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0019787727588077156, |
| "l1_avg": 0.0016925851504007974, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007910636343325022, |
| "l1_avg": 0.000638117930955357, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 5.3758625516766285, |
| "l1_avg": 4.474571759259259, |
| "l0_avg": 0.8645557733229648 |
| }, |
| "merged": { |
| "l2_avg": 5.374715977570578, |
| "l1_avg": 4.475190972222222, |
| "l0_avg": 0.8514242431263864 |
| }, |
| "diff": { |
| "l2_avg": 0.32662715149629473, |
| "l1_avg": 0.06130116403838735, |
| "l0_avg": 0.045334484900957275 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 48130861, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 71899592, |
| 93191745, |
| 111314334, |
| 57043169, |
| 82840027, |
| 56791184, |
| 41665238, |
| 16071438, |
| 71899268, |
| 93169283, |
| 111336125, |
| 57033556, |
| 82856459, |
| 56811640, |
| 41685453, |
| 16074689 |
| ], |
| "fp4_dist_after": [ |
| 78868030, |
| 128555392, |
| 109297295, |
| 69335377, |
| 70411928, |
| 46620860, |
| 23847936, |
| 3877023, |
| 78872355, |
| 128537594, |
| 109319578, |
| 69339363, |
| 70432847, |
| 46635438, |
| 23857101, |
| 3875083 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.34.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010761618880643465, |
| "l1_avg": 0.009315382109747992, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0008455924659353061, |
| "l1_avg": 0.000693919344080819, |
| "l0_avg": 0.9913045247395833 |
| }, |
| "original": { |
| "l2_avg": 0.018961252106560602, |
| "l1_avg": 0.018871778323326583, |
| "l0_avg": 0.877765907946928 |
| }, |
| "merged": { |
| "l2_avg": 0.018962615066104464, |
| "l1_avg": 0.01887175195011092, |
| "l0_avg": 0.8635812236644603 |
| }, |
| "diff": { |
| "l2_avg": 0.0013919431302282545, |
| "l1_avg": 0.0002883671831201624, |
| "l0_avg": 0.0501850688604661 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 106561289, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 129762541, |
| 175417112, |
| 213550750, |
| 120440431, |
| 175500393, |
| 127505889, |
| 88812664, |
| 30547128, |
| 129785223, |
| 175427284, |
| 213593428, |
| 120463546, |
| 175525892, |
| 127527179, |
| 88888730, |
| 30618210 |
| ], |
| "fp4_dist_after": [ |
| 146081098, |
| 244003607, |
| 214503345, |
| 143687476, |
| 149379404, |
| 104237048, |
| 53487222, |
| 7415725, |
| 143585948, |
| 244024792, |
| 214555262, |
| 143709633, |
| 149394795, |
| 104294099, |
| 53561385, |
| 7445561 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.35.mlp.experts.down_proj": { |
| "lora_A": { |
| "l2_avg": 0.0019737768399023326, |
| "l1_avg": 0.0016896353827582466, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007663603270736226, |
| "l1_avg": 0.0006171308043930266, |
| "l0_avg": 1.0 |
| }, |
| "original": { |
| "l2_avg": 3.2189761123367853, |
| "l1_avg": 2.7294820601851852, |
| "l0_avg": 0.8542120568546248 |
| }, |
| "merged": { |
| "l2_avg": 3.2192573610666884, |
| "l1_avg": 2.7297779224537035, |
| "l0_avg": 0.8390532119185836 |
| }, |
| "diff": { |
| "l2_avg": 0.2214708318965717, |
| "l1_avg": 0.04508605580271026, |
| "l0_avg": 0.05089314119315442 |
| }, |
| "num_elements": 1061683200, |
| "num_changed": 54032393, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 77400038, |
| 97503793, |
| 111007345, |
| 56366728, |
| 79830722, |
| 54997849, |
| 39338294, |
| 14484184, |
| 77380572, |
| 97485295, |
| 110974809, |
| 56345864, |
| 79794568, |
| 54980040, |
| 39319123, |
| 14473976 |
| ], |
| "fp4_dist_after": [ |
| 85435939, |
| 132119234, |
| 107616878, |
| 65617820, |
| 66343045, |
| 45399420, |
| 24438148, |
| 3948031, |
| 85438562, |
| 132094578, |
| 107581941, |
| 65593335, |
| 66302961, |
| 45378178, |
| 24429375, |
| 3945755 |
| ], |
| "bf16_dists": null |
| }, |
| "model.layers.35.mlp.experts.gate_up_proj": { |
| "lora_A": { |
| "l2_avg": 0.010757444994135992, |
| "l1_avg": 0.009298588832219441, |
| "l0_avg": 1.0 |
| }, |
| "lora_B": { |
| "l2_avg": 0.0007726734446600128, |
| "l1_avg": 0.0006101116951968935, |
| "l0_avg": 0.9690185546875 |
| }, |
| "original": { |
| "l2_avg": 0.01955436070760091, |
| "l1_avg": 0.01941656795548804, |
| "l0_avg": 0.8758031506008572 |
| }, |
| "merged": { |
| "l2_avg": 0.01955662038591173, |
| "l1_avg": 0.019416575490692516, |
| "l0_avg": 0.8620844570207007 |
| }, |
| "diff": { |
| "l2_avg": 0.0014051520162158542, |
| "l1_avg": 0.000283958853026967, |
| "l0_avg": 0.048404886693130304 |
| }, |
| "num_elements": 2123366400, |
| "num_changed": 102781310, |
| "precision": "mxfp4", |
| "fp4_dist_before": [ |
| 131852904, |
| 176939904, |
| 215330074, |
| 119190590, |
| 173928706, |
| 125058342, |
| 87904997, |
| 30892485, |
| 131862513, |
| 176987358, |
| 215460566, |
| 119326667, |
| 174151607, |
| 125259439, |
| 88176332, |
| 31043916 |
| ], |
| "fp4_dist_after": [ |
| 150809623, |
| 244712190, |
| 215643038, |
| 142454528, |
| 148431570, |
| 102439976, |
| 53001559, |
| 7999802, |
| 142035607, |
| 244812224, |
| 215801701, |
| 142623443, |
| 148664136, |
| 102680816, |
| 53201327, |
| 8054860 |
| ], |
| "bf16_dists": null |
| } |
| } |