Lance-3B-AWQ-INT4 / quantization_report.json
xocialize's picture
Initial AWQ-INT4 conversion — VQA-scoped variant of bytedance-research/Lance
53866ef verified
{
"source_dir": "/Users/dustinnielson/DEV_INT/lance-mlx-research/lance-mlx-models/Lance-3B-bf16",
"stats_dir": "/Users/dustinnielson/DEV_INT/lance-mlx-research/lance-mlx/notes/phase5n_diagnostics/phase5c3_awq_port/act_stats",
"bits": 4,
"group_size": 128,
"mode": "affine+awq",
"n_grid": 20,
"bf16_bytes": 12371046496,
"quantized_bytes": 3306811488,
"compression_ratio": 0.2673024864201432,
"fusion_groups_applied": 144,
"fusion_groups_skipped": 0,
"skip_patterns_always": [
"time_embedder.proj_in",
"time_embedder.proj_out",
"llm2vae"
],
"awq_per_group": {
"layers.0.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.00458593072835356,
"consumers": [
"layers.0.self_attn.q_proj",
"layers.0.self_attn.k_proj",
"layers.0.self_attn.v_proj"
]
},
"layers.0.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.0021403880091384053,
"consumers": [
"layers.0.self_attn.q_proj_moe_gen",
"layers.0.self_attn.k_proj_moe_gen",
"layers.0.self_attn.v_proj_moe_gen"
]
},
"layers.0.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.007643559714779258,
"consumers": [
"layers.0.mlp.gate_proj",
"layers.0.mlp.up_proj"
]
},
"layers.0.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.003576420247554779,
"consumers": [
"layers.0.mlp_moe_gen.gate_proj",
"layers.0.mlp_moe_gen.up_proj"
]
},
"layers.1.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.07270884048193693,
"consumers": [
"layers.1.self_attn.q_proj",
"layers.1.self_attn.k_proj",
"layers.1.self_attn.v_proj"
]
},
"layers.1.input_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.05615841131657362,
"consumers": [
"layers.1.self_attn.q_proj_moe_gen",
"layers.1.self_attn.k_proj_moe_gen",
"layers.1.self_attn.v_proj_moe_gen"
]
},
"layers.1.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.01055069756694138,
"consumers": [
"layers.1.mlp.gate_proj",
"layers.1.mlp.up_proj"
]
},
"layers.1.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.0033836811780929565,
"consumers": [
"layers.1.mlp_moe_gen.gate_proj",
"layers.1.mlp_moe_gen.up_proj"
]
},
"layers.2.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.03681113151833415,
"consumers": [
"layers.2.self_attn.q_proj",
"layers.2.self_attn.k_proj",
"layers.2.self_attn.v_proj"
]
},
"layers.2.input_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.029231476597487926,
"consumers": [
"layers.2.self_attn.q_proj_moe_gen",
"layers.2.self_attn.k_proj_moe_gen",
"layers.2.self_attn.v_proj_moe_gen"
]
},
"layers.2.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.009307235479354858,
"consumers": [
"layers.2.mlp.gate_proj",
"layers.2.mlp.up_proj"
]
},
"layers.2.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.0031711964402347803,
"consumers": [
"layers.2.mlp_moe_gen.gate_proj",
"layers.2.mlp_moe_gen.up_proj"
]
},
"layers.3.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.022754479199647903,
"consumers": [
"layers.3.self_attn.q_proj",
"layers.3.self_attn.k_proj",
"layers.3.self_attn.v_proj"
]
},
"layers.3.input_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.020830828696489334,
"consumers": [
"layers.3.self_attn.q_proj_moe_gen",
"layers.3.self_attn.k_proj_moe_gen",
"layers.3.self_attn.v_proj_moe_gen"
]
},
"layers.3.post_attention_layernorm": {
"best_alpha": 0.5,
"best_err": 0.016386197414249182,
"consumers": [
"layers.3.mlp.gate_proj",
"layers.3.mlp.up_proj"
]
},
"layers.3.post_attention_layernorm_moe_gen": {
"best_alpha": 0.5,
"best_err": 0.005372598301619291,
"consumers": [
"layers.3.mlp_moe_gen.gate_proj",
"layers.3.mlp_moe_gen.up_proj"
]
},
"layers.4.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.01877523958683014,
"consumers": [
"layers.4.self_attn.q_proj",
"layers.4.self_attn.k_proj",
"layers.4.self_attn.v_proj"
]
},
"layers.4.input_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.015583840198814869,
"consumers": [
"layers.4.self_attn.q_proj_moe_gen",
"layers.4.self_attn.k_proj_moe_gen",
"layers.4.self_attn.v_proj_moe_gen"
]
},
"layers.4.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.009718624409288168,
"consumers": [
"layers.4.mlp.gate_proj",
"layers.4.mlp.up_proj"
]
},
"layers.4.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.004090428119525313,
"consumers": [
"layers.4.mlp_moe_gen.gate_proj",
"layers.4.mlp_moe_gen.up_proj"
]
},
"layers.5.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.011677489150315523,
"consumers": [
"layers.5.self_attn.q_proj",
"layers.5.self_attn.k_proj",
"layers.5.self_attn.v_proj"
]
},
"layers.5.input_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.011435149004682899,
"consumers": [
"layers.5.self_attn.q_proj_moe_gen",
"layers.5.self_attn.k_proj_moe_gen",
"layers.5.self_attn.v_proj_moe_gen"
]
},
"layers.5.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.004989819368347526,
"consumers": [
"layers.5.mlp.gate_proj",
"layers.5.mlp.up_proj"
]
},
"layers.5.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.0024960024747997522,
"consumers": [
"layers.5.mlp_moe_gen.gate_proj",
"layers.5.mlp_moe_gen.up_proj"
]
},
"layers.6.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.06253223866224289,
"consumers": [
"layers.6.self_attn.q_proj",
"layers.6.self_attn.k_proj",
"layers.6.self_attn.v_proj"
]
},
"layers.6.input_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.03238382376730442,
"consumers": [
"layers.6.self_attn.q_proj_moe_gen",
"layers.6.self_attn.k_proj_moe_gen",
"layers.6.self_attn.v_proj_moe_gen"
]
},
"layers.6.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.005734460661187768,
"consumers": [
"layers.6.mlp.gate_proj",
"layers.6.mlp.up_proj"
]
},
"layers.6.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.0033549898071214557,
"consumers": [
"layers.6.mlp_moe_gen.gate_proj",
"layers.6.mlp_moe_gen.up_proj"
]
},
"layers.7.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.022551667410880327,
"consumers": [
"layers.7.self_attn.q_proj",
"layers.7.self_attn.k_proj",
"layers.7.self_attn.v_proj"
]
},
"layers.7.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.01729161897674203,
"consumers": [
"layers.7.self_attn.q_proj_moe_gen",
"layers.7.self_attn.k_proj_moe_gen",
"layers.7.self_attn.v_proj_moe_gen"
]
},
"layers.7.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.007926714839413762,
"consumers": [
"layers.7.mlp.gate_proj",
"layers.7.mlp.up_proj"
]
},
"layers.7.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.004999170079827309,
"consumers": [
"layers.7.mlp_moe_gen.gate_proj",
"layers.7.mlp_moe_gen.up_proj"
]
},
"layers.8.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.016717045567929745,
"consumers": [
"layers.8.self_attn.q_proj",
"layers.8.self_attn.k_proj",
"layers.8.self_attn.v_proj"
]
},
"layers.8.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.015823927707970142,
"consumers": [
"layers.8.self_attn.q_proj_moe_gen",
"layers.8.self_attn.k_proj_moe_gen",
"layers.8.self_attn.v_proj_moe_gen"
]
},
"layers.8.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.006246325559914112,
"consumers": [
"layers.8.mlp.gate_proj",
"layers.8.mlp.up_proj"
]
},
"layers.8.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.004818351706489921,
"consumers": [
"layers.8.mlp_moe_gen.gate_proj",
"layers.8.mlp_moe_gen.up_proj"
]
},
"layers.9.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.025030743330717087,
"consumers": [
"layers.9.self_attn.q_proj",
"layers.9.self_attn.k_proj",
"layers.9.self_attn.v_proj"
]
},
"layers.9.input_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.019082439597696066,
"consumers": [
"layers.9.self_attn.q_proj_moe_gen",
"layers.9.self_attn.k_proj_moe_gen",
"layers.9.self_attn.v_proj_moe_gen"
]
},
"layers.9.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.004797130124643445,
"consumers": [
"layers.9.mlp.gate_proj",
"layers.9.mlp.up_proj"
]
},
"layers.9.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.00391571584623307,
"consumers": [
"layers.9.mlp_moe_gen.gate_proj",
"layers.9.mlp_moe_gen.up_proj"
]
},
"layers.10.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.007787489332258701,
"consumers": [
"layers.10.self_attn.q_proj",
"layers.10.self_attn.k_proj",
"layers.10.self_attn.v_proj"
]
},
"layers.10.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.008221070980653167,
"consumers": [
"layers.10.self_attn.q_proj_moe_gen",
"layers.10.self_attn.k_proj_moe_gen",
"layers.10.self_attn.v_proj_moe_gen"
]
},
"layers.10.post_attention_layernorm": {
"best_alpha": 0.3,
"best_err": 0.003546421998180449,
"consumers": [
"layers.10.mlp.gate_proj",
"layers.10.mlp.up_proj"
]
},
"layers.10.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.0033734560711309314,
"consumers": [
"layers.10.mlp_moe_gen.gate_proj",
"layers.10.mlp_moe_gen.up_proj"
]
},
"layers.11.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.006951858755201101,
"consumers": [
"layers.11.self_attn.q_proj",
"layers.11.self_attn.k_proj",
"layers.11.self_attn.v_proj"
]
},
"layers.11.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.007689060410484672,
"consumers": [
"layers.11.self_attn.q_proj_moe_gen",
"layers.11.self_attn.k_proj_moe_gen",
"layers.11.self_attn.v_proj_moe_gen"
]
},
"layers.11.post_attention_layernorm": {
"best_alpha": 0.3,
"best_err": 0.003067293087951839,
"consumers": [
"layers.11.mlp.gate_proj",
"layers.11.mlp.up_proj"
]
},
"layers.11.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.003110864432528615,
"consumers": [
"layers.11.mlp_moe_gen.gate_proj",
"layers.11.mlp_moe_gen.up_proj"
]
},
"layers.12.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.02311427053064108,
"consumers": [
"layers.12.self_attn.q_proj",
"layers.12.self_attn.k_proj",
"layers.12.self_attn.v_proj"
]
},
"layers.12.input_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.02629701793193817,
"consumers": [
"layers.12.self_attn.q_proj_moe_gen",
"layers.12.self_attn.k_proj_moe_gen",
"layers.12.self_attn.v_proj_moe_gen"
]
},
"layers.12.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.0031268250895664096,
"consumers": [
"layers.12.mlp.gate_proj",
"layers.12.mlp.up_proj"
]
},
"layers.12.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.0030514480313286185,
"consumers": [
"layers.12.mlp_moe_gen.gate_proj",
"layers.12.mlp_moe_gen.up_proj"
]
},
"layers.13.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.007222589338198304,
"consumers": [
"layers.13.self_attn.q_proj",
"layers.13.self_attn.k_proj",
"layers.13.self_attn.v_proj"
]
},
"layers.13.input_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.0087540487293154,
"consumers": [
"layers.13.self_attn.q_proj_moe_gen",
"layers.13.self_attn.k_proj_moe_gen",
"layers.13.self_attn.v_proj_moe_gen"
]
},
"layers.13.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.003296053037047386,
"consumers": [
"layers.13.mlp.gate_proj",
"layers.13.mlp.up_proj"
]
},
"layers.13.post_attention_layernorm_moe_gen": {
"best_alpha": 0.3,
"best_err": 0.003499676357023418,
"consumers": [
"layers.13.mlp_moe_gen.gate_proj",
"layers.13.mlp_moe_gen.up_proj"
]
},
"layers.14.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.006669960916042328,
"consumers": [
"layers.14.self_attn.q_proj",
"layers.14.self_attn.k_proj",
"layers.14.self_attn.v_proj"
]
},
"layers.14.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.008936543483287096,
"consumers": [
"layers.14.self_attn.q_proj_moe_gen",
"layers.14.self_attn.k_proj_moe_gen",
"layers.14.self_attn.v_proj_moe_gen"
]
},
"layers.14.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.0037863863399252295,
"consumers": [
"layers.14.mlp.gate_proj",
"layers.14.mlp.up_proj"
]
},
"layers.14.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.004165650112554431,
"consumers": [
"layers.14.mlp_moe_gen.gate_proj",
"layers.14.mlp_moe_gen.up_proj"
]
},
"layers.15.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.00765526108443737,
"consumers": [
"layers.15.self_attn.q_proj",
"layers.15.self_attn.k_proj",
"layers.15.self_attn.v_proj"
]
},
"layers.15.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.010691209929063916,
"consumers": [
"layers.15.self_attn.q_proj_moe_gen",
"layers.15.self_attn.k_proj_moe_gen",
"layers.15.self_attn.v_proj_moe_gen"
]
},
"layers.15.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.003892258508130908,
"consumers": [
"layers.15.mlp.gate_proj",
"layers.15.mlp.up_proj"
]
},
"layers.15.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.004360296297818422,
"consumers": [
"layers.15.mlp_moe_gen.gate_proj",
"layers.15.mlp_moe_gen.up_proj"
]
},
"layers.16.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.009200290311127901,
"consumers": [
"layers.16.self_attn.q_proj",
"layers.16.self_attn.k_proj",
"layers.16.self_attn.v_proj"
]
},
"layers.16.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.013372566550970078,
"consumers": [
"layers.16.self_attn.q_proj_moe_gen",
"layers.16.self_attn.k_proj_moe_gen",
"layers.16.self_attn.v_proj_moe_gen"
]
},
"layers.16.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.003502526553347707,
"consumers": [
"layers.16.mlp.gate_proj",
"layers.16.mlp.up_proj"
]
},
"layers.16.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.003909384133294225,
"consumers": [
"layers.16.mlp_moe_gen.gate_proj",
"layers.16.mlp_moe_gen.up_proj"
]
},
"layers.17.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.01001800550147891,
"consumers": [
"layers.17.self_attn.q_proj",
"layers.17.self_attn.k_proj",
"layers.17.self_attn.v_proj"
]
},
"layers.17.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.014895168598741293,
"consumers": [
"layers.17.self_attn.q_proj_moe_gen",
"layers.17.self_attn.k_proj_moe_gen",
"layers.17.self_attn.v_proj_moe_gen"
]
},
"layers.17.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.003303446574136615,
"consumers": [
"layers.17.mlp.gate_proj",
"layers.17.mlp.up_proj"
]
},
"layers.17.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.0037332052597776055,
"consumers": [
"layers.17.mlp_moe_gen.gate_proj",
"layers.17.mlp_moe_gen.up_proj"
]
},
"layers.18.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.010829327395185828,
"consumers": [
"layers.18.self_attn.q_proj",
"layers.18.self_attn.k_proj",
"layers.18.self_attn.v_proj"
]
},
"layers.18.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.015611939132213593,
"consumers": [
"layers.18.self_attn.q_proj_moe_gen",
"layers.18.self_attn.k_proj_moe_gen",
"layers.18.self_attn.v_proj_moe_gen"
]
},
"layers.18.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.003714948776178062,
"consumers": [
"layers.18.mlp.gate_proj",
"layers.18.mlp.up_proj"
]
},
"layers.18.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.004194366978481412,
"consumers": [
"layers.18.mlp_moe_gen.gate_proj",
"layers.18.mlp_moe_gen.up_proj"
]
},
"layers.19.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.01116279885172844,
"consumers": [
"layers.19.self_attn.q_proj",
"layers.19.self_attn.k_proj",
"layers.19.self_attn.v_proj"
]
},
"layers.19.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.018096713349223137,
"consumers": [
"layers.19.self_attn.q_proj_moe_gen",
"layers.19.self_attn.k_proj_moe_gen",
"layers.19.self_attn.v_proj_moe_gen"
]
},
"layers.19.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.0038424793165177107,
"consumers": [
"layers.19.mlp.gate_proj",
"layers.19.mlp.up_proj"
]
},
"layers.19.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.004652554169297218,
"consumers": [
"layers.19.mlp_moe_gen.gate_proj",
"layers.19.mlp_moe_gen.up_proj"
]
},
"layers.20.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.01315189665183425,
"consumers": [
"layers.20.self_attn.q_proj",
"layers.20.self_attn.k_proj",
"layers.20.self_attn.v_proj"
]
},
"layers.20.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.02170101460069418,
"consumers": [
"layers.20.self_attn.q_proj_moe_gen",
"layers.20.self_attn.k_proj_moe_gen",
"layers.20.self_attn.v_proj_moe_gen"
]
},
"layers.20.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.0043482230976223946,
"consumers": [
"layers.20.mlp.gate_proj",
"layers.20.mlp.up_proj"
]
},
"layers.20.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.005148760508745909,
"consumers": [
"layers.20.mlp_moe_gen.gate_proj",
"layers.20.mlp_moe_gen.up_proj"
]
},
"layers.21.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.014262943528592587,
"consumers": [
"layers.21.self_attn.q_proj",
"layers.21.self_attn.k_proj",
"layers.21.self_attn.v_proj"
]
},
"layers.21.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.026137451641261578,
"consumers": [
"layers.21.self_attn.q_proj_moe_gen",
"layers.21.self_attn.k_proj_moe_gen",
"layers.21.self_attn.v_proj_moe_gen"
]
},
"layers.21.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.004324082052335143,
"consumers": [
"layers.21.mlp.gate_proj",
"layers.21.mlp.up_proj"
]
},
"layers.21.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.005147382616996765,
"consumers": [
"layers.21.mlp_moe_gen.gate_proj",
"layers.21.mlp_moe_gen.up_proj"
]
},
"layers.22.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.017558230087161064,
"consumers": [
"layers.22.self_attn.q_proj",
"layers.22.self_attn.k_proj",
"layers.22.self_attn.v_proj"
]
},
"layers.22.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.02854403667151928,
"consumers": [
"layers.22.self_attn.q_proj_moe_gen",
"layers.22.self_attn.k_proj_moe_gen",
"layers.22.self_attn.v_proj_moe_gen"
]
},
"layers.22.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.00529686501249671,
"consumers": [
"layers.22.mlp.gate_proj",
"layers.22.mlp.up_proj"
]
},
"layers.22.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.006503301672637463,
"consumers": [
"layers.22.mlp_moe_gen.gate_proj",
"layers.22.mlp_moe_gen.up_proj"
]
},
"layers.23.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.017843658570200205,
"consumers": [
"layers.23.self_attn.q_proj",
"layers.23.self_attn.k_proj",
"layers.23.self_attn.v_proj"
]
},
"layers.23.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.028661489952355623,
"consumers": [
"layers.23.self_attn.q_proj_moe_gen",
"layers.23.self_attn.k_proj_moe_gen",
"layers.23.self_attn.v_proj_moe_gen"
]
},
"layers.23.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.006145666586235166,
"consumers": [
"layers.23.mlp.gate_proj",
"layers.23.mlp.up_proj"
]
},
"layers.23.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.007576683536171913,
"consumers": [
"layers.23.mlp_moe_gen.gate_proj",
"layers.23.mlp_moe_gen.up_proj"
]
},
"layers.24.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.020195242948830128,
"consumers": [
"layers.24.self_attn.q_proj",
"layers.24.self_attn.k_proj",
"layers.24.self_attn.v_proj"
]
},
"layers.24.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.039047873578965664,
"consumers": [
"layers.24.self_attn.q_proj_moe_gen",
"layers.24.self_attn.k_proj_moe_gen",
"layers.24.self_attn.v_proj_moe_gen"
]
},
"layers.24.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.006804545875638723,
"consumers": [
"layers.24.mlp.gate_proj",
"layers.24.mlp.up_proj"
]
},
"layers.24.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.008703016676008701,
"consumers": [
"layers.24.mlp_moe_gen.gate_proj",
"layers.24.mlp_moe_gen.up_proj"
]
},
"layers.25.input_layernorm": {
"best_alpha": 0.4,
"best_err": 0.027022873517125845,
"consumers": [
"layers.25.self_attn.q_proj",
"layers.25.self_attn.k_proj",
"layers.25.self_attn.v_proj"
]
},
"layers.25.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.0456560542806983,
"consumers": [
"layers.25.self_attn.q_proj_moe_gen",
"layers.25.self_attn.k_proj_moe_gen",
"layers.25.self_attn.v_proj_moe_gen"
]
},
"layers.25.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.008004232309758663,
"consumers": [
"layers.25.mlp.gate_proj",
"layers.25.mlp.up_proj"
]
},
"layers.25.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.01017605559900403,
"consumers": [
"layers.25.mlp_moe_gen.gate_proj",
"layers.25.mlp_moe_gen.up_proj"
]
},
"layers.26.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.0346508389338851,
"consumers": [
"layers.26.self_attn.q_proj",
"layers.26.self_attn.k_proj",
"layers.26.self_attn.v_proj"
]
},
"layers.26.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.06741268001496792,
"consumers": [
"layers.26.self_attn.q_proj_moe_gen",
"layers.26.self_attn.k_proj_moe_gen",
"layers.26.self_attn.v_proj_moe_gen"
]
},
"layers.26.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.008322665002197027,
"consumers": [
"layers.26.mlp.gate_proj",
"layers.26.mlp.up_proj"
]
},
"layers.26.post_attention_layernorm_moe_gen": {
"best_alpha": 0.5,
"best_err": 0.010978639125823975,
"consumers": [
"layers.26.mlp_moe_gen.gate_proj",
"layers.26.mlp_moe_gen.up_proj"
]
},
"layers.27.input_layernorm": {
"best_alpha": 0.4,
"best_err": 0.04764394834637642,
"consumers": [
"layers.27.self_attn.q_proj",
"layers.27.self_attn.k_proj",
"layers.27.self_attn.v_proj"
]
},
"layers.27.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.08542009443044662,
"consumers": [
"layers.27.self_attn.q_proj_moe_gen",
"layers.27.self_attn.k_proj_moe_gen",
"layers.27.self_attn.v_proj_moe_gen"
]
},
"layers.27.post_attention_layernorm": {
"best_alpha": 0.45,
"best_err": 0.008581496309489012,
"consumers": [
"layers.27.mlp.gate_proj",
"layers.27.mlp.up_proj"
]
},
"layers.27.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.011591496411710978,
"consumers": [
"layers.27.mlp_moe_gen.gate_proj",
"layers.27.mlp_moe_gen.up_proj"
]
},
"layers.28.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.05938507616519928,
"consumers": [
"layers.28.self_attn.q_proj",
"layers.28.self_attn.k_proj",
"layers.28.self_attn.v_proj"
]
},
"layers.28.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.12293312698602676,
"consumers": [
"layers.28.self_attn.q_proj_moe_gen",
"layers.28.self_attn.k_proj_moe_gen",
"layers.28.self_attn.v_proj_moe_gen"
]
},
"layers.28.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.008860271889716387,
"consumers": [
"layers.28.mlp.gate_proj",
"layers.28.mlp.up_proj"
]
},
"layers.28.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.01251167617738247,
"consumers": [
"layers.28.mlp_moe_gen.gate_proj",
"layers.28.mlp_moe_gen.up_proj"
]
},
"layers.29.input_layernorm": {
"best_alpha": 0.35,
"best_err": 0.06842886283993721,
"consumers": [
"layers.29.self_attn.q_proj",
"layers.29.self_attn.k_proj",
"layers.29.self_attn.v_proj"
]
},
"layers.29.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.15185140073299408,
"consumers": [
"layers.29.self_attn.q_proj_moe_gen",
"layers.29.self_attn.k_proj_moe_gen",
"layers.29.self_attn.v_proj_moe_gen"
]
},
"layers.29.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.009515361860394478,
"consumers": [
"layers.29.mlp.gate_proj",
"layers.29.mlp.up_proj"
]
},
"layers.29.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.013548723887652159,
"consumers": [
"layers.29.mlp_moe_gen.gate_proj",
"layers.29.mlp_moe_gen.up_proj"
]
},
"layers.30.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.07855793833732605,
"consumers": [
"layers.30.self_attn.q_proj",
"layers.30.self_attn.k_proj",
"layers.30.self_attn.v_proj"
]
},
"layers.30.input_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.14745976030826569,
"consumers": [
"layers.30.self_attn.q_proj_moe_gen",
"layers.30.self_attn.k_proj_moe_gen",
"layers.30.self_attn.v_proj_moe_gen"
]
},
"layers.30.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.010349276009947062,
"consumers": [
"layers.30.mlp.gate_proj",
"layers.30.mlp.up_proj"
]
},
"layers.30.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.014536601025611162,
"consumers": [
"layers.30.mlp_moe_gen.gate_proj",
"layers.30.mlp_moe_gen.up_proj"
]
},
"layers.31.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.10708250477910042,
"consumers": [
"layers.31.self_attn.q_proj",
"layers.31.self_attn.k_proj",
"layers.31.self_attn.v_proj"
]
},
"layers.31.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.2615283280611038,
"consumers": [
"layers.31.self_attn.q_proj_moe_gen",
"layers.31.self_attn.k_proj_moe_gen",
"layers.31.self_attn.v_proj_moe_gen"
]
},
"layers.31.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.011434435844421387,
"consumers": [
"layers.31.mlp.gate_proj",
"layers.31.mlp.up_proj"
]
},
"layers.31.post_attention_layernorm_moe_gen": {
"best_alpha": 0.35,
"best_err": 0.016750527545809746,
"consumers": [
"layers.31.mlp_moe_gen.gate_proj",
"layers.31.mlp_moe_gen.up_proj"
]
},
"layers.32.input_layernorm": {
"best_alpha": 0.25,
"best_err": 0.11255145259201527,
"consumers": [
"layers.32.self_attn.q_proj",
"layers.32.self_attn.k_proj",
"layers.32.self_attn.v_proj"
]
},
"layers.32.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.1822471246123314,
"consumers": [
"layers.32.self_attn.q_proj_moe_gen",
"layers.32.self_attn.k_proj_moe_gen",
"layers.32.self_attn.v_proj_moe_gen"
]
},
"layers.32.post_attention_layernorm": {
"best_alpha": 0.3,
"best_err": 0.013713904656469822,
"consumers": [
"layers.32.mlp.gate_proj",
"layers.32.mlp.up_proj"
]
},
"layers.32.post_attention_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.017479958944022655,
"consumers": [
"layers.32.mlp_moe_gen.gate_proj",
"layers.32.mlp_moe_gen.up_proj"
]
},
"layers.33.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.17432252317667007,
"consumers": [
"layers.33.self_attn.q_proj",
"layers.33.self_attn.k_proj",
"layers.33.self_attn.v_proj"
]
},
"layers.33.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.22210053354501724,
"consumers": [
"layers.33.self_attn.q_proj_moe_gen",
"layers.33.self_attn.k_proj_moe_gen",
"layers.33.self_attn.v_proj_moe_gen"
]
},
"layers.33.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.01600974891334772,
"consumers": [
"layers.33.mlp.gate_proj",
"layers.33.mlp.up_proj"
]
},
"layers.33.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.02009424101561308,
"consumers": [
"layers.33.mlp_moe_gen.gate_proj",
"layers.33.mlp_moe_gen.up_proj"
]
},
"layers.34.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.19069868326187134,
"consumers": [
"layers.34.self_attn.q_proj",
"layers.34.self_attn.k_proj",
"layers.34.self_attn.v_proj"
]
},
"layers.34.input_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.49640433490276337,
"consumers": [
"layers.34.self_attn.q_proj_moe_gen",
"layers.34.self_attn.k_proj_moe_gen",
"layers.34.self_attn.v_proj_moe_gen"
]
},
"layers.34.post_attention_layernorm": {
"best_alpha": 0.35,
"best_err": 0.0172486649826169,
"consumers": [
"layers.34.mlp.gate_proj",
"layers.34.mlp.up_proj"
]
},
"layers.34.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.021609684452414513,
"consumers": [
"layers.34.mlp_moe_gen.gate_proj",
"layers.34.mlp_moe_gen.up_proj"
]
},
"layers.35.input_layernorm": {
"best_alpha": 0.3,
"best_err": 0.19978100806474686,
"consumers": [
"layers.35.self_attn.q_proj",
"layers.35.self_attn.k_proj",
"layers.35.self_attn.v_proj"
]
},
"layers.35.input_layernorm_moe_gen": {
"best_alpha": 0.4,
"best_err": 0.30665621906518936,
"consumers": [
"layers.35.self_attn.q_proj_moe_gen",
"layers.35.self_attn.k_proj_moe_gen",
"layers.35.self_attn.v_proj_moe_gen"
]
},
"layers.35.post_attention_layernorm": {
"best_alpha": 0.4,
"best_err": 0.012281936127692461,
"consumers": [
"layers.35.mlp.gate_proj",
"layers.35.mlp.up_proj"
]
},
"layers.35.post_attention_layernorm_moe_gen": {
"best_alpha": 0.45,
"best_err": 0.011713964398950338,
"consumers": [
"layers.35.mlp_moe_gen.gate_proj",
"layers.35.mlp_moe_gen.up_proj"
]
}
}
}