ling_base_qwentok_actmag1024 / expert_replacement_stats.json
kaizen9's picture
Upload folder using huggingface_hub
f51425d verified
{
"model.layers.1.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 154,
"reason": "mean activation magnitude=0.5228",
"mean_activation_magnitude": 0.522780179977417
},
"model.layers.2.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 97,
"reason": "mean activation magnitude=0.7987",
"mean_activation_magnitude": 0.7986612915992737
},
"model.layers.3.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 213,
"reason": "mean activation magnitude=1.4180",
"mean_activation_magnitude": 1.41801917552948
},
"model.layers.4.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 226,
"reason": "mean activation magnitude=1.9387",
"mean_activation_magnitude": 1.9387125968933105
},
"model.layers.5.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 8,
"reason": "mean activation magnitude=2.6871",
"mean_activation_magnitude": 2.687067747116089
},
"model.layers.6.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 7,
"reason": "mean activation magnitude=4.3966",
"mean_activation_magnitude": 4.396553039550781
},
"model.layers.7.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 15,
"reason": "mean activation magnitude=5.1095",
"mean_activation_magnitude": 5.109454154968262
},
"model.layers.8.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 29,
"reason": "mean activation magnitude=6.9894",
"mean_activation_magnitude": 6.989428997039795
},
"model.layers.9.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 169,
"reason": "mean activation magnitude=5.9495",
"mean_activation_magnitude": 5.949464321136475
},
"model.layers.10.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 175,
"reason": "mean activation magnitude=8.8505",
"mean_activation_magnitude": 8.850502014160156
},
"model.layers.11.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 55,
"reason": "mean activation magnitude=9.3091",
"mean_activation_magnitude": 9.309072494506836
},
"model.layers.12.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 154,
"reason": "mean activation magnitude=14.2129",
"mean_activation_magnitude": 14.212921142578125
},
"model.layers.13.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 245,
"reason": "mean activation magnitude=8.2107",
"mean_activation_magnitude": 8.210664749145508
},
"model.layers.14.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 55,
"reason": "mean activation magnitude=18.4452",
"mean_activation_magnitude": 18.445188522338867
},
"model.layers.15.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 4,
"reason": "mean activation magnitude=18.9182",
"mean_activation_magnitude": 18.91815948486328
},
"model.layers.16.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 115,
"reason": "mean activation magnitude=0.0000",
"mean_activation_magnitude": 0.0
},
"model.layers.17.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 200,
"reason": "mean activation magnitude=13.2505",
"mean_activation_magnitude": 13.250476837158203
},
"model.layers.18.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 168,
"reason": "mean activation magnitude=12.8750",
"mean_activation_magnitude": 12.874970436096191
},
"model.layers.19.mlp": {
"method": "activation_magnitude",
"merge": "replace",
"target_expert_idx": 142,
"reason": "mean activation magnitude=0.0000",
"mean_activation_magnitude": 0.0
}
}