JuliaSLM-compressed-svd / svd_compression_metadata.json
LisaMegaWatts's picture
SVD compression metadata and per-layer rank schedules
335da09 verified
{
"method": "svd_guided_compression",
"description": "Per-layer SVD rank targets from energy analysis (wolves signal without evolution)",
"source_model": "LisaMegaWatts/JuliaSLM",
"source_params": 5037312,
"source_loss": 3.552281068317482,
"best_config": "SVD-95",
"finetune_steps": 2000,
"finetune_lr": 0.0006,
"configs": {
"SVD-95": {
"rank_schedule": {
"blocks.0.attn.wq": 149,
"blocks.0.attn.wk": 150,
"blocks.0.attn.wv": 157,
"blocks.0.attn.wo": 155,
"blocks.0.ffn.w1": 206,
"blocks.0.ffn.v": 206,
"blocks.0.ffn.w2": 210,
"blocks.1.attn.wq": 149,
"blocks.1.attn.wk": 148,
"blocks.1.attn.wv": 156,
"blocks.1.attn.wo": 157,
"blocks.1.ffn.w1": 205,
"blocks.1.ffn.v": 205,
"blocks.1.ffn.w2": 206,
"blocks.2.attn.wq": 145,
"blocks.2.attn.wk": 144,
"blocks.2.attn.wv": 151,
"blocks.2.attn.wo": 152,
"blocks.2.ffn.w1": 204,
"blocks.2.ffn.v": 204,
"blocks.2.ffn.w2": 205,
"blocks.3.attn.wq": 141,
"blocks.3.attn.wk": 142,
"blocks.3.attn.wv": 147,
"blocks.3.attn.wo": 147,
"blocks.3.ffn.w1": 202,
"blocks.3.ffn.v": 202,
"blocks.3.ffn.w2": 204,
"blocks.4.attn.wq": 144,
"blocks.4.attn.wk": 143,
"blocks.4.attn.wv": 149,
"blocks.4.attn.wo": 150,
"blocks.4.ffn.w1": 200,
"blocks.4.ffn.v": 201,
"blocks.4.ffn.w2": 203,
"blocks.5.attn.wq": 147,
"blocks.5.attn.wk": 145,
"blocks.5.attn.wv": 153,
"blocks.5.attn.wo": 153,
"blocks.5.ffn.w1": 197,
"blocks.5.ffn.v": 200,
"blocks.5.ffn.w2": 200
},
"params": 5624576,
"reduction": -0.11658281242059254,
"pre_finetune_loss": 4.249334152818549,
"post_finetune_loss": 3.739079835722694,
"post_finetune_ppl": 42.05927091506798
},
"SVD-90": {
"rank_schedule": {
"blocks.0.attn.wq": 122,
"blocks.0.attn.wk": 124,
"blocks.0.attn.wv": 131,
"blocks.0.attn.wo": 129,
"blocks.0.ffn.w1": 177,
"blocks.0.ffn.v": 177,
"blocks.0.ffn.w2": 182,
"blocks.1.attn.wq": 122,
"blocks.1.attn.wk": 121,
"blocks.1.attn.wv": 130,
"blocks.1.attn.wo": 131,
"blocks.1.ffn.w1": 176,
"blocks.1.ffn.v": 176,
"blocks.1.ffn.w2": 177,
"blocks.2.attn.wq": 118,
"blocks.2.attn.wk": 117,
"blocks.2.attn.wv": 125,
"blocks.2.attn.wo": 126,
"blocks.2.ffn.w1": 174,
"blocks.2.ffn.v": 174,
"blocks.2.ffn.w2": 176,
"blocks.3.attn.wq": 112,
"blocks.3.attn.wk": 113,
"blocks.3.attn.wv": 120,
"blocks.3.attn.wo": 120,
"blocks.3.ffn.w1": 172,
"blocks.3.ffn.v": 172,
"blocks.3.ffn.w2": 175,
"blocks.4.attn.wq": 117,
"blocks.4.attn.wk": 116,
"blocks.4.attn.wv": 123,
"blocks.4.attn.wo": 123,
"blocks.4.ffn.w1": 169,
"blocks.4.ffn.v": 170,
"blocks.4.ffn.w2": 173,
"blocks.5.attn.wq": 120,
"blocks.5.attn.wk": 117,
"blocks.5.attn.wv": 127,
"blocks.5.attn.wo": 126,
"blocks.5.ffn.w1": 164,
"blocks.5.ffn.v": 169,
"blocks.5.ffn.w2": 169
},
"params": 4812800,
"reduction": 0.04456980230726226,
"pre_finetune_loss": 5.035423757144361,
"post_finetune_loss": 3.7561473862894847,
"post_finetune_ppl": 42.78328060891619
},
"SVD-80": {
"rank_schedule": {
"blocks.0.attn.wq": 88,
"blocks.0.attn.wk": 91,
"blocks.0.attn.wv": 99,
"blocks.0.attn.wo": 97,
"blocks.0.ffn.w1": 137,
"blocks.0.ffn.v": 137,
"blocks.0.ffn.w2": 143,
"blocks.1.attn.wq": 89,
"blocks.1.attn.wk": 89,
"blocks.1.attn.wv": 99,
"blocks.1.attn.wo": 100,
"blocks.1.ffn.w1": 136,
"blocks.1.ffn.v": 136,
"blocks.1.ffn.w2": 137,
"blocks.2.attn.wq": 84,
"blocks.2.attn.wk": 83,
"blocks.2.attn.wv": 93,
"blocks.2.attn.wo": 94,
"blocks.2.ffn.w1": 134,
"blocks.2.ffn.v": 134,
"blocks.2.ffn.w2": 136,
"blocks.3.attn.wq": 78,
"blocks.3.attn.wk": 79,
"blocks.3.attn.wv": 87,
"blocks.3.attn.wo": 88,
"blocks.3.ffn.w1": 130,
"blocks.3.ffn.v": 130,
"blocks.3.ffn.w2": 135,
"blocks.4.attn.wq": 83,
"blocks.4.attn.wk": 82,
"blocks.4.attn.wv": 90,
"blocks.4.attn.wo": 91,
"blocks.4.ffn.w1": 127,
"blocks.4.ffn.v": 129,
"blocks.4.ffn.w2": 132,
"blocks.5.attn.wq": 87,
"blocks.5.attn.wk": 83,
"blocks.5.attn.wv": 95,
"blocks.5.attn.wo": 94,
"blocks.5.ffn.w1": 122,
"blocks.5.ffn.v": 127,
"blocks.5.ffn.w2": 127
},
"params": 3753088,
"reduction": 0.2549423184428521,
"pre_finetune_loss": 6.312526171997246,
"post_finetune_loss": 3.8183020801558074,
"post_finetune_ppl": 45.52684173028366
}
}
}