| { |
| "method": "svd_guided_compression", |
| "description": "Per-layer SVD rank targets from energy analysis (wolves signal without evolution)", |
| "source_model": "LisaMegaWatts/JuliaSLM", |
| "source_params": 5037312, |
| "source_loss": 3.552281068317482, |
| "best_config": "SVD-95", |
| "finetune_steps": 2000, |
| "finetune_lr": 0.0006, |
| "configs": { |
| "SVD-95": { |
| "rank_schedule": { |
| "blocks.0.attn.wq": 149, |
| "blocks.0.attn.wk": 150, |
| "blocks.0.attn.wv": 157, |
| "blocks.0.attn.wo": 155, |
| "blocks.0.ffn.w1": 206, |
| "blocks.0.ffn.v": 206, |
| "blocks.0.ffn.w2": 210, |
| "blocks.1.attn.wq": 149, |
| "blocks.1.attn.wk": 148, |
| "blocks.1.attn.wv": 156, |
| "blocks.1.attn.wo": 157, |
| "blocks.1.ffn.w1": 205, |
| "blocks.1.ffn.v": 205, |
| "blocks.1.ffn.w2": 206, |
| "blocks.2.attn.wq": 145, |
| "blocks.2.attn.wk": 144, |
| "blocks.2.attn.wv": 151, |
| "blocks.2.attn.wo": 152, |
| "blocks.2.ffn.w1": 204, |
| "blocks.2.ffn.v": 204, |
| "blocks.2.ffn.w2": 205, |
| "blocks.3.attn.wq": 141, |
| "blocks.3.attn.wk": 142, |
| "blocks.3.attn.wv": 147, |
| "blocks.3.attn.wo": 147, |
| "blocks.3.ffn.w1": 202, |
| "blocks.3.ffn.v": 202, |
| "blocks.3.ffn.w2": 204, |
| "blocks.4.attn.wq": 144, |
| "blocks.4.attn.wk": 143, |
| "blocks.4.attn.wv": 149, |
| "blocks.4.attn.wo": 150, |
| "blocks.4.ffn.w1": 200, |
| "blocks.4.ffn.v": 201, |
| "blocks.4.ffn.w2": 203, |
| "blocks.5.attn.wq": 147, |
| "blocks.5.attn.wk": 145, |
| "blocks.5.attn.wv": 153, |
| "blocks.5.attn.wo": 153, |
| "blocks.5.ffn.w1": 197, |
| "blocks.5.ffn.v": 200, |
| "blocks.5.ffn.w2": 200 |
| }, |
| "params": 5624576, |
| "reduction": -0.11658281242059254, |
| "pre_finetune_loss": 4.249334152818549, |
| "post_finetune_loss": 3.739079835722694, |
| "post_finetune_ppl": 42.05927091506798 |
| }, |
| "SVD-90": { |
| "rank_schedule": { |
| "blocks.0.attn.wq": 122, |
| "blocks.0.attn.wk": 124, |
| "blocks.0.attn.wv": 131, |
| "blocks.0.attn.wo": 129, |
| "blocks.0.ffn.w1": 177, |
| "blocks.0.ffn.v": 177, |
| "blocks.0.ffn.w2": 182, |
| "blocks.1.attn.wq": 122, |
| "blocks.1.attn.wk": 121, |
| "blocks.1.attn.wv": 130, |
| "blocks.1.attn.wo": 131, |
| "blocks.1.ffn.w1": 176, |
| "blocks.1.ffn.v": 176, |
| "blocks.1.ffn.w2": 177, |
| "blocks.2.attn.wq": 118, |
| "blocks.2.attn.wk": 117, |
| "blocks.2.attn.wv": 125, |
| "blocks.2.attn.wo": 126, |
| "blocks.2.ffn.w1": 174, |
| "blocks.2.ffn.v": 174, |
| "blocks.2.ffn.w2": 176, |
| "blocks.3.attn.wq": 112, |
| "blocks.3.attn.wk": 113, |
| "blocks.3.attn.wv": 120, |
| "blocks.3.attn.wo": 120, |
| "blocks.3.ffn.w1": 172, |
| "blocks.3.ffn.v": 172, |
| "blocks.3.ffn.w2": 175, |
| "blocks.4.attn.wq": 117, |
| "blocks.4.attn.wk": 116, |
| "blocks.4.attn.wv": 123, |
| "blocks.4.attn.wo": 123, |
| "blocks.4.ffn.w1": 169, |
| "blocks.4.ffn.v": 170, |
| "blocks.4.ffn.w2": 173, |
| "blocks.5.attn.wq": 120, |
| "blocks.5.attn.wk": 117, |
| "blocks.5.attn.wv": 127, |
| "blocks.5.attn.wo": 126, |
| "blocks.5.ffn.w1": 164, |
| "blocks.5.ffn.v": 169, |
| "blocks.5.ffn.w2": 169 |
| }, |
| "params": 4812800, |
| "reduction": 0.04456980230726226, |
| "pre_finetune_loss": 5.035423757144361, |
| "post_finetune_loss": 3.7561473862894847, |
| "post_finetune_ppl": 42.78328060891619 |
| }, |
| "SVD-80": { |
| "rank_schedule": { |
| "blocks.0.attn.wq": 88, |
| "blocks.0.attn.wk": 91, |
| "blocks.0.attn.wv": 99, |
| "blocks.0.attn.wo": 97, |
| "blocks.0.ffn.w1": 137, |
| "blocks.0.ffn.v": 137, |
| "blocks.0.ffn.w2": 143, |
| "blocks.1.attn.wq": 89, |
| "blocks.1.attn.wk": 89, |
| "blocks.1.attn.wv": 99, |
| "blocks.1.attn.wo": 100, |
| "blocks.1.ffn.w1": 136, |
| "blocks.1.ffn.v": 136, |
| "blocks.1.ffn.w2": 137, |
| "blocks.2.attn.wq": 84, |
| "blocks.2.attn.wk": 83, |
| "blocks.2.attn.wv": 93, |
| "blocks.2.attn.wo": 94, |
| "blocks.2.ffn.w1": 134, |
| "blocks.2.ffn.v": 134, |
| "blocks.2.ffn.w2": 136, |
| "blocks.3.attn.wq": 78, |
| "blocks.3.attn.wk": 79, |
| "blocks.3.attn.wv": 87, |
| "blocks.3.attn.wo": 88, |
| "blocks.3.ffn.w1": 130, |
| "blocks.3.ffn.v": 130, |
| "blocks.3.ffn.w2": 135, |
| "blocks.4.attn.wq": 83, |
| "blocks.4.attn.wk": 82, |
| "blocks.4.attn.wv": 90, |
| "blocks.4.attn.wo": 91, |
| "blocks.4.ffn.w1": 127, |
| "blocks.4.ffn.v": 129, |
| "blocks.4.ffn.w2": 132, |
| "blocks.5.attn.wq": 87, |
| "blocks.5.attn.wk": 83, |
| "blocks.5.attn.wv": 95, |
| "blocks.5.attn.wo": 94, |
| "blocks.5.ffn.w1": 122, |
| "blocks.5.ffn.v": 127, |
| "blocks.5.ffn.w2": 127 |
| }, |
| "params": 3753088, |
| "reduction": 0.2549423184428521, |
| "pre_finetune_loss": 6.312526171997246, |
| "post_finetune_loss": 3.8183020801558074, |
| "post_finetune_ppl": 45.52684173028366 |
| } |
| } |
| } |