| { |
| "method": "downscaling_projection", |
| "source_model": "LisaMegaWatts/JuliaSLM", |
| "source_params": 5037312, |
| "source_loss": 3.552281068317482, |
| "best_config": "A-3M", |
| "finetune_steps": 2000, |
| "finetune_lr": 0.0006, |
| "configs": { |
| "A-3M": { |
| "config": { |
| "d_model": 192, |
| "n_layers": 6, |
| "n_heads": 3, |
| "head_dim": 64, |
| "ffn_inner": 480, |
| "context_length": 256, |
| "vocab_size": 2000 |
| }, |
| "params": 2930112, |
| "reduction": 0.41831834121055034, |
| "pre_finetune_loss": 6.9663840472007115, |
| "post_finetune_loss": 3.9081690383949335, |
| "post_finetune_ppl": 49.8076724983192 |
| }, |
| "B-2.5M": { |
| "config": { |
| "d_model": 192, |
| "n_layers": 5, |
| "n_heads": 3, |
| "head_dim": 64, |
| "ffn_inner": 480, |
| "context_length": 256, |
| "vocab_size": 2000 |
| }, |
| "params": 2505792, |
| "reduction": 0.5025537429486202, |
| "pre_finetune_loss": 7.5051396219378885, |
| "post_finetune_loss": 3.9149324556950593, |
| "post_finetune_ppl": 50.14568434125149 |
| }, |
| "C-2M": { |
| "config": { |
| "d_model": 192, |
| "n_layers": 4, |
| "n_heads": 3, |
| "head_dim": 64, |
| "ffn_inner": 480, |
| "context_length": 256, |
| "vocab_size": 2000 |
| }, |
| "params": 2081472, |
| "reduction": 0.58678914468669, |
| "pre_finetune_loss": 8.09140216928519, |
| "post_finetune_loss": 3.9662209947702864, |
| "post_finetune_ppl": 52.78467987729236 |
| } |
| } |
| } |