{ "method": "downscaling_projection", "source_model": "LisaMegaWatts/JuliaSLM", "source_params": 5037312, "source_loss": 3.552281068317482, "best_config": "A-3M", "finetune_steps": 2000, "finetune_lr": 0.0006, "configs": { "A-3M": { "config": { "d_model": 192, "n_layers": 6, "n_heads": 3, "head_dim": 64, "ffn_inner": 480, "context_length": 256, "vocab_size": 2000 }, "params": 2930112, "reduction": 0.41831834121055034, "pre_finetune_loss": 6.9663840472007115, "post_finetune_loss": 3.9081690383949335, "post_finetune_ppl": 49.8076724983192 }, "B-2.5M": { "config": { "d_model": 192, "n_layers": 5, "n_heads": 3, "head_dim": 64, "ffn_inner": 480, "context_length": 256, "vocab_size": 2000 }, "params": 2505792, "reduction": 0.5025537429486202, "pre_finetune_loss": 7.5051396219378885, "post_finetune_loss": 3.9149324556950593, "post_finetune_ppl": 50.14568434125149 }, "C-2M": { "config": { "d_model": 192, "n_layers": 4, "n_heads": 3, "head_dim": 64, "ffn_inner": 480, "context_length": 256, "vocab_size": 2000 }, "params": 2081472, "reduction": 0.58678914468669, "pre_finetune_loss": 8.09140216928519, "post_finetune_loss": 3.9662209947702864, "post_finetune_ppl": 52.78467987729236 } } }