JuliaSLM-compressed / compression_metadata.json
LisaMegaWatts's picture
Compression metadata and results
22300e2 verified
{
"method": "downscaling_projection",
"source_model": "LisaMegaWatts/JuliaSLM",
"source_params": 5037312,
"source_loss": 3.552281068317482,
"best_config": "A-3M",
"finetune_steps": 2000,
"finetune_lr": 0.0006,
"configs": {
"A-3M": {
"config": {
"d_model": 192,
"n_layers": 6,
"n_heads": 3,
"head_dim": 64,
"ffn_inner": 480,
"context_length": 256,
"vocab_size": 2000
},
"params": 2930112,
"reduction": 0.41831834121055034,
"pre_finetune_loss": 6.9663840472007115,
"post_finetune_loss": 3.9081690383949335,
"post_finetune_ppl": 49.8076724983192
},
"B-2.5M": {
"config": {
"d_model": 192,
"n_layers": 5,
"n_heads": 3,
"head_dim": 64,
"ffn_inner": 480,
"context_length": 256,
"vocab_size": 2000
},
"params": 2505792,
"reduction": 0.5025537429486202,
"pre_finetune_loss": 7.5051396219378885,
"post_finetune_loss": 3.9149324556950593,
"post_finetune_ppl": 50.14568434125149
},
"C-2M": {
"config": {
"d_model": 192,
"n_layers": 4,
"n_heads": 3,
"head_dim": 64,
"ffn_inner": 480,
"context_length": 256,
"vocab_size": 2000
},
"params": 2081472,
"reduction": 0.58678914468669,
"pre_finetune_loss": 8.09140216928519,
"post_finetune_loss": 3.9662209947702864,
"post_finetune_ppl": 52.78467987729236
}
}
}