Metis-1.4-base / training_summary.json
GiuliannoV's picture
Quarantine invalid base release after alignment audit
54d0c46 verified
{
"repo_id": "Lernex/Metis-1.4-base",
"name": "Metis-1.4 Base",
"release_stage": "base",
"model_type": "metis_mor_transformer",
"architecture": "metis_mor_decoder",
"estimated_params": 503772163,
"context_length": 1024,
"vocab_size": 16384,
"d_model": 1536,
"n_layer": 19,
"n_heads": 24,
"n_kv_heads": 8,
"head_dim": 64,
"hidden_act": "swiglu",
"mor_max_depth": 3,
"selected_continued_pretraining_mode": "static_sequence_mor",
"selected_checkpoint": {
"run": "metis14_static_sequence_continued_fused",
"step": 5000,
"train_loss": 3.6065,
"val_loss": 3.6026,
"perplexity": 36.69
},
"comparison_checkpoint": {
"run": "metis14_static_block_continued_probe",
"step": 5000,
"train_loss": 3.609,
"val_loss": 3.6575,
"perplexity": 38.76
},
"training_path": [
"static_dense_pretrain",
"static_sequence_mor_continued_pretrain"
],
"precision": {
"training_compute": "FP8 with BF16 master/export weights",
"export_dtype": "bfloat16",
"attention_backend": "flash_attention_3",
"loss_impl": "liger_fused_linear_ce"
},
"artifact": {
"format": "safetensors",
"filename": "model.safetensors",
"sha256": "5b58e109bf3531dea6bdf57f75000ca04cc6a8934d6290b2f112e4c97a4a118e",
"s3_uri": "s3://lernex-metis-artifacts-151025633969-us-east-1/metis14/releases/base/model.safetensors"
},
"quarantine": {
"status": "invalid_base_do_not_use",
"date": "2026-05-07",
"reason": "Pretraining batches were built with pre-shifted labels and then passed to a model loss that already shifts internally, so the LM was trained on an accidental two-token-ahead objective.",
"measured_loss_check": {
"continued_val_sample_training_double_shift_avg": 3.72265625,
"continued_val_sample_real_next_token_avg": 7.0390625
},
"fix": "get_batch in scripts/train_mamba_lm.py now returns labels aligned with input_ids, so the model's internal shift predicts the next token."
}
}