| { |
| "repo_id": "Lernex/Metis-1.4-base", |
| "name": "Metis-1.4 Base", |
| "release_stage": "base", |
| "model_type": "metis_mor_transformer", |
| "architecture": "metis_mor_decoder", |
| "estimated_params": 503772163, |
| "context_length": 1024, |
| "vocab_size": 16384, |
| "d_model": 1536, |
| "n_layer": 19, |
| "n_heads": 24, |
| "n_kv_heads": 8, |
| "head_dim": 64, |
| "hidden_act": "swiglu", |
| "mor_max_depth": 3, |
| "selected_continued_pretraining_mode": "static_sequence_mor", |
| "selected_checkpoint": { |
| "run": "metis14_static_sequence_continued_fused", |
| "step": 5000, |
| "train_loss": 3.6065, |
| "val_loss": 3.6026, |
| "perplexity": 36.69 |
| }, |
| "comparison_checkpoint": { |
| "run": "metis14_static_block_continued_probe", |
| "step": 5000, |
| "train_loss": 3.609, |
| "val_loss": 3.6575, |
| "perplexity": 38.76 |
| }, |
| "training_path": [ |
| "static_dense_pretrain", |
| "static_sequence_mor_continued_pretrain" |
| ], |
| "precision": { |
| "training_compute": "FP8 with BF16 master/export weights", |
| "export_dtype": "bfloat16", |
| "attention_backend": "flash_attention_3", |
| "loss_impl": "liger_fused_linear_ce" |
| }, |
| "artifact": { |
| "format": "safetensors", |
| "filename": "model.safetensors", |
| "sha256": "5b58e109bf3531dea6bdf57f75000ca04cc6a8934d6290b2f112e4c97a4a118e", |
| "s3_uri": "s3://lernex-metis-artifacts-151025633969-us-east-1/metis14/releases/base/model.safetensors" |
| }, |
| "quarantine": { |
| "status": "invalid_base_do_not_use", |
| "date": "2026-05-07", |
| "reason": "Pretraining batches pre-shifted labels before passing them to a model loss that already shifts internally, training the LM on an accidental two-token-ahead objective.", |
| "measured_loss_check": { |
| "continued_val_sample_training_double_shift_avg": 3.72265625, |
| "continued_val_sample_real_next_token_avg": 7.0390625 |
| }, |
| "fix": "scripts/train_mamba_lm.py get_batch now returns labels aligned with input_ids so the model's internal shift predicts the next token." |
| } |
| } |
|
|