{ "repo_id": "Lernex/Metis-1.4-base", "name": "Metis-1.4 Base", "release_stage": "base", "model_type": "metis_mor_transformer", "architecture": "metis_mor_decoder", "estimated_params": 503772163, "context_length": 1024, "vocab_size": 16384, "d_model": 1536, "n_layer": 19, "n_heads": 24, "n_kv_heads": 8, "head_dim": 64, "hidden_act": "swiglu", "mor_max_depth": 3, "selected_continued_pretraining_mode": "static_sequence_mor", "selected_checkpoint": { "run": "metis14_static_sequence_continued_fused", "step": 5000, "train_loss": 3.6065, "val_loss": 3.6026, "perplexity": 36.69 }, "comparison_checkpoint": { "run": "metis14_static_block_continued_probe", "step": 5000, "train_loss": 3.609, "val_loss": 3.6575, "perplexity": 38.76 }, "training_path": [ "static_dense_pretrain", "static_sequence_mor_continued_pretrain" ], "precision": { "training_compute": "FP8 with BF16 master/export weights", "export_dtype": "bfloat16", "attention_backend": "flash_attention_3", "loss_impl": "liger_fused_linear_ce" }, "artifact": { "format": "safetensors", "filename": "model.safetensors", "sha256": "5b58e109bf3531dea6bdf57f75000ca04cc6a8934d6290b2f112e4c97a4a118e", "s3_uri": "s3://lernex-metis-artifacts-151025633969-us-east-1/metis14/releases/base/model.safetensors" }, "quarantine": { "status": "invalid_base_do_not_use", "date": "2026-05-07", "reason": "Pretraining batches pre-shifted labels before passing them to a model loss that already shifts internally, training the LM on an accidental two-token-ahead objective.", "measured_loss_check": { "continued_val_sample_training_double_shift_avg": 3.72265625, "continued_val_sample_real_next_token_avg": 7.0390625 }, "fix": "scripts/train_mamba_lm.py get_batch now returns labels aligned with input_ids so the model's internal shift predicts the next token." } }