{ "model_type": "linnet", "architectures": [ "LLM" ], "depth": 12, "block_size": 1024, "vocab_size": 32768, "n_experts": 8, "n_active_experts": 2, "rope_base": 50000, "attention": "GQA+QKNorm+FlashAttention", "ffn": "SwiGLU", "normalization": "RMSNorm", "sequence_mixing": "CausalDepthwiseConv1d_kernel3", "sparsity": "MoE_top2_of_8", "optimizer": "Muon+AdamW", "torch_dtype": "bfloat16", "transformers_version": "n/a", "pipeline_tag": "text-generation" }