{
  "architectures": [
    "OpenMythosForCausalLM"
  ],
  "bias": false,
  "block_size": 256,
  "bos_token_id": 1,
  "dtype": "float32",
  "effective_expected_depth": 10,
  "eos_token_id": 2,
  "head_dim": 64,
  "init_values": {
    "embed_scale": 16.0,
    "embedding": 0.03952847075210474,
    "out_proj": 0.008838834764831844,
    "std": 0.03952847075210474
  },
  "injection_type": "lti",
  "intermediate_size": 1024,
  "kv_lora_rank": 512,
  "mean_backprop_depth": 2,
  "mean_recurrence": 4,
  "model_type": "open_mythos",
  "moe_intermediate_size": 1024,
  "moe_top_k": 2,
  "n_embd": 256,
  "n_heads": 4,
  "n_kv_heads": 4,
  "n_layers": 4,
  "n_layers_in_coda": 1,
  "n_layers_in_prelude": 1,
  "n_layers_in_recurrent_block": 2,
  "n_routed_experts": 8,
  "n_shared_experts": 2,
  "norm_eps": 1e-06,
  "num_key_value_heads": 4,
  "pad_token_id": 0,
  "q_lora_rank": 1536,
  "qk_bias": true,
  "rope_base": 50000.0,
  "rope_head_dim": 64,
  "state_init": "like-init",
  "tie_embeddings": true,
  "tie_word_embeddings": true,
  "transformers_version": "5.6.0",
  "use_cache": false,
  "use_mla": false,
  "use_moe": false,
  "vocab_size": 50257
}