{
  "model_type": "hyper_gpt_depth_communicator",
  "architectures": [
    "HyperGPTForCausalLM"
  ],
  "hyper_config": {
    "vocab_size": 4096,
    "block_size": 512,
    "n_layer": 6,
    "n_embd": 256,
    "n_head": 4,
    "n_kv_head": 2,
    "head_dim": 64,
    "intermediate_size": 640,
    "conn_dim": 64,
    "conn_head": 4,
    "dropout": 0.0,
    "rope_theta": 100000.0
  },
  "tokenizer_name": "AxiomicLabs/GPT-S-5M",
  "use_block_residuals": true,
  "use_depth_gate": false,
  "depth_gate_init": 0.0,
  "conn_stat_batch": 8,
  "conn_stat_tokens": 16,
  "auto_map": {
    "AutoConfig": "hyper.HyperGPTConfig",
    "AutoModelForCausalLM": "hyper.HyperGPTForCausalLM"
  }
}