{ "model_type": "nsa", "architectures": [ "NSAForCausalLM" ], "auto_map": { "AutoConfig": "configuration_nsa.NSAConfig", "AutoModelForCausalLM": "modeling_nsa.NSAForCausalLM" }, "vocab_size": 256, "hidden_size": 768, "num_hidden_layers": 12, "num_attention_heads": 12, "n_kv_groups": 2, "d_k": 64, "d_v": 64, "max_position_embeddings": 2048, "rope_theta": 10000, "nsa": { "branches": [ "cmp", "sel", "win" ], "window": 512, "gqa_groups": 2, "block": 32, "stride": 16, "sel_block": 64, "sel_top_n": 16 } }