GeminiFan207 committed
Commit c3b1e6a · verified · 1 Parent(s): feb49ea

Configuring after creating config.json

Files changed (1)
  1. config.json +41 -0
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "model_type": "tinystate",
+   "architectures": ["TinyStateForCausalLM"],
+   "model_name": "TinyState-19B-A9B",
+   "hidden_size": 6144,
+   "num_hidden_layers": 48,
+   "num_attention_heads": 48,
+   "num_key_value_heads": 8,
+   "intermediate_size": 16384,
+   "hidden_act": "silu",
+   "max_position_embeddings": 32768,
+   "initializer_range": 0.02,
+   "rms_norm_eps": 1e-6,
+   "use_cache": true,
+   "pad_token_id": 151643,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "tie_word_embeddings": false,
+   "rope_theta": 1000000.0,
+   "rope_scaling": null,
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "vocab_size": 151936,
+   "model_params": "19B",
+   "distillation_ratio": 0.12,
+   "target_params": 19000000000,
+   "teacher_model": "Qwen/Qwen3-235B-A22B",
+   "chunk_count": 8,
+   "safetensors_format": true,
+   "version": "A9B",
+   "torch_dtype": "float16",
+   "attn_implementation": "flash_attention_2",
+   "quantization_config": {
+     "load_in_4bit": true,
+     "bnb_4bit_quant_type": "nf4",
+     "bnb_4bit_compute_dtype": "float16"
+   },
+   "moe_active": true,
+   "num_experts": 8,
+   "num_experts_per_tok": 2
+ }
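
For reference, the attention sizes in this file are internally consistent: a hidden_size of 6144 over 48 attention heads gives a head dimension of 128, and 48 query heads over 8 key/value heads gives 6-way grouped-query attention. Below is a minimal sketch of how a config like this might be consumed with transformers; it is not a verified loader. The repo id is an assumption inferred from model_name, and since "tinystate" is a custom model_type, AutoConfig would only resolve it if the repo ships its own modeling code (hence trust_remote_code=True).

```python
# Sketch only: repo id and custom-code availability are assumptions,
# not confirmed by this diff.
import json

import torch
from transformers import AutoConfig, BitsAndBytesConfig

# Inspect the raw file, including the custom bookkeeping keys
# (model_params, distillation_ratio, teacher_model, chunk_count, ...).
with open("config.json") as f:
    cfg = json.load(f)

# Sanity check: 6144 / 48 heads -> head_dim of 128.
assert cfg["hidden_size"] % cfg["num_attention_heads"] == 0

print("teacher:", cfg["teacher_model"],
      "| experts per token:", cfg["num_experts_per_tok"])

# The embedded quantization_config maps onto a BitsAndBytesConfig,
# the usual way 4-bit NF4 loading is expressed in transformers.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["quantization_config"]["load_in_4bit"],
    bnb_4bit_quant_type=cfg["quantization_config"]["bnb_4bit_quant_type"],
    bnb_4bit_compute_dtype=torch.float16,
)

# Assumed repo id; requires the repo to provide the "tinystate" modeling code.
config = AutoConfig.from_pretrained(
    "GeminiFan207/TinyState-19B-A9B", trust_remote_code=True
)
```

In a typical workflow, bnb_config would then be passed as the quantization_config= argument to AutoModelForCausalLM.from_pretrained so the weights load in 4-bit NF4 with float16 compute, matching what the JSON declares.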