{
  "model_name": "Qwen3-8M-GPT2",
  "model_type": "Qwen3ForCausalLM",
  "tokenizer": "gpt2",
  "dtype": "bfloat16",
  "vocab_size": 50257,
  "hidden_size": 128,
  "num_layers": 12,
  "num_attention_heads": 4,
  "num_key_value_heads": 2,
  "head_dim": 32,
  "intermediate_size": 384,
  "max_position_embeddings": 2048,
  "rope_theta": 10000,
  "parameters": 8796160,
  "tie_word_embeddings": true,
  "attention_type": "full_attention",
  "positional_encoding": "rope",
  "normalization": "rmsnorm",
  "activation": "swiglu",
  "xsa_enabled": true,
  "xsa_paper": "arxiv 2603.09078"
}