wwwww / metadata.json
C10X's picture
Upload 9 files
7b1332d verified
{
"model_name": "Qwen3-8M-GPT2",
"model_type": "Qwen3ForCausalLM",
"tokenizer": "gpt2",
"dtype": "bfloat16",
"vocab_size": 50257,
"hidden_size": 128,
"num_layers": 12,
"num_attention_heads": 4,
"num_key_value_heads": 2,
"head_dim": 32,
"intermediate_size": 384,
"max_position_embeddings": 2048,
"rope_theta": 10000,
"parameters": 8796160,
"tie_word_embeddings": true,
"attention_type": "full_attention",
"positional_encoding": "rope",
"normalization": "rmsnorm",
"activation": "swiglu",
"xsa_enabled": true,
"xsa_paper": "arxiv 2603.09078"
}