pycraft-1 / config.json
imshadow0's picture
Upload config.json with huggingface_hub
705ef57 verified
Raw
History Blame Contribute Delete
837 Bytes
{
"model_type": "pycraft_decoder_only",
"vocab_size": 32000,
"d_model": 512,
"n_layers": 8,
"n_heads": 8,
"n_kv_heads": 2,
"d_ff": 1408,
"max_seq_len": 1024,
"use_qk_norm": true,
"rope_theta": 10000.0,
"dropout": 0.0,
"architecture_notes": {
"attention": "Grouped Query Attention (GQA), 8Q/2KV",
"positional": "RoPE rotary embeddings",
"qk_norm": "RMSNorm on Q and K (OLMo2, Qwen3, 2025)",
"ffn": "SwiGLU gated feedforward",
"norm": "RMSNorm pre-norm",
"objective": "Causal LM plus FIM (50 percent, PSM format)"
},
"training_summary": {
"pretrain_steps": 4000,
"pretrain_loss": 1.16,
"pretrain_ppl": 3.2,
"tokens_seen": "1.05B",
"sft_steps": 400,
"sft_loss": 1.15,
"sft_ppl": 3.15,
"hardware": "NVIDIA RTX 3050 Laptop 4GB VRAM"
}
}