GhostLM / config.json
Ghostgim's picture
v0.5.0 chat-tuned ghost-small (CTIBench MCQ 36.9%)
2ac3661 verified
raw
history blame contribute delete
710 Bytes
{
  "vocab_size": 50264,
  "context_length": 1024,
  "d_model": 512,
  "n_heads": 8,
  "n_layers": 6,
  "d_ff": 2048,
  "dropout": 0.1,
  "bias": true,
  "use_rope": false,
  "rope_base": 10000.0,
  "use_swiglu": false,
  "use_rmsnorm": false,
  "use_flash_attention": false,
  "batch_size": 8,
  "learning_rate": 3e-05,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "grad_accum_steps": 4,
  "warmup_steps": 120,
  "max_steps": 1800,
  "eval_interval": 100,
  "save_interval": 600,
  "data_dir": "data/processed",
  "checkpoint_dir": "checkpoints/phase5_chat_v3",
  "log_dir": "logs/phase5_chat_v3",
  "device": "auto",
  "dtype": "float32",
  "seed": 42,
  "use_wandb": false
}