fineweb-gpt-scratch / config.json
shreyask's picture
GPT from scratch — 1800 steps, ppl=195.7
00da4c8 verified
{
  "model_type": "fineweb-gpt",
  "architectures": [
    "GPTForCausalLM"
  ],
  "bos_token_id": 0,
  "eos_token_id": 0,
  "pad_token_id": 1,
  "vocab_size": 8192,
  "context_len": 512,
  "n_layers": 6,
  "d_model": 256,
  "n_heads": 8,
  "d_ff": 1024,
  "dropout": 0.1,
  "tie_embeddings": true,
  "trained_steps": 1800,
  "val_loss": 5.2764,
  "perplexity": 195.7,
  "training_tokens": "~5M",
  "dataset": "HuggingFaceFW/fineweb-edu (sample-10BT, 10k docs)"
}