File size: 462 Bytes
{
"model_type": "fineweb-gpt",
"architectures": [
"GPTForCausalLM"
],
"bos_token_id": 0,
"eos_token_id": 0,
"pad_token_id": 1,
"vocab_size": 8192,
"context_len": 512,
"n_layers": 6,
"d_model": 256,
"n_heads": 8,
"d_ff": 1024,
"dropout": 0.1,
"tie_embeddings": true,
"trained_steps": 1800,
"val_loss": 5.2764,
"perplexity": 195.7,
"training_tokens": "~5M",
"dataset": "HuggingFaceFW/fineweb-edu (sample-10BT, 10k docs)"
}