File size: 462 Bytes
00da4c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
{
  "model_type": "fineweb-gpt",
  "architectures": [
    "GPTForCausalLM"
  ],
  "bos_token_id": 0,
  "eos_token_id": 0,
  "pad_token_id": 1,
  "vocab_size": 8192,
  "context_len": 512,
  "n_layers": 6,
  "d_model": 256,
  "n_heads": 8,
  "d_ff": 1024,
  "dropout": 0.1,
  "tie_embeddings": true,
  "trained_steps": 1800,
  "val_loss": 5.2764,
  "perplexity": 195.7,
  "training_tokens": "~5M",
  "dataset": "HuggingFaceFW/fineweb-edu (sample-10BT, 10k docs)"
}