```json
{
  "model_type": "fineweb-gpt",
  "architectures": [
    "GPTForCausalLM"
  ],
  "bos_token_id": 0,
  "eos_token_id": 0,
  "pad_token_id": 1,
  "vocab_size": 8192,
  "context_len": 512,
  "n_layers": 6,
  "d_model": 256,
  "n_heads": 8,
  "d_ff": 1024,
  "dropout": 0.1,
  "tie_embeddings": true,
  "trained_steps": 1800,
  "val_loss": 5.2764,
  "perplexity": 195.7,
  "training_tokens": "~5M",
  "dataset": "HuggingFaceFW/fineweb-edu (sample-10BT, 10k docs)"
}
```
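As a quick sanity check, the reported perplexity matches exp(val_loss), and the architecture fields imply a model of roughly 7M parameters with tied embeddings. The sketch below derives both from the config; it assumes the file is saved as `config.json` (the filename is an assumption) and that positions use learned embeddings of length `context_len`, which the config does not state explicitly.

```python
import json
import math

# Load the config shown above; the "config.json" filename is an assumption.
with open("config.json") as f:
    cfg = json.load(f)

# Perplexity should equal exp(validation loss) for a cross-entropy-trained LM.
ppl = math.exp(cfg["val_loss"])
print(f"exp(val_loss) = {ppl:.1f} (reported: {cfg['perplexity']})")  # ~195.7

# Rough parameter count (ignores biases and layer norms).
d, ff, L, V, T = (cfg[k] for k in
                  ("d_model", "d_ff", "n_layers", "vocab_size", "context_len"))
attn = 4 * d * d    # Q, K, V, and output projections per layer
mlp = 2 * d * ff    # up- and down-projections per layer
emb = V * d + T * d  # token embeddings + assumed learned positional embeddings
params = L * (attn + mlp) + emb  # tie_embeddings: the LM head reuses emb
print(f"~{params / 1e6:.1f}M parameters")  # ~6.9M

# Per-head dimension follows from d_model / n_heads.
print(f"head_dim = {d // cfg['n_heads']}")  # 32
```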