{
  "model_name": "nanoGPT-MLX-53M-FineWebEdu",
  "framework": "MLX",
  "architecture": "Pre-LN Transformer (GPT-2 style)",
  "training": {
    "dataset": "FineWebEdu-10M",
    "iterations": 20000,
    "final_loss": 0.7583,
    "optimizer": "AdamW",
    "learning_rate": 0.0006,
    "batch_size": 16,
    "context_length": 512
  },
  "model_config": {
    "vocab_size": 50257,
    "d_model": 384,
    "n_layers": 8,
    "n_heads": 8,
    "d_ff": 1536,
    "dropout": 0.1
  },
  "parameters": "52.99M",
  "converted_from": "MLX checkpoint_20000.npz",
  "conversion_date": "2025-11-14"
}