{
  "model_name": "nanoGPT-MLX-53M-FineWebEdu",
  "framework": "MLX",
  "architecture": "Pre-LN Transformer (GPT-2 style)",
  "training": {
    "dataset": "FineWebEdu-10M",
    "iterations": 20000,
    "final_loss": 0.7583,
    "optimizer": "AdamW",
    "learning_rate": 0.0006,
    "batch_size": 16,
    "context_length": 512
  },
  "model_config": {
    "vocab_size": 50257,
    "d_model": 384,
    "n_layers": 8,
    "n_heads": 8,
    "d_ff": 1536,
    "dropout": 0.1
  },
  "parameters": "52.99M",
  "converted_from": "MLX checkpoint_20000.npz",
  "conversion_date": "2025-11-14"
}