{ "step": 812, "val_bpb": 0.7322926359254023, "model_config": { "sequence_len": 2048, "vocab_size": 65536, "n_layer": 4, "n_head": 2, "n_kv_head": 2, "n_embd": 256 }, "user_config": { "run": "dummy", "device_type": "", "dtype": "bfloat16", "num_iterations": -1, "max_seq_len": 2048, "device_batch_size": 32, "unembedding_lr": 0.004, "embedding_lr": 0.2, "matrix_lr": 0.02, "init_lr_frac": 1.0, "weight_decay": 0.0, "eval_every": 150, "eval_tokens": 10485760, "total_batch_size": 524288, "dry_run": 0 } }