| { | |
| "step": 999, | |
| "val_loss": 2.0692103222040337, | |
| "mmlu_acc": 0.2685546875, | |
| "arc_easy_acc": 0.251953125, | |
| "model_config": { | |
| "sequence_len": 1024, | |
| "vocab_size": 65536, | |
| "n_layer": 16, | |
| "n_head": 8, | |
| "n_kv_head": 8, | |
| "n_embd": 1024 | |
| } | |
| } |
| { | |
| "step": 999, | |
| "val_loss": 2.0692103222040337, | |
| "mmlu_acc": 0.2685546875, | |
| "arc_easy_acc": 0.251953125, | |
| "model_config": { | |
| "sequence_len": 1024, | |
| "vocab_size": 65536, | |
| "n_layer": 16, | |
| "n_head": 8, | |
| "n_kv_head": 8, | |
| "n_embd": 1024 | |
| } | |
| } |