| { | |
| "steps": [ | |
| 0 | |
| ], | |
| "batches": [ | |
| 1 | |
| ], | |
| "tokens": [ | |
| 3840 | |
| ], | |
| "accum_steps": [ | |
| 1 | |
| ], | |
| "accum_steps_avg": [ | |
| 1 | |
| ], | |
| "loss": [ | |
| 10.897216796875 | |
| ], | |
| "eval_loss": [], | |
| "perplexity": [], | |
| "accuracy": [], | |
| "wall_time": [ | |
| 1.186187982559204 | |
| ], | |
| "grad_norm": [ | |
| 8.550016403198242 | |
| ], | |
| "adam_second_moment": [ | |
| 6.549289782887457e-08 | |
| ], | |
| "adam_first_moment": [ | |
| 3.808525991760281e-05 | |
| ], | |
| "total_params": 47789184, | |
| "core_params": 28490496, | |
| "embedding_head_params": 19298688, | |
| "are_tied": true | |
| } |