{ "steps": [ 0 ], "batches": [ 1 ], "tokens": [ 3840 ], "accum_steps": [ 1 ], "accum_steps_avg": [ 1 ], "loss": [ 10.897216796875 ], "eval_loss": [], "perplexity": [], "accuracy": [], "wall_time": [ 1.186187982559204 ], "grad_norm": [ 8.550016403198242 ], "adam_second_moment": [ 6.549289782887457e-08 ], "adam_first_moment": [ 3.808525991760281e-05 ], "total_params": 47789184, "core_params": 28490496, "embedding_head_params": 19298688, "are_tied": true }