{"world_size": 1, "epochs": 2, "steps": 3333, "seqs": 34672, "tokens": 101258180, "last_epoch_steps": 0, "last_epoch_seqs": 0, "last_epoch_tokens": 0, "total_seqs": 17336, "nan_in_loss_seqs": 0, "experiment_tracking_run_id": null, "loss_ema": 0.166393037497039, "loss_sum": 9.550168424844742, "mtp_loss_ema": 0, "mtp_loss_sum": 0, "eval_losses_avg": [0.2669239677488804, 0.2087830863893032, 0.18562059849500656, 0.17422222346067429, 0.16747690364718437, 0.16181137412786484, 0.15829269029200077, 0.15605521947145462, 0.15283420123159885, 0.1499126348644495, 0.14830303378403187, 0.14638864435255527, 0.1452154479920864, 0.1440908331423998, 0.14218942821025848, 0.1416184939444065, 0.14071348309516907, 0.13887083530426025, 0.13866095431149006, 0.13731765188276768, 0.13701613806188107, 0.1358004603534937, 0.13419232331216335, 0.1340783443301916, 0.13359127007424831, 0.13340670242905617, 0.13154898025095463, 0.13112467527389526, 0.12966893799602985, 0.1300010159611702, 0.12925688736140728, 0.1286165565252304, 0.12809292040765285, 0.12784288451075554, 0.1277215015143156, 0.12710174918174744, 0.12699639797210693, 0.12620864622294903, 0.12637895718216896, 0.1255951076745987, 0.12555144913494587, 0.12513052485883236]}