| {"loss": 1.05211091, "token_acc": 0.72991851, "grad_norm": 1.3166697, "learning_rate": 9.96e-06, "memory(GiB)": 48.3, "train_speed(iter/s)": 0.006625, "epoch": 0.00428036, "global_step/max_steps": "1/233", "percentage": "0.43%", "elapsed_time": "2m 4s", "remaining_time": "7h 59m 51s"} |
| {"loss": 0.76383126, "token_acc": 0.79082383, "grad_norm": 0.3090775, "learning_rate": 9.91e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.006892, "epoch": 0.00856073, "global_step/max_steps": "2/233", "percentage": "0.86%", "elapsed_time": "4m 23s", "remaining_time": "8h 26m 54s"} |
| {"loss": 0.68559116, "token_acc": 0.79723046, "grad_norm": 0.59014881, "learning_rate": 9.87e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007851, "epoch": 0.01284109, "global_step/max_steps": "3/233", "percentage": "1.29%", "elapsed_time": "5m 55s", "remaining_time": "7h 33m 57s"} |
| {"loss": 0.65103149, "token_acc": 0.81592101, "grad_norm": 0.3545883, "learning_rate": 9.83e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008732, "epoch": 0.01712146, "global_step/max_steps": "4/233", "percentage": "1.72%", "elapsed_time": "7m 11s", "remaining_time": "6h 51m 27s"} |
| {"loss": 0.63546801, "token_acc": 0.81665001, "grad_norm": 0.21685971, "learning_rate": 9.79e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.009836, "epoch": 0.02140182, "global_step/max_steps": "5/233", "percentage": "2.15%", "elapsed_time": "8m 1s", "remaining_time": "6h 5m 56s"} |
| {"loss": 0.62821972, "token_acc": 0.83078308, "grad_norm": 0.15905039, "learning_rate": 9.74e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.010504, "epoch": 0.02568218, "global_step/max_steps": "6/233", "percentage": "2.58%", "elapsed_time": "9m 4s", "remaining_time": "5h 43m 14s"} |
| {"loss": 0.57419938, "token_acc": 0.83345468, "grad_norm": 0.26453313, "learning_rate": 9.7e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.010586, "epoch": 0.02996255, "global_step/max_steps": "7/233", "percentage": "3.00%", "elapsed_time": "10m 34s", "remaining_time": "5h 41m 21s"} |
| {"loss": 0.61988533, "token_acc": 0.80090435, "grad_norm": 0.13725959, "learning_rate": 9.66e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.010552, "epoch": 0.03424291, "global_step/max_steps": "8/233", "percentage": "3.43%", "elapsed_time": "12m 11s", "remaining_time": "5h 42m 47s"} |
| {"loss": 0.58300537, "token_acc": 0.81416453, "grad_norm": 0.19543347, "learning_rate": 9.61e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.01057, "epoch": 0.03852327, "global_step/max_steps": "9/233", "percentage": "3.86%", "elapsed_time": "13m 44s", "remaining_time": "5h 42m 3s"} |
| {"loss": 0.58969021, "token_acc": 0.82695811, "grad_norm": 0.23631856, "learning_rate": 9.57e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.0102, "epoch": 0.04280364, "global_step/max_steps": "10/233", "percentage": "4.29%", "elapsed_time": "15m 53s", "remaining_time": "5h 54m 24s"} |
| {"eval_loss": 0.59994543, "eval_token_acc": 0.81003124, "eval_runtime": 233.3714, "eval_samples_per_second": 1.98, "eval_steps_per_second": 0.249, "epoch": 0.04280364, "global_step/max_steps": "10/233", "percentage": "4.29%", "elapsed_time": "19m 46s", "remaining_time": "7h 21m 8s"} |
| {"loss": 0.64508778, "token_acc": 0.81136651, "grad_norm": 0.15494038, "learning_rate": 9.53e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007619, "epoch": 0.047084, "global_step/max_steps": "11/233", "percentage": "4.72%", "elapsed_time": "23m 36s", "remaining_time": "7h 56m 37s"} |
| {"loss": 0.58474982, "token_acc": 0.81496302, "grad_norm": 0.15614207, "learning_rate": 9.48e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007909, "epoch": 0.05136437, "global_step/max_steps": "12/233", "percentage": "5.15%", "elapsed_time": "24m 50s", "remaining_time": "7h 37m 29s"} |
| {"loss": 0.62716937, "token_acc": 0.82339642, "grad_norm": 0.16823439, "learning_rate": 9.44e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008011, "epoch": 0.05564473, "global_step/max_steps": "13/233", "percentage": "5.58%", "elapsed_time": "26m 35s", "remaining_time": "7h 30m 7s"} |
| {"loss": 0.59660435, "token_acc": 0.84487376, "grad_norm": 0.1322335, "learning_rate": 9.4e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007891, "epoch": 0.05992509, "global_step/max_steps": "14/233", "percentage": "6.01%", "elapsed_time": "29m 7s", "remaining_time": "7h 35m 34s"} |
| {"loss": 0.56707138, "token_acc": 0.85295479, "grad_norm": 0.14563887, "learning_rate": 9.36e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008169, "epoch": 0.06420546, "global_step/max_steps": "15/233", "percentage": "6.44%", "elapsed_time": "30m 9s", "remaining_time": "7h 18m 15s"} |
| {"loss": 0.57920754, "token_acc": 0.83316378, "grad_norm": 0.14242059, "learning_rate": 9.31e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008288, "epoch": 0.06848582, "global_step/max_steps": "16/233", "percentage": "6.87%", "elapsed_time": "31m 43s", "remaining_time": "7h 10m 18s"} |
| {"loss": 0.60998094, "token_acc": 0.84979702, "grad_norm": 0.15288945, "learning_rate": 9.27e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008223, "epoch": 0.07276619, "global_step/max_steps": "17/233", "percentage": "7.30%", "elapsed_time": "34m 0s", "remaining_time": "7h 12m 5s"} |
| {"loss": 0.55885768, "token_acc": 0.79892252, "grad_norm": 0.18015395, "learning_rate": 9.23e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.00823, "epoch": 0.07704655, "global_step/max_steps": "18/233", "percentage": "7.73%", "elapsed_time": "36m 0s", "remaining_time": "7h 10m 2s"} |
| {"loss": 0.57408869, "token_acc": 0.82806532, "grad_norm": 0.16707274, "learning_rate": 9.18e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008308, "epoch": 0.08132691, "global_step/max_steps": "19/233", "percentage": "8.15%", "elapsed_time": "37m 40s", "remaining_time": "7h 4m 16s"} |
| {"loss": 0.63770878, "token_acc": 0.81977924, "grad_norm": 0.15583961, "learning_rate": 9.14e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007956, "epoch": 0.08560728, "global_step/max_steps": "20/233", "percentage": "8.58%", "elapsed_time": "41m 27s", "remaining_time": "7h 21m 27s"} |
| {"eval_loss": 0.57062572, "eval_token_acc": 0.8170977, "eval_runtime": 232.3774, "eval_samples_per_second": 1.988, "eval_steps_per_second": 0.25, "epoch": 0.08560728, "global_step/max_steps": "20/233", "percentage": "8.58%", "elapsed_time": "45m 19s", "remaining_time": "8h 2m 42s"} |
| {"loss": 0.6081934, "token_acc": 0.82475915, "grad_norm": 0.13550253, "learning_rate": 9.1e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007128, "epoch": 0.08988764, "global_step/max_steps": "21/233", "percentage": "9.01%", "elapsed_time": "48m 39s", "remaining_time": "8h 11m 10s"} |
| {"loss": 0.53626227, "token_acc": 0.81810307, "grad_norm": 0.12228937, "learning_rate": 9.06e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007297, "epoch": 0.094168, "global_step/max_steps": "22/233", "percentage": "9.44%", "elapsed_time": "49m 48s", "remaining_time": "7h 57m 38s"} |
| {"loss": 0.58173132, "token_acc": 0.81788485, "grad_norm": 0.13067725, "learning_rate": 9.01e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.00744, "epoch": 0.09844837, "global_step/max_steps": "23/233", "percentage": "9.87%", "elapsed_time": "51m 4s", "remaining_time": "7h 46m 22s"} |
| {"loss": 0.59200102, "token_acc": 0.82601231, "grad_norm": 0.13315721, "learning_rate": 8.97e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.00751, "epoch": 0.10272873, "global_step/max_steps": "24/233", "percentage": "10.30%", "elapsed_time": "52m 48s", "remaining_time": "7h 39m 56s"} |
| {"loss": 0.53160775, "token_acc": 0.84765362, "grad_norm": 0.13677236, "learning_rate": 8.93e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.0076, "epoch": 0.1070091, "global_step/max_steps": "25/233", "percentage": "10.73%", "elapsed_time": "54m 22s", "remaining_time": "7h 32m 26s"} |
| {"loss": 0.55127698, "token_acc": 0.85220042, "grad_norm": 0.20182617, "learning_rate": 8.88e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007722, "epoch": 0.11128946, "global_step/max_steps": "26/233", "percentage": "11.16%", "elapsed_time": "55m 40s", "remaining_time": "7h 23m 12s"} |
| {"loss": 0.57834888, "token_acc": 0.82124202, "grad_norm": 0.43602487, "learning_rate": 8.84e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007816, "epoch": 0.11556982, "global_step/max_steps": "27/233", "percentage": "11.59%", "elapsed_time": "57m 7s", "remaining_time": "7h 15m 50s"} |
| {"loss": 0.58652842, "token_acc": 0.8147252, "grad_norm": 0.2080746, "learning_rate": 8.8e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007916, "epoch": 0.11985019, "global_step/max_steps": "28/233", "percentage": "12.02%", "elapsed_time": "58m 30s", "remaining_time": "7h 8m 19s"} |
| {"loss": 0.57351971, "token_acc": 0.83113969, "grad_norm": 0.11781906, "learning_rate": 8.76e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007964, "epoch": 0.12413055, "global_step/max_steps": "29/233", "percentage": "12.45%", "elapsed_time": "1h 0m 14s", "remaining_time": "7h 3m 46s"} |
| {"loss": 0.57332683, "token_acc": 0.82596983, "grad_norm": 0.14135404, "learning_rate": 8.71e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.008097, "epoch": 0.12841091, "global_step/max_steps": "30/233", "percentage": "12.88%", "elapsed_time": "1h 1m 18s", "remaining_time": "6h 54m 48s"} |
| {"eval_loss": 0.56049991, "eval_token_acc": 0.82029019, "eval_runtime": 232.6365, "eval_samples_per_second": 1.986, "eval_steps_per_second": 0.249, "epoch": 0.12841091, "global_step/max_steps": "30/233", "percentage": "12.88%", "elapsed_time": "1h 5m 10s", "remaining_time": "7h 21m 2s"} |
| {"loss": 0.59806776, "token_acc": 0.8235928, "grad_norm": 0.25007367, "learning_rate": 8.67e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007572, "epoch": 0.13269128, "global_step/max_steps": "31/233", "percentage": "13.30%", "elapsed_time": "1h 7m 46s", "remaining_time": "7h 21m 40s"} |
| {"loss": 0.56375688, "token_acc": 0.82393812, "grad_norm": 0.12149891, "learning_rate": 8.63e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007518, "epoch": 0.13697164, "global_step/max_steps": "32/233", "percentage": "13.73%", "elapsed_time": "1h 10m 29s", "remaining_time": "7h 22m 45s"} |
| {"loss": 0.56226754, "token_acc": 0.82827688, "grad_norm": 0.12714922, "learning_rate": 8.58e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007548, "epoch": 0.14125201, "global_step/max_steps": "33/233", "percentage": "14.16%", "elapsed_time": "1h 12m 25s", "remaining_time": "7h 18m 54s"} |
| {"loss": 0.53598464, "token_acc": 0.82137048, "grad_norm": 0.18553115, "learning_rate": 8.54e-06, "memory(GiB)": 127.66, "train_speed(iter/s)": 0.007631, "epoch": 0.14553237, "global_step/max_steps": "34/233", "percentage": "14.59%", "elapsed_time": "1h 13m 48s", "remaining_time": "7h 11m 59s"} |
|
|