| {"loss": 30.96281052, "token_acc": 0.74524095, "grad_norm": 7.9422058, "learning_rate": 3.5e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001132, "epoch": 0.1221374, "global_step/max_steps": "1/24", "percentage": "4.17%", "elapsed_time": "14m 35s", "remaining_time": "5h 35m 38s"} |
| {"loss": 32.10971832, "token_acc": 0.72850925, "grad_norm": 6.52010408, "learning_rate": 7e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001129, "epoch": 0.24427481, "global_step/max_steps": "2/24", "percentage": "8.33%", "elapsed_time": "29m 24s", "remaining_time": "5h 23m 26s"} |
| {"loss": 30.22758484, "token_acc": 0.74227122, "grad_norm": 10.43869129, "learning_rate": 6.964e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001123, "epoch": 0.36641221, "global_step/max_steps": "3/24", "percentage": "12.50%", "elapsed_time": "44m 24s", "remaining_time": "5h 10m 50s"} |
| {"loss": 27.7996254, "token_acc": 0.76933951, "grad_norm": 1.79717579, "learning_rate": 6.858e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001128, "epoch": 0.48854962, "global_step/max_steps": "4/24", "percentage": "16.67%", "elapsed_time": "58m 58s", "remaining_time": "4h 54m 50s"} |
| {"loss": 26.51483536, "token_acc": 0.77443033, "grad_norm": 0.42850104, "learning_rate": 6.684e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001133, "epoch": 0.61068702, "global_step/max_steps": "5/24", "percentage": "20.83%", "elapsed_time": "1h 13m 25s", "remaining_time": "4h 39m 1s"} |
| {"loss": 25.06520462, "token_acc": 0.78730703, "grad_norm": 0.40429934, "learning_rate": 6.444e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001125, "epoch": 0.73282443, "global_step/max_steps": "6/24", "percentage": "25.00%", "elapsed_time": "1h 28m 47s", "remaining_time": "4h 26m 22s"} |
| {"loss": 23.57491112, "token_acc": 0.8091305, "grad_norm": 0.17600205, "learning_rate": 6.145e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001128, "epoch": 0.85496183, "global_step/max_steps": "7/24", "percentage": "29.17%", "elapsed_time": "1h 43m 17s", "remaining_time": "4h 10m 51s"} |
| {"loss": 23.25542068, "token_acc": 0.80147298, "grad_norm": 0.1694329, "learning_rate": 5.792e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001127, "epoch": 0.97709924, "global_step/max_steps": "8/24", "percentage": "33.33%", "elapsed_time": "1h 58m 13s", "remaining_time": "3h 56m 26s"} |
| {"loss": 4.33601952, "token_acc": 0.78835404, "grad_norm": 0.1694329, "learning_rate": 5.392e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001237, "epoch": 1.0, "global_step/max_steps": "9/24", "percentage": "37.50%", "elapsed_time": "2h 1m 8s", "remaining_time": "3h 21m 53s"} |
| {"loss": 21.8188076, "token_acc": 0.79576805, "grad_norm": 0.14743451, "learning_rate": 4.954e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 30s", "remaining_time": "3h 9m 42s"} |
| {"eval_loss": 0.6696381, "eval_token_acc": 0.81154384, "eval_runtime": 21.8063, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 52s", "remaining_time": "3h 10m 13s"} |
| {"loss": 21.68900299, "token_acc": 0.78968529, "grad_norm": 0.15730255, "learning_rate": 4.486e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001213, "epoch": 1.24427481, "global_step/max_steps": "11/24", "percentage": "45.83%", "elapsed_time": "2h 31m 0s", "remaining_time": "2h 58m 27s"} |
| {"loss": 20.34348297, "token_acc": 0.81513798, "grad_norm": 0.14047588, "learning_rate": 3.998e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001208, "epoch": 1.36641221, "global_step/max_steps": "12/24", "percentage": "50.00%", "elapsed_time": "2h 45m 29s", "remaining_time": "2h 45m 29s"} |
| {"loss": 19.69247437, "token_acc": 0.81367603, "grad_norm": 0.13805264, "learning_rate": 3.5e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.0012, "epoch": 1.48854962, "global_step/max_steps": "13/24", "percentage": "54.17%", "elapsed_time": "3h 0m 29s", "remaining_time": "2h 32m 43s"} |
| {"loss": 19.99150848, "token_acc": 0.82031954, "grad_norm": 0.13972557, "learning_rate": 3.002e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001189, "epoch": 1.61068702, "global_step/max_steps": "14/24", "percentage": "58.33%", "elapsed_time": "3h 16m 2s", "remaining_time": "2h 20m 1s"} |
| {"loss": 19.0448761, "token_acc": 0.80773965, "grad_norm": 0.11615134, "learning_rate": 2.514e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001188, "epoch": 1.73282443, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "3h 30m 15s", "remaining_time": "2h 6m 9s"} |
| {"loss": 18.81958389, "token_acc": 0.82736064, "grad_norm": 0.10968279, "learning_rate": 2.046e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001186, "epoch": 1.85496183, "global_step/max_steps": "16/24", "percentage": "66.67%", "elapsed_time": "3h 44m 38s", "remaining_time": "1h 52m 19s"} |
| {"loss": 18.56712723, "token_acc": 0.82723235, "grad_norm": 0.16789175, "learning_rate": 1.608e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001181, "epoch": 1.97709924, "global_step/max_steps": "17/24", "percentage": "70.83%", "elapsed_time": "3h 59m 44s", "remaining_time": "1h 38m 43s"} |
| {"loss": 3.45807076, "token_acc": 0.83812147, "grad_norm": 0.16789175, "learning_rate": 1.208e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001235, "epoch": 2.0, "global_step/max_steps": "18/24", "percentage": "75.00%", "elapsed_time": "4h 2m 42s", "remaining_time": "1h 20m 54s"} |
| {"loss": 18.17948341, "token_acc": 0.82936069, "grad_norm": 0.10841891, "learning_rate": 8.55e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 2.1221374, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "4h 17m 32s", "remaining_time": "1h 7m 46s"} |
| {"loss": 18.72079086, "token_acc": 0.81507356, "grad_norm": 0.10311155, "learning_rate": 5.56e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001225, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 31m 57s", "remaining_time": "54m 23s"} |
| {"eval_loss": 0.5854618, "eval_token_acc": 0.82846674, "eval_runtime": 21.8067, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 32m 19s", "remaining_time": "54m 27s"} |
| {"loss": 18.32196236, "token_acc": 0.84050898, "grad_norm": 0.10798978, "learning_rate": 3.16e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001219, "epoch": 2.36641221, "global_step/max_steps": "21/24", "percentage": "87.50%", "elapsed_time": "4h 47m 5s", "remaining_time": "41m 0s"} |
| {"loss": 17.90100861, "token_acc": 0.82744203, "grad_norm": 0.1008019, "learning_rate": 1.42e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001216, "epoch": 2.48854962, "global_step/max_steps": "22/24", "percentage": "91.67%", "elapsed_time": "5h 1m 20s", "remaining_time": "27m 23s"} |
| {"loss": 17.8360672, "token_acc": 0.8334202, "grad_norm": 0.10203252, "learning_rate": 3.6e-07, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001211, "epoch": 2.61068702, "global_step/max_steps": "23/24", "percentage": "95.83%", "elapsed_time": "5h 16m 24s", "remaining_time": "13m 45s"} |
| {"loss": 18.5565834, "token_acc": 0.82515168, "grad_norm": 0.13676309, "learning_rate": 0.0, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001207, "epoch": 2.73282443, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "5h 31m 10s", "remaining_time": "0s"} |
| {"eval_loss": 0.58164978, "eval_token_acc": 0.82967552, "eval_runtime": 21.8292, "eval_samples_per_second": 0.962, "eval_steps_per_second": 0.137, "epoch": 2.73282443, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "5h 31m 32s", "remaining_time": "0s"} |
| {"train_runtime": 19906.1961, "train_samples_per_second": 0.315, "train_steps_per_second": 0.001, "total_flos": 5731493804507136.0, "train_loss": 20.69945669, "epoch": 2.73282443, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "5h 31m 41s", "remaining_time": "0s"} |
|
|