Agentic-V6 / logging.jsonl
jacpetro's picture
Training in progress, step 24
7aafd09 verified
{"loss": 15.60941696, "token_acc": 0.84950753, "grad_norm": 0.31941482, "learning_rate": 6.409e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.012566, "epoch": 1.36641221, "global_step/max_steps": "11/24", "percentage": "45.83%", "elapsed_time": "14m 20s", "remaining_time": "16m 57s"}
{"loss": 15.08524227, "token_acc": 0.84663894, "grad_norm": 0.19521875, "learning_rate": 5.712e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.00683, "epoch": 1.48854962, "global_step/max_steps": "12/24", "percentage": "50.00%", "elapsed_time": "29m 2s", "remaining_time": "29m 2s"}
{"loss": 15.31326485, "token_acc": 0.85482612, "grad_norm": 0.34356144, "learning_rate": 5e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.004866, "epoch": 1.61068702, "global_step/max_steps": "13/24", "percentage": "54.17%", "elapsed_time": "44m 17s", "remaining_time": "37m 28s"}
{"loss": 14.58541107, "token_acc": 0.84268516, "grad_norm": 0.21005033, "learning_rate": 4.288e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.003989, "epoch": 1.73282443, "global_step/max_steps": "14/24", "percentage": "58.33%", "elapsed_time": "58m 14s", "remaining_time": "41m 36s"}
{"loss": 14.47380066, "token_acc": 0.85444938, "grad_norm": 0.21881856, "learning_rate": 3.591e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.003445, "epoch": 1.85496183, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "1h 12m 19s", "remaining_time": "43m 23s"}
{"eval_loss": 0.45699596, "eval_token_acc": 0.85895063, "eval_runtime": 21.4898, "eval_samples_per_second": 0.977, "eval_steps_per_second": 0.14, "epoch": 1.85496183, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "1h 12m 41s", "remaining_time": "43m 36s"}
{"loss": 14.06553078, "token_acc": 0.86124204, "grad_norm": 0.20159593, "learning_rate": 2.923e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.003033, "epoch": 1.97709924, "global_step/max_steps": "16/24", "percentage": "66.67%", "elapsed_time": "1h 27m 40s", "remaining_time": "43m 50s"}
{"loss": 16.11391258, "token_acc": 0.86498041, "grad_norm": 0.20615543, "learning_rate": 2.297e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.00269, "epoch": 2.1221374, "global_step/max_steps": "17/24", "percentage": "70.83%", "elapsed_time": "1h 45m 5s", "remaining_time": "43m 16s"}
{"loss": 14.20277405, "token_acc": 0.85234303, "grad_norm": 0.21431336, "learning_rate": 1.726e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002511, "epoch": 2.24427481, "global_step/max_steps": "18/24", "percentage": "75.00%", "elapsed_time": "1h 59m 12s", "remaining_time": "39m 44s"}
{"loss": 13.74881935, "token_acc": 0.87061663, "grad_norm": 0.25552696, "learning_rate": 1.221e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002367, "epoch": 2.36641221, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "2h 13m 32s", "remaining_time": "35m 8s"}
{"loss": 13.37627411, "token_acc": 0.86427258, "grad_norm": 0.18122235, "learning_rate": 7.94e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002256, "epoch": 2.48854962, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "2h 27m 29s", "remaining_time": "29m 29s"}
{"eval_loss": 0.44094244, "eval_token_acc": 0.86080157, "eval_runtime": 21.4644, "eval_samples_per_second": 0.978, "eval_steps_per_second": 0.14, "epoch": 2.48854962, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "2h 27m 51s", "remaining_time": "29m 34s"}
{"loss": 13.05256462, "token_acc": 0.86702151, "grad_norm": 0.39791853, "learning_rate": 4.52e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002147, "epoch": 2.61068702, "global_step/max_steps": "21/24", "percentage": "87.50%", "elapsed_time": "2h 42m 48s", "remaining_time": "23m 15s"}
{"loss": 13.70001602, "token_acc": 0.86057486, "grad_norm": 0.19750436, "learning_rate": 2.03e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002065, "epoch": 2.73282443, "global_step/max_steps": "22/24", "percentage": "91.67%", "elapsed_time": "2h 57m 18s", "remaining_time": "16m 7s"}
{"loss": 13.73007965, "token_acc": 0.87098726, "grad_norm": 0.20478964, "learning_rate": 5.1e-07, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002001, "epoch": 2.85496183, "global_step/max_steps": "23/24", "percentage": "95.83%", "elapsed_time": "3h 11m 20s", "remaining_time": "8m 19s"}
{"loss": 13.49084854, "token_acc": 0.860602, "grad_norm": 0.25566108, "learning_rate": 0.0, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.001939, "epoch": 2.97709924, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "3h 26m 5s", "remaining_time": "0s"}
{"eval_loss": 0.43799019, "eval_token_acc": 0.86136819, "eval_runtime": 21.4221, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.14, "epoch": 2.97709924, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "3h 26m 27s", "remaining_time": "0s"}
{"train_runtime": 12404.4959, "train_samples_per_second": 0.506, "train_steps_per_second": 0.002, "total_flos": 5989836745867264.0, "train_loss": 8.35616481, "epoch": 2.97709924, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "3h 26m 38s", "remaining_time": "0s"}