llama-agentic-sft / logging.jsonl
jacpetro's picture
Training in progress, step 5
25d61c9 verified
{"loss": 1.51931369, "token_acc": 0.65133395, "grad_norm": 1.74212694, "learning_rate": 8.11e-06, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.102087, "epoch": 0.00813008, "global_step/max_steps": "1/369", "percentage": "0.27%", "elapsed_time": "4s", "remaining_time": "25m 43s"}
{"loss": 1.25695169, "token_acc": 0.70556827, "grad_norm": 1.53716826, "learning_rate": 1.622e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.146031, "epoch": 0.01626016, "global_step/max_steps": "2/369", "percentage": "0.54%", "elapsed_time": "8s", "remaining_time": "24m 45s"}
{"loss": 1.51197827, "token_acc": 0.64889197, "grad_norm": 1.5586139, "learning_rate": 2.432e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.168869, "epoch": 0.02439024, "global_step/max_steps": "3/369", "percentage": "0.81%", "elapsed_time": "12s", "remaining_time": "24m 44s"}
{"loss": 1.44245231, "token_acc": 0.6585213, "grad_norm": 1.05729473, "learning_rate": 3.243e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.183824, "epoch": 0.03252033, "global_step/max_steps": "4/369", "percentage": "1.08%", "elapsed_time": "16s", "remaining_time": "24m 34s"}
{"loss": 1.11332512, "token_acc": 0.72395833, "grad_norm": 1.61427522, "learning_rate": 4.054e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.196698, "epoch": 0.04065041, "global_step/max_steps": "5/369", "percentage": "1.36%", "elapsed_time": "19s", "remaining_time": "24m 2s"}
{"loss": 1.62357724, "token_acc": 0.60518004, "grad_norm": 1.17167616, "learning_rate": 4.865e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.196566, "epoch": 0.04878049, "global_step/max_steps": "6/369", "percentage": "1.63%", "elapsed_time": "24s", "remaining_time": "25m 7s"}
{"loss": 0.70930511, "token_acc": 0.83585201, "grad_norm": 0.6108681, "learning_rate": 5.676e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.202937, "epoch": 0.05691057, "global_step/max_steps": "7/369", "percentage": "1.90%", "elapsed_time": "28s", "remaining_time": "24m 54s"}
{"loss": 0.7276203, "token_acc": 0.83107275, "grad_norm": 0.76370066, "learning_rate": 6.486e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.2095, "epoch": 0.06504065, "global_step/max_steps": "8/369", "percentage": "2.17%", "elapsed_time": "32s", "remaining_time": "24m 30s"}
{"loss": 1.29925573, "token_acc": 0.6878089, "grad_norm": 1.19990838, "learning_rate": 7.297e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.214803, "epoch": 0.07317073, "global_step/max_steps": "9/369", "percentage": "2.44%", "elapsed_time": "36s", "remaining_time": "24m 11s"}
{"loss": 1.11077285, "token_acc": 0.72534014, "grad_norm": 0.97156239, "learning_rate": 8.108e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.219705, "epoch": 0.08130081, "global_step/max_steps": "10/369", "percentage": "2.71%", "elapsed_time": "39s", "remaining_time": "23m 52s"}
{"loss": 0.97970724, "token_acc": 0.73512748, "grad_norm": 0.94852263, "learning_rate": 8.919e-05, "memory(GiB)": 12.82, "train_speed(iter/s)": 0.219112, "epoch": 0.08943089, "global_step/max_steps": "11/369", "percentage": "2.98%", "elapsed_time": "44s", "remaining_time": "24m 11s"}