test / trainer_log.jsonl
rbelanec's picture
Training in progress, step 7
1a7843f verified
raw
history blame
9.3 kB
{"current_steps": 5, "total_steps": 125, "loss": 3.6273, "lr": 0.009230769230769232, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:05", "remaining_time": "0:02:23", "throughput": 359.25, "total_tokens": 2144}
{"current_steps": 7, "total_steps": 125, "eval_loss": 6.932018280029297, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:07", "remaining_time": "0:02:11", "throughput": 370.37, "total_tokens": 2880}
{"current_steps": 10, "total_steps": 125, "loss": 6.3813, "lr": 0.02076923076923077, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:00:09", "remaining_time": "0:01:52", "throughput": 423.16, "total_tokens": 4128}
{"current_steps": 14, "total_steps": 125, "eval_loss": 1.6227623224258423, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:00:12", "remaining_time": "0:01:40", "throughput": 468.28, "total_tokens": 5920}
{"current_steps": 15, "total_steps": 125, "loss": 2.5342, "lr": 0.029994099395745794, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:00:13", "remaining_time": "0:01:40", "throughput": 455.1, "total_tokens": 6240}
{"current_steps": 20, "total_steps": 125, "loss": 1.4507, "lr": 0.029788065277773536, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:00:14", "remaining_time": "0:01:17", "throughput": 551.11, "total_tokens": 8096}
{"current_steps": 21, "total_steps": 125, "eval_loss": 0.4040253460407257, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:00:16", "remaining_time": "0:01:20", "throughput": 518.35, "total_tokens": 8416}
{"current_steps": 25, "total_steps": 125, "loss": 1.7771, "lr": 0.02929162684084344, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:00:18", "remaining_time": "0:01:12", "throughput": 560.1, "total_tokens": 10112}
{"current_steps": 28, "total_steps": 125, "eval_loss": 3.618654489517212, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:00:20", "remaining_time": "0:01:09", "throughput": 562.47, "total_tokens": 11264}
{"current_steps": 30, "total_steps": 125, "loss": 3.7935, "lr": 0.028514533018536285, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:00:21", "remaining_time": "0:01:07", "throughput": 566.28, "total_tokens": 12032}
{"current_steps": 35, "total_steps": 125, "loss": 0.7848, "lr": 0.02747204418453818, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:22", "remaining_time": "0:00:57", "throughput": 622.81, "total_tokens": 13824}
{"current_steps": 35, "total_steps": 125, "eval_loss": 0.36673951148986816, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:00:23", "remaining_time": "0:01:00", "throughput": 586.05, "total_tokens": 13824}
{"current_steps": 40, "total_steps": 125, "loss": 0.4314, "lr": 0.026184632473247482, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:00:25", "remaining_time": "0:00:54", "throughput": 615.57, "total_tokens": 15840}
{"current_steps": 42, "total_steps": 125, "eval_loss": 0.3661668598651886, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:00:27", "remaining_time": "0:00:54", "throughput": 605.36, "total_tokens": 16672}
{"current_steps": 45, "total_steps": 125, "loss": 0.4096, "lr": 0.024677579753131316, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:00:29", "remaining_time": "0:00:51", "throughput": 614.1, "total_tokens": 17920}
{"current_steps": 49, "total_steps": 125, "eval_loss": 0.5265085101127625, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:00:31", "remaining_time": "0:00:48", "throughput": 615.9, "total_tokens": 19296}
{"current_steps": 50, "total_steps": 125, "loss": 0.2423, "lr": 0.022980481147730047, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:48", "throughput": 606.78, "total_tokens": 19712}
{"current_steps": 55, "total_steps": 125, "loss": 0.5554, "lr": 0.021126663854039943, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:00:33", "remaining_time": "0:00:42", "throughput": 654.54, "total_tokens": 21952}
{"current_steps": 56, "total_steps": 125, "eval_loss": 0.39251771569252014, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:00:35", "remaining_time": "0:00:43", "throughput": 638.28, "total_tokens": 22432}
{"current_steps": 60, "total_steps": 125, "loss": 0.4968, "lr": 0.01915253267137274, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:00:36", "remaining_time": "0:00:40", "throughput": 653.69, "total_tokens": 24160}
{"current_steps": 63, "total_steps": 125, "eval_loss": 2.652535915374756, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:00:38", "remaining_time": "0:00:38", "throughput": 654.32, "total_tokens": 25504}
{"current_steps": 65, "total_steps": 125, "loss": 0.7728, "lr": 0.017096855093032493, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:00:40", "remaining_time": "0:00:37", "throughput": 648.92, "total_tokens": 26112}
{"current_steps": 70, "total_steps": 125, "loss": 0.3298, "lr": 0.015, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:41", "remaining_time": "0:00:32", "throughput": 681.14, "total_tokens": 28064}
{"current_steps": 70, "total_steps": 125, "eval_loss": 0.37763792276382446, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:00:42", "remaining_time": "0:00:33", "throughput": 659.12, "total_tokens": 28064}
{"current_steps": 75, "total_steps": 125, "loss": 0.3663, "lr": 0.012903144906967513, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:00:44", "remaining_time": "0:00:29", "throughput": 670.97, "total_tokens": 29824}
{"current_steps": 77, "total_steps": 125, "eval_loss": 0.36274200677871704, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:00:46", "remaining_time": "0:00:28", "throughput": 663.42, "total_tokens": 30720}
{"current_steps": 80, "total_steps": 125, "loss": 0.3654, "lr": 0.01084746732862726, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:00:48", "remaining_time": "0:00:27", "throughput": 664.61, "total_tokens": 31904}
{"current_steps": 84, "total_steps": 125, "eval_loss": 0.35259029269218445, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:00:50", "remaining_time": "0:00:24", "throughput": 667.23, "total_tokens": 33504}
{"current_steps": 85, "total_steps": 125, "loss": 0.3696, "lr": 0.008873336145960059, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:00:51", "remaining_time": "0:00:24", "throughput": 658.85, "total_tokens": 33984}
{"current_steps": 90, "total_steps": 125, "loss": 0.3495, "lr": 0.007019518852269953, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:00:52", "remaining_time": "0:00:20", "throughput": 680.97, "total_tokens": 35776}
{"current_steps": 91, "total_steps": 125, "eval_loss": 0.3546493351459503, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:00:54", "remaining_time": "0:00:20", "throughput": 667.49, "total_tokens": 36128}
{"current_steps": 95, "total_steps": 125, "loss": 0.412, "lr": 0.005322420246868689, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:00:55", "remaining_time": "0:00:17", "throughput": 670.99, "total_tokens": 37472}
{"current_steps": 98, "total_steps": 125, "eval_loss": 0.3496828079223633, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:00:57", "remaining_time": "0:00:15", "throughput": 666.72, "total_tokens": 38592}
{"current_steps": 100, "total_steps": 125, "loss": 0.3676, "lr": 0.003815367526752516, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:00:59", "remaining_time": "0:00:14", "throughput": 661.88, "total_tokens": 39328}
{"current_steps": 105, "total_steps": 125, "loss": 0.349, "lr": 0.0025279558154618197, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:01:00", "remaining_time": "0:00:11", "throughput": 683.27, "total_tokens": 41280}
{"current_steps": 105, "total_steps": 125, "eval_loss": 0.35384148359298706, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:01:01", "remaining_time": "0:00:11", "throughput": 667.71, "total_tokens": 41280}
{"current_steps": 110, "total_steps": 125, "loss": 0.3482, "lr": 0.0014854669814637145, "epoch": 0.88, "percentage": 88.0, "elapsed_time": "0:01:03", "remaining_time": "0:00:08", "throughput": 682.44, "total_tokens": 43552}
{"current_steps": 112, "total_steps": 125, "eval_loss": 0.3565780818462372, "epoch": 0.896, "percentage": 89.6, "elapsed_time": "0:01:05", "remaining_time": "0:00:07", "throughput": 672.98, "total_tokens": 44160}
{"current_steps": 115, "total_steps": 125, "loss": 0.3258, "lr": 0.000708373159156555, "epoch": 0.92, "percentage": 92.0, "elapsed_time": "0:01:07", "remaining_time": "0:00:05", "throughput": 674.38, "total_tokens": 45216}
{"current_steps": 119, "total_steps": 125, "eval_loss": 0.35846200585365295, "epoch": 0.952, "percentage": 95.2, "elapsed_time": "0:01:09", "remaining_time": "0:00:03", "throughput": 677.12, "total_tokens": 46944}
{"current_steps": 120, "total_steps": 125, "loss": 0.3217, "lr": 0.0002119347222264617, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "0:01:10", "remaining_time": "0:00:02", "throughput": 672.12, "total_tokens": 47360}
{"current_steps": 125, "total_steps": 125, "loss": 0.3608, "lr": 5.90060425420702e-06, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:01:11", "remaining_time": "0:00:00", "throughput": 688.36, "total_tokens": 49376}
{"current_steps": 125, "total_steps": 125, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:01:12", "remaining_time": "0:00:00", "throughput": 678.8, "total_tokens": 49376}