rbelanec
/

train_cb_123_1760637639

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4239d6f3b8a7811329e696d2744cf049ec07ab2419e99288e0d8326510ea023
 size 1638528

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f0b6e2fd000000e964900a5823257284e4f26b356c6a9f547a1d951f8e1a5d8
 size 1638528

trainer_log.jsonl CHANGED Viewed

@@ -235,3 +235,15 @@
 {"current_steps": 1083, "total_steps": 1140, "eval_loss": 0.39144256711006165, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:34", "remaining_time": "0:00:14", "throughput": 2563.41, "total_tokens": 704816}
 {"current_steps": 1085, "total_steps": 1140, "loss": 0.0003, "lr": 7.332578507216469e-06, "epoch": 19.035087719298247, "percentage": 95.18, "elapsed_time": "0:04:36", "remaining_time": "0:00:14", "throughput": 2553.85, "total_tokens": 706192}
 {"current_steps": 1090, "total_steps": 1140, "loss": 0.0004, "lr": 6.084188161890325e-06, "epoch": 19.12280701754386, "percentage": 95.61, "elapsed_time": "0:04:37", "remaining_time": "0:00:12", "throughput": 2554.11, "total_tokens": 709072}

 {"current_steps": 1083, "total_steps": 1140, "eval_loss": 0.39144256711006165, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:34", "remaining_time": "0:00:14", "throughput": 2563.41, "total_tokens": 704816}
 {"current_steps": 1085, "total_steps": 1140, "loss": 0.0003, "lr": 7.332578507216469e-06, "epoch": 19.035087719298247, "percentage": 95.18, "elapsed_time": "0:04:36", "remaining_time": "0:00:14", "throughput": 2553.85, "total_tokens": 706192}
 {"current_steps": 1090, "total_steps": 1140, "loss": 0.0004, "lr": 6.084188161890325e-06, "epoch": 19.12280701754386, "percentage": 95.61, "elapsed_time": "0:04:37", "remaining_time": "0:00:12", "throughput": 2554.11, "total_tokens": 709072}
+{"current_steps": 1095, "total_steps": 1140, "loss": 0.0001, "lr": 4.95156606941688e-06, "epoch": 19.210526315789473, "percentage": 96.05, "elapsed_time": "0:04:38", "remaining_time": "0:00:11", "throughput": 2555.88, "total_tokens": 712496}
+{"current_steps": 1100, "total_steps": 1140, "loss": 0.0002, "lr": 3.9349777035421194e-06, "epoch": 19.29824561403509, "percentage": 96.49, "elapsed_time": "0:04:39", "remaining_time": "0:00:10", "throughput": 2555.77, "total_tokens": 715056}
+{"current_steps": 1105, "total_steps": 1140, "loss": 0.0002, "lr": 3.034661341025258e-06, "epoch": 19.385964912280702, "percentage": 96.93, "elapsed_time": "0:04:40", "remaining_time": "0:00:08", "throughput": 2557.35, "total_tokens": 718416}
+{"current_steps": 1110, "total_steps": 1140, "loss": 0.0002, "lr": 2.250828005789518e-06, "epoch": 19.473684210526315, "percentage": 97.37, "elapsed_time": "0:04:42", "remaining_time": "0:00:07", "throughput": 2558.9, "total_tokens": 721744}
+{"current_steps": 1115, "total_steps": 1140, "loss": 0.0001, "lr": 1.5836614194602028e-06, "epoch": 19.56140350877193, "percentage": 97.81, "elapsed_time": "0:04:43", "remaining_time": "0:00:06", "throughput": 2560.82, "total_tokens": 725328}
+{"current_steps": 1120, "total_steps": 1140, "loss": 0.0002, "lr": 1.033317958302693e-06, "epoch": 19.649122807017545, "percentage": 98.25, "elapsed_time": "0:04:44", "remaining_time": "0:00:05", "throughput": 2562.99, "total_tokens": 728976}
+{"current_steps": 1125, "total_steps": 1140, "loss": 0.0003, "lr": 5.999266165694906e-07, "epoch": 19.736842105263158, "percentage": 98.68, "elapsed_time": "0:04:45", "remaining_time": "0:00:03", "throughput": 2564.53, "total_tokens": 732368}
+{"current_steps": 1130, "total_steps": 1140, "loss": 0.0002, "lr": 2.8358897626556966e-07, "epoch": 19.82456140350877, "percentage": 99.12, "elapsed_time": "0:04:46", "remaining_time": "0:00:02", "throughput": 2565.43, "total_tokens": 735440}
+{"current_steps": 1135, "total_steps": 1140, "loss": 0.0002, "lr": 8.437918333864537e-08, "epoch": 19.912280701754387, "percentage": 99.56, "elapsed_time": "0:04:47", "remaining_time": "0:00:01", "throughput": 2567.56, "total_tokens": 739120}
+{"current_steps": 1140, "total_steps": 1140, "loss": 0.0001, "lr": 2.343930299963937e-09, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:48", "remaining_time": "0:00:00", "throughput": 2568.83, "total_tokens": 742296}
+{"current_steps": 1140, "total_steps": 1140, "eval_loss": 0.39334043860435486, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:49", "remaining_time": "0:00:00", "throughput": 2562.19, "total_tokens": 742296}
+{"current_steps": 1140, "total_steps": 1140, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:50", "remaining_time": "0:00:00", "throughput": 2554.02, "total_tokens": 742296}