rbelanec
/

test

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42f9730df9003ed90ec16eff36447670adf3c6c2825279836b31778b7ba30e50
 size 335717200

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e2d5b595f47cacb4719f8b6331630e3ef29556ccac245bc2b9e9f5c17d12aac
 size 335717200

trainer_log.jsonl CHANGED Viewed

@@ -37,3 +37,11 @@
 {"current_steps": 135, "total_steps": 249, "loss": 0.3506, "lr": 2.6051562213206632e-05, "epoch": 0.5421686746987951, "percentage": 54.22, "elapsed_time": "0:00:31", "remaining_time": "0:00:26", "throughput": 754.7, "total_tokens": 23840}
 {"current_steps": 140, "total_steps": 249, "loss": 0.3597, "lr": 2.429884359310328e-05, "epoch": 0.5622489959839357, "percentage": 56.22, "elapsed_time": "0:00:31", "remaining_time": "0:00:24", "throughput": 776.42, "total_tokens": 24832}
 {"current_steps": 143, "total_steps": 249, "eval_loss": 0.3539418578147888, "epoch": 0.5742971887550201, "percentage": 57.43, "elapsed_time": "0:00:33", "remaining_time": "0:00:24", "throughput": 764.59, "total_tokens": 25312}

 {"current_steps": 135, "total_steps": 249, "loss": 0.3506, "lr": 2.6051562213206632e-05, "epoch": 0.5421686746987951, "percentage": 54.22, "elapsed_time": "0:00:31", "remaining_time": "0:00:26", "throughput": 754.7, "total_tokens": 23840}
 {"current_steps": 140, "total_steps": 249, "loss": 0.3597, "lr": 2.429884359310328e-05, "epoch": 0.5622489959839357, "percentage": 56.22, "elapsed_time": "0:00:31", "remaining_time": "0:00:24", "throughput": 776.42, "total_tokens": 24832}
 {"current_steps": 143, "total_steps": 249, "eval_loss": 0.3539418578147888, "epoch": 0.5742971887550201, "percentage": 57.43, "elapsed_time": "0:00:33", "remaining_time": "0:00:24", "throughput": 764.59, "total_tokens": 25312}
+{"current_steps": 145, "total_steps": 249, "loss": 0.382, "lr": 2.2549571491760986e-05, "epoch": 0.5823293172690763, "percentage": 58.23, "elapsed_time": "0:00:34", "remaining_time": "0:00:24", "throughput": 748.86, "total_tokens": 25648}
+{"current_steps": 150, "total_steps": 249, "loss": 0.3337, "lr": 2.0812344417381595e-05, "epoch": 0.6024096385542169, "percentage": 60.24, "elapsed_time": "0:00:34", "remaining_time": "0:00:22", "throughput": 765.61, "total_tokens": 26496}
+{"current_steps": 155, "total_steps": 249, "loss": 0.3846, "lr": 1.909570167110415e-05, "epoch": 0.6224899598393574, "percentage": 62.25, "elapsed_time": "0:00:34", "remaining_time": "0:00:21", "throughput": 784.09, "total_tokens": 27392}
+{"current_steps": 156, "total_steps": 249, "eval_loss": 0.35141557455062866, "epoch": 0.6265060240963856, "percentage": 62.65, "elapsed_time": "0:00:35", "remaining_time": "0:00:21", "throughput": 772.32, "total_tokens": 27552}
+{"current_steps": 160, "total_steps": 249, "loss": 0.3663, "lr": 1.7408081372259632e-05, "epoch": 0.642570281124498, "percentage": 64.26, "elapsed_time": "0:00:37", "remaining_time": "0:00:21", "throughput": 748.74, "total_tokens": 28272}
+{"current_steps": 165, "total_steps": 249, "loss": 0.3281, "lr": 1.5757778980982626e-05, "epoch": 0.6626506024096386, "percentage": 66.27, "elapsed_time": "0:00:38", "remaining_time": "0:00:19", "throughput": 766.77, "total_tokens": 29184}
+{"current_steps": 169, "total_steps": 249, "eval_loss": 0.35279974341392517, "epoch": 0.678714859437751, "percentage": 67.87, "elapsed_time": "0:00:39", "remaining_time": "0:00:18", "throughput": 766.99, "total_tokens": 29984}
+{"current_steps": 170, "total_steps": 249, "loss": 0.333, "lr": 1.4152906522061048e-05, "epoch": 0.6827309236947792, "percentage": 68.27, "elapsed_time": "0:00:40", "remaining_time": "0:00:18", "throughput": 746.8, "total_tokens": 30128}