Training in progress, step 30528
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +383 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fd691a69779af2453a731e09d16729c689f39bc213fb6b154a67de4839a7c03
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -5739,3 +5739,386 @@
|
|
| 5739 |
{"current_steps": 28620, "total_steps": 38160, "eval_loss": 0.4691649079322815, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "1:26:27", "remaining_time": "0:28:49", "throughput": 3598.63, "total_tokens": 18668536}
|
| 5740 |
{"current_steps": 28625, "total_steps": 38160, "loss": 0.7674, "lr": 8.92330349735711e-06, "epoch": 15.002620545073375, "percentage": 75.01, "elapsed_time": "1:26:30", "remaining_time": "0:28:48", "throughput": 3597.57, "total_tokens": 18672024}
|
| 5741 |
{"current_steps": 28630, "total_steps": 38160, "loss": 0.6076, "lr": 8.914548698623104e-06, "epoch": 15.00524109014675, "percentage": 75.03, "elapsed_time": "1:26:31", "remaining_time": "0:28:47", "throughput": 3597.49, "total_tokens": 18674712}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5739 |
{"current_steps": 28620, "total_steps": 38160, "eval_loss": 0.4691649079322815, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "1:26:27", "remaining_time": "0:28:49", "throughput": 3598.63, "total_tokens": 18668536}
|
| 5740 |
{"current_steps": 28625, "total_steps": 38160, "loss": 0.7674, "lr": 8.92330349735711e-06, "epoch": 15.002620545073375, "percentage": 75.01, "elapsed_time": "1:26:30", "remaining_time": "0:28:48", "throughput": 3597.57, "total_tokens": 18672024}
|
| 5741 |
{"current_steps": 28630, "total_steps": 38160, "loss": 0.6076, "lr": 8.914548698623104e-06, "epoch": 15.00524109014675, "percentage": 75.03, "elapsed_time": "1:26:31", "remaining_time": "0:28:47", "throughput": 3597.49, "total_tokens": 18674712}
|
| 5742 |
+
{"current_steps": 28635, "total_steps": 38160, "loss": 0.4669, "lr": 8.905797264781995e-06, "epoch": 15.007861635220126, "percentage": 75.04, "elapsed_time": "1:26:32", "remaining_time": "0:28:47", "throughput": 3597.79, "total_tokens": 18682488}
|
| 5743 |
+
{"current_steps": 28640, "total_steps": 38160, "loss": 0.4795, "lr": 8.89704919766449e-06, "epoch": 15.0104821802935, "percentage": 75.05, "elapsed_time": "1:26:33", "remaining_time": "0:28:46", "throughput": 3597.87, "total_tokens": 18685816}
|
| 5744 |
+
{"current_steps": 28645, "total_steps": 38160, "loss": 0.511, "lr": 8.888304499100586e-06, "epoch": 15.013102725366876, "percentage": 75.07, "elapsed_time": "1:26:34", "remaining_time": "0:28:45", "throughput": 3597.84, "total_tokens": 18688760}
|
| 5745 |
+
{"current_steps": 28650, "total_steps": 38160, "loss": 0.3944, "lr": 8.879563170919566e-06, "epoch": 15.015723270440251, "percentage": 75.08, "elapsed_time": "1:26:35", "remaining_time": "0:28:44", "throughput": 3597.76, "total_tokens": 18691160}
|
| 5746 |
+
{"current_steps": 28655, "total_steps": 38160, "loss": 0.5243, "lr": 8.870825214950016e-06, "epoch": 15.018343815513626, "percentage": 75.09, "elapsed_time": "1:26:36", "remaining_time": "0:28:43", "throughput": 3597.79, "total_tokens": 18694616}
|
| 5747 |
+
{"current_steps": 28660, "total_steps": 38160, "loss": 0.4111, "lr": 8.862090633019818e-06, "epoch": 15.020964360587001, "percentage": 75.1, "elapsed_time": "1:26:37", "remaining_time": "0:28:42", "throughput": 3597.75, "total_tokens": 18697656}
|
| 5748 |
+
{"current_steps": 28665, "total_steps": 38160, "loss": 0.415, "lr": 8.853359426956146e-06, "epoch": 15.023584905660377, "percentage": 75.12, "elapsed_time": "1:26:37", "remaining_time": "0:28:41", "throughput": 3597.67, "total_tokens": 18700536}
|
| 5749 |
+
{"current_steps": 28670, "total_steps": 38160, "loss": 0.5785, "lr": 8.844631598585482e-06, "epoch": 15.026205450733753, "percentage": 75.13, "elapsed_time": "1:26:38", "remaining_time": "0:28:40", "throughput": 3597.71, "total_tokens": 18703960}
|
| 5750 |
+
{"current_steps": 28675, "total_steps": 38160, "loss": 0.4214, "lr": 8.835907149733569e-06, "epoch": 15.028825995807129, "percentage": 75.14, "elapsed_time": "1:26:39", "remaining_time": "0:28:39", "throughput": 3597.69, "total_tokens": 18706744}
|
| 5751 |
+
{"current_steps": 28680, "total_steps": 38160, "loss": 0.4867, "lr": 8.827186082225477e-06, "epoch": 15.031446540880504, "percentage": 75.16, "elapsed_time": "1:26:40", "remaining_time": "0:28:39", "throughput": 3597.78, "total_tokens": 18710424}
|
| 5752 |
+
{"current_steps": 28685, "total_steps": 38160, "loss": 0.5995, "lr": 8.81846839788554e-06, "epoch": 15.034067085953879, "percentage": 75.17, "elapsed_time": "1:26:41", "remaining_time": "0:28:38", "throughput": 3597.99, "total_tokens": 18715640}
|
| 5753 |
+
{"current_steps": 28690, "total_steps": 38160, "loss": 0.5773, "lr": 8.809754098537407e-06, "epoch": 15.036687631027254, "percentage": 75.18, "elapsed_time": "1:26:42", "remaining_time": "0:28:37", "throughput": 3597.99, "total_tokens": 18718680}
|
| 5754 |
+
{"current_steps": 28695, "total_steps": 38160, "loss": 0.4906, "lr": 8.801043186004016e-06, "epoch": 15.03930817610063, "percentage": 75.2, "elapsed_time": "1:26:43", "remaining_time": "0:28:36", "throughput": 3597.98, "total_tokens": 18721688}
|
| 5755 |
+
{"current_steps": 28700, "total_steps": 38160, "loss": 0.4583, "lr": 8.792335662107575e-06, "epoch": 15.041928721174004, "percentage": 75.21, "elapsed_time": "1:26:44", "remaining_time": "0:28:35", "throughput": 3598.0, "total_tokens": 18724920}
|
| 5756 |
+
{"current_steps": 28705, "total_steps": 38160, "loss": 0.6302, "lr": 8.783631528669605e-06, "epoch": 15.04454926624738, "percentage": 75.22, "elapsed_time": "1:26:45", "remaining_time": "0:28:34", "throughput": 3598.03, "total_tokens": 18728376}
|
| 5757 |
+
{"current_steps": 28710, "total_steps": 38160, "loss": 0.3933, "lr": 8.774930787510924e-06, "epoch": 15.047169811320755, "percentage": 75.24, "elapsed_time": "1:26:46", "remaining_time": "0:28:33", "throughput": 3598.09, "total_tokens": 18731704}
|
| 5758 |
+
{"current_steps": 28715, "total_steps": 38160, "loss": 0.5297, "lr": 8.766233440451608e-06, "epoch": 15.04979035639413, "percentage": 75.25, "elapsed_time": "1:26:46", "remaining_time": "0:28:32", "throughput": 3598.15, "total_tokens": 18734968}
|
| 5759 |
+
{"current_steps": 28720, "total_steps": 38160, "loss": 0.5306, "lr": 8.75753948931105e-06, "epoch": 15.052410901467505, "percentage": 75.26, "elapsed_time": "1:26:47", "remaining_time": "0:28:31", "throughput": 3598.18, "total_tokens": 18738680}
|
| 5760 |
+
{"current_steps": 28725, "total_steps": 38160, "loss": 0.4353, "lr": 8.748848935907936e-06, "epoch": 15.05503144654088, "percentage": 75.28, "elapsed_time": "1:26:48", "remaining_time": "0:28:30", "throughput": 3598.23, "total_tokens": 18742136}
|
| 5761 |
+
{"current_steps": 28730, "total_steps": 38160, "loss": 0.3737, "lr": 8.740161782060214e-06, "epoch": 15.057651991614255, "percentage": 75.29, "elapsed_time": "1:26:49", "remaining_time": "0:28:29", "throughput": 3598.16, "total_tokens": 18745016}
|
| 5762 |
+
{"current_steps": 28735, "total_steps": 38160, "loss": 0.492, "lr": 8.731478029585144e-06, "epoch": 15.06027253668763, "percentage": 75.3, "elapsed_time": "1:26:50", "remaining_time": "0:28:29", "throughput": 3598.31, "total_tokens": 18748920}
|
| 5763 |
+
{"current_steps": 28740, "total_steps": 38160, "loss": 0.4938, "lr": 8.722797680299278e-06, "epoch": 15.062893081761006, "percentage": 75.31, "elapsed_time": "1:26:51", "remaining_time": "0:28:28", "throughput": 3598.5, "total_tokens": 18753560}
|
| 5764 |
+
{"current_steps": 28745, "total_steps": 38160, "loss": 0.5807, "lr": 8.71412073601843e-06, "epoch": 15.065513626834381, "percentage": 75.33, "elapsed_time": "1:26:52", "remaining_time": "0:28:27", "throughput": 3598.5, "total_tokens": 18756952}
|
| 5765 |
+
{"current_steps": 28750, "total_steps": 38160, "loss": 0.5194, "lr": 8.705447198557731e-06, "epoch": 15.068134171907756, "percentage": 75.34, "elapsed_time": "1:26:53", "remaining_time": "0:28:26", "throughput": 3598.54, "total_tokens": 18760472}
|
| 5766 |
+
{"current_steps": 28755, "total_steps": 38160, "loss": 0.4953, "lr": 8.696777069731574e-06, "epoch": 15.070754716981131, "percentage": 75.35, "elapsed_time": "1:26:54", "remaining_time": "0:28:25", "throughput": 3598.56, "total_tokens": 18763928}
|
| 5767 |
+
{"current_steps": 28760, "total_steps": 38160, "loss": 0.495, "lr": 8.688110351353654e-06, "epoch": 15.073375262054507, "percentage": 75.37, "elapsed_time": "1:26:55", "remaining_time": "0:28:24", "throughput": 3598.69, "total_tokens": 18767512}
|
| 5768 |
+
{"current_steps": 28765, "total_steps": 38160, "loss": 0.5838, "lr": 8.679447045236962e-06, "epoch": 15.075995807127883, "percentage": 75.38, "elapsed_time": "1:26:56", "remaining_time": "0:28:23", "throughput": 3598.95, "total_tokens": 18774040}
|
| 5769 |
+
{"current_steps": 28770, "total_steps": 38160, "loss": 0.4197, "lr": 8.670787153193746e-06, "epoch": 15.078616352201259, "percentage": 75.39, "elapsed_time": "1:26:57", "remaining_time": "0:28:22", "throughput": 3599.0, "total_tokens": 18777336}
|
| 5770 |
+
{"current_steps": 28775, "total_steps": 38160, "loss": 0.3594, "lr": 8.662130677035574e-06, "epoch": 15.081236897274634, "percentage": 75.41, "elapsed_time": "1:26:58", "remaining_time": "0:28:21", "throughput": 3599.03, "total_tokens": 18780504}
|
| 5771 |
+
{"current_steps": 28780, "total_steps": 38160, "loss": 0.6688, "lr": 8.65347761857326e-06, "epoch": 15.083857442348009, "percentage": 75.42, "elapsed_time": "1:26:59", "remaining_time": "0:28:21", "throughput": 3599.05, "total_tokens": 18783800}
|
| 5772 |
+
{"current_steps": 28785, "total_steps": 38160, "loss": 0.4435, "lr": 8.64482797961694e-06, "epoch": 15.086477987421384, "percentage": 75.43, "elapsed_time": "1:26:59", "remaining_time": "0:28:20", "throughput": 3599.09, "total_tokens": 18786968}
|
| 5773 |
+
{"current_steps": 28790, "total_steps": 38160, "loss": 0.4559, "lr": 8.636181761976016e-06, "epoch": 15.08909853249476, "percentage": 75.45, "elapsed_time": "1:27:00", "remaining_time": "0:28:19", "throughput": 3599.05, "total_tokens": 18789720}
|
| 5774 |
+
{"current_steps": 28795, "total_steps": 38160, "loss": 0.4549, "lr": 8.62753896745919e-06, "epoch": 15.091719077568134, "percentage": 75.46, "elapsed_time": "1:27:01", "remaining_time": "0:28:18", "throughput": 3599.07, "total_tokens": 18792952}
|
| 5775 |
+
{"current_steps": 28800, "total_steps": 38160, "loss": 0.4166, "lr": 8.618899597874413e-06, "epoch": 15.09433962264151, "percentage": 75.47, "elapsed_time": "1:27:02", "remaining_time": "0:28:17", "throughput": 3598.96, "total_tokens": 18795480}
|
| 5776 |
+
{"current_steps": 28805, "total_steps": 38160, "loss": 0.3336, "lr": 8.610263655028964e-06, "epoch": 15.096960167714885, "percentage": 75.48, "elapsed_time": "1:27:03", "remaining_time": "0:28:16", "throughput": 3599.0, "total_tokens": 18798648}
|
| 5777 |
+
{"current_steps": 28810, "total_steps": 38160, "loss": 0.5526, "lr": 8.601631140729366e-06, "epoch": 15.09958071278826, "percentage": 75.5, "elapsed_time": "1:27:04", "remaining_time": "0:28:15", "throughput": 3599.12, "total_tokens": 18802680}
|
| 5778 |
+
{"current_steps": 28815, "total_steps": 38160, "loss": 0.5347, "lr": 8.593002056781451e-06, "epoch": 15.102201257861635, "percentage": 75.51, "elapsed_time": "1:27:05", "remaining_time": "0:28:14", "throughput": 3599.05, "total_tokens": 18805112}
|
| 5779 |
+
{"current_steps": 28820, "total_steps": 38160, "loss": 0.4404, "lr": 8.584376404990326e-06, "epoch": 15.10482180293501, "percentage": 75.52, "elapsed_time": "1:27:06", "remaining_time": "0:28:13", "throughput": 3599.15, "total_tokens": 18809272}
|
| 5780 |
+
{"current_steps": 28825, "total_steps": 38160, "loss": 0.4508, "lr": 8.575754187160384e-06, "epoch": 15.107442348008385, "percentage": 75.54, "elapsed_time": "1:27:06", "remaining_time": "0:28:12", "throughput": 3599.2, "total_tokens": 18812440}
|
| 5781 |
+
{"current_steps": 28830, "total_steps": 38160, "loss": 0.6371, "lr": 8.567135405095291e-06, "epoch": 15.11006289308176, "percentage": 75.55, "elapsed_time": "1:27:07", "remaining_time": "0:28:11", "throughput": 3599.11, "total_tokens": 18815288}
|
| 5782 |
+
{"current_steps": 28835, "total_steps": 38160, "loss": 0.5749, "lr": 8.558520060597985e-06, "epoch": 15.112683438155136, "percentage": 75.56, "elapsed_time": "1:27:08", "remaining_time": "0:28:10", "throughput": 3599.16, "total_tokens": 18818424}
|
| 5783 |
+
{"current_steps": 28840, "total_steps": 38160, "loss": 0.5404, "lr": 8.54990815547071e-06, "epoch": 15.115303983228511, "percentage": 75.58, "elapsed_time": "1:27:09", "remaining_time": "0:28:09", "throughput": 3599.11, "total_tokens": 18821144}
|
| 5784 |
+
{"current_steps": 28845, "total_steps": 38160, "loss": 0.4012, "lr": 8.541299691514974e-06, "epoch": 15.117924528301886, "percentage": 75.59, "elapsed_time": "1:27:10", "remaining_time": "0:28:09", "throughput": 3599.17, "total_tokens": 18824632}
|
| 5785 |
+
{"current_steps": 28850, "total_steps": 38160, "loss": 0.5228, "lr": 8.53269467053158e-06, "epoch": 15.120545073375261, "percentage": 75.6, "elapsed_time": "1:27:11", "remaining_time": "0:28:08", "throughput": 3599.12, "total_tokens": 18827224}
|
| 5786 |
+
{"current_steps": 28855, "total_steps": 38160, "loss": 0.5148, "lr": 8.524093094320593e-06, "epoch": 15.123165618448636, "percentage": 75.62, "elapsed_time": "1:27:11", "remaining_time": "0:28:07", "throughput": 3599.09, "total_tokens": 18830072}
|
| 5787 |
+
{"current_steps": 28860, "total_steps": 38160, "loss": 0.5882, "lr": 8.515494964681357e-06, "epoch": 15.125786163522013, "percentage": 75.63, "elapsed_time": "1:27:12", "remaining_time": "0:28:06", "throughput": 3599.06, "total_tokens": 18832856}
|
| 5788 |
+
{"current_steps": 28865, "total_steps": 38160, "loss": 0.6785, "lr": 8.506900283412506e-06, "epoch": 15.128406708595389, "percentage": 75.64, "elapsed_time": "1:27:13", "remaining_time": "0:28:05", "throughput": 3599.0, "total_tokens": 18835544}
|
| 5789 |
+
{"current_steps": 28870, "total_steps": 38160, "loss": 0.488, "lr": 8.498309052311953e-06, "epoch": 15.131027253668764, "percentage": 75.66, "elapsed_time": "1:27:14", "remaining_time": "0:28:04", "throughput": 3598.92, "total_tokens": 18838360}
|
| 5790 |
+
{"current_steps": 28875, "total_steps": 38160, "loss": 0.6165, "lr": 8.489721273176887e-06, "epoch": 15.133647798742139, "percentage": 75.67, "elapsed_time": "1:27:15", "remaining_time": "0:28:03", "throughput": 3599.03, "total_tokens": 18842168}
|
| 5791 |
+
{"current_steps": 28880, "total_steps": 38160, "loss": 0.504, "lr": 8.481136947803777e-06, "epoch": 15.136268343815514, "percentage": 75.68, "elapsed_time": "1:27:16", "remaining_time": "0:28:02", "throughput": 3599.05, "total_tokens": 18845496}
|
| 5792 |
+
{"current_steps": 28885, "total_steps": 38160, "loss": 0.3566, "lr": 8.472556077988362e-06, "epoch": 15.13888888888889, "percentage": 75.69, "elapsed_time": "1:27:17", "remaining_time": "0:28:01", "throughput": 3599.27, "total_tokens": 18850232}
|
| 5793 |
+
{"current_steps": 28890, "total_steps": 38160, "loss": 0.5382, "lr": 8.46397866552565e-06, "epoch": 15.141509433962264, "percentage": 75.71, "elapsed_time": "1:27:18", "remaining_time": "0:28:00", "throughput": 3599.12, "total_tokens": 18852536}
|
| 5794 |
+
{"current_steps": 28895, "total_steps": 38160, "loss": 0.5532, "lr": 8.455404712209952e-06, "epoch": 15.14412997903564, "percentage": 75.72, "elapsed_time": "1:27:18", "remaining_time": "0:27:59", "throughput": 3599.1, "total_tokens": 18855544}
|
| 5795 |
+
{"current_steps": 28900, "total_steps": 38160, "loss": 0.5829, "lr": 8.446834219834836e-06, "epoch": 15.146750524109015, "percentage": 75.73, "elapsed_time": "1:27:19", "remaining_time": "0:27:58", "throughput": 3599.13, "total_tokens": 18858584}
|
| 5796 |
+
{"current_steps": 28905, "total_steps": 38160, "loss": 0.4823, "lr": 8.43826719019316e-06, "epoch": 15.14937106918239, "percentage": 75.75, "elapsed_time": "1:27:20", "remaining_time": "0:27:57", "throughput": 3599.15, "total_tokens": 18861688}
|
| 5797 |
+
{"current_steps": 28910, "total_steps": 38160, "loss": 0.4706, "lr": 8.429703625077043e-06, "epoch": 15.151991614255765, "percentage": 75.76, "elapsed_time": "1:27:21", "remaining_time": "0:27:57", "throughput": 3599.05, "total_tokens": 18864056}
|
| 5798 |
+
{"current_steps": 28915, "total_steps": 38160, "loss": 0.4574, "lr": 8.421143526277875e-06, "epoch": 15.15461215932914, "percentage": 75.77, "elapsed_time": "1:27:22", "remaining_time": "0:27:56", "throughput": 3599.17, "total_tokens": 18867960}
|
| 5799 |
+
{"current_steps": 28920, "total_steps": 38160, "loss": 0.3854, "lr": 8.41258689558634e-06, "epoch": 15.157232704402515, "percentage": 75.79, "elapsed_time": "1:27:23", "remaining_time": "0:27:55", "throughput": 3599.09, "total_tokens": 18870328}
|
| 5800 |
+
{"current_steps": 28925, "total_steps": 38160, "loss": 0.4766, "lr": 8.404033734792386e-06, "epoch": 15.15985324947589, "percentage": 75.8, "elapsed_time": "1:27:23", "remaining_time": "0:27:54", "throughput": 3599.22, "total_tokens": 18873976}
|
| 5801 |
+
{"current_steps": 28930, "total_steps": 38160, "loss": 0.322, "lr": 8.395484045685248e-06, "epoch": 15.162473794549266, "percentage": 75.81, "elapsed_time": "1:27:24", "remaining_time": "0:27:53", "throughput": 3599.23, "total_tokens": 18877112}
|
| 5802 |
+
{"current_steps": 28935, "total_steps": 38160, "loss": 0.4765, "lr": 8.386937830053412e-06, "epoch": 15.165094339622641, "percentage": 75.83, "elapsed_time": "1:27:25", "remaining_time": "0:27:52", "throughput": 3599.14, "total_tokens": 18879480}
|
| 5803 |
+
{"current_steps": 28940, "total_steps": 38160, "loss": 0.4011, "lr": 8.37839508968464e-06, "epoch": 15.167714884696016, "percentage": 75.84, "elapsed_time": "1:27:26", "remaining_time": "0:27:51", "throughput": 3599.09, "total_tokens": 18882072}
|
| 5804 |
+
{"current_steps": 28945, "total_steps": 38160, "loss": 0.5568, "lr": 8.369855826365988e-06, "epoch": 15.170335429769391, "percentage": 75.85, "elapsed_time": "1:27:27", "remaining_time": "0:27:50", "throughput": 3599.05, "total_tokens": 18884760}
|
| 5805 |
+
{"current_steps": 28950, "total_steps": 38160, "loss": 0.4662, "lr": 8.361320041883772e-06, "epoch": 15.172955974842766, "percentage": 75.86, "elapsed_time": "1:27:28", "remaining_time": "0:27:49", "throughput": 3599.01, "total_tokens": 18887736}
|
| 5806 |
+
{"current_steps": 28955, "total_steps": 38160, "loss": 0.4548, "lr": 8.352787738023576e-06, "epoch": 15.175576519916143, "percentage": 75.88, "elapsed_time": "1:27:28", "remaining_time": "0:27:48", "throughput": 3598.95, "total_tokens": 18890584}
|
| 5807 |
+
{"current_steps": 28960, "total_steps": 38160, "loss": 0.4237, "lr": 8.344258916570274e-06, "epoch": 15.178197064989519, "percentage": 75.89, "elapsed_time": "1:27:29", "remaining_time": "0:27:47", "throughput": 3598.85, "total_tokens": 18893208}
|
| 5808 |
+
{"current_steps": 28965, "total_steps": 38160, "loss": 0.4293, "lr": 8.335733579307988e-06, "epoch": 15.180817610062894, "percentage": 75.9, "elapsed_time": "1:27:30", "remaining_time": "0:27:46", "throughput": 3598.76, "total_tokens": 18895864}
|
| 5809 |
+
{"current_steps": 28970, "total_steps": 38160, "loss": 0.4353, "lr": 8.327211728020113e-06, "epoch": 15.183438155136269, "percentage": 75.92, "elapsed_time": "1:27:31", "remaining_time": "0:27:45", "throughput": 3598.75, "total_tokens": 18898712}
|
| 5810 |
+
{"current_steps": 28975, "total_steps": 38160, "loss": 0.4746, "lr": 8.31869336448933e-06, "epoch": 15.186058700209644, "percentage": 75.93, "elapsed_time": "1:27:32", "remaining_time": "0:27:44", "throughput": 3598.77, "total_tokens": 18901624}
|
| 5811 |
+
{"current_steps": 28980, "total_steps": 38160, "loss": 0.395, "lr": 8.310178490497586e-06, "epoch": 15.18867924528302, "percentage": 75.94, "elapsed_time": "1:27:33", "remaining_time": "0:27:44", "throughput": 3598.85, "total_tokens": 18905176}
|
| 5812 |
+
{"current_steps": 28985, "total_steps": 38160, "loss": 0.3391, "lr": 8.301667107826103e-06, "epoch": 15.191299790356394, "percentage": 75.96, "elapsed_time": "1:27:33", "remaining_time": "0:27:43", "throughput": 3598.82, "total_tokens": 18907864}
|
| 5813 |
+
{"current_steps": 28990, "total_steps": 38160, "loss": 0.3853, "lr": 8.293159218255345e-06, "epoch": 15.19392033542977, "percentage": 75.97, "elapsed_time": "1:27:34", "remaining_time": "0:27:42", "throughput": 3598.89, "total_tokens": 18911064}
|
| 5814 |
+
{"current_steps": 28995, "total_steps": 38160, "loss": 0.5816, "lr": 8.284654823565088e-06, "epoch": 15.196540880503145, "percentage": 75.98, "elapsed_time": "1:27:35", "remaining_time": "0:27:41", "throughput": 3598.91, "total_tokens": 18914136}
|
| 5815 |
+
{"current_steps": 29000, "total_steps": 38160, "loss": 0.4548, "lr": 8.276153925534333e-06, "epoch": 15.19916142557652, "percentage": 76.0, "elapsed_time": "1:27:36", "remaining_time": "0:27:40", "throughput": 3598.88, "total_tokens": 18917048}
|
| 5816 |
+
{"current_steps": 29005, "total_steps": 38160, "loss": 0.5899, "lr": 8.267656525941383e-06, "epoch": 15.201781970649895, "percentage": 76.01, "elapsed_time": "1:27:37", "remaining_time": "0:27:39", "throughput": 3598.87, "total_tokens": 18919864}
|
| 5817 |
+
{"current_steps": 29010, "total_steps": 38160, "loss": 0.4487, "lr": 8.259162626563801e-06, "epoch": 15.20440251572327, "percentage": 76.02, "elapsed_time": "1:27:37", "remaining_time": "0:27:38", "throughput": 3598.9, "total_tokens": 18922968}
|
| 5818 |
+
{"current_steps": 29015, "total_steps": 38160, "loss": 0.4257, "lr": 8.250672229178402e-06, "epoch": 15.207023060796645, "percentage": 76.04, "elapsed_time": "1:27:38", "remaining_time": "0:27:37", "throughput": 3598.98, "total_tokens": 18926328}
|
| 5819 |
+
{"current_steps": 29020, "total_steps": 38160, "loss": 0.4428, "lr": 8.242185335561295e-06, "epoch": 15.20964360587002, "percentage": 76.05, "elapsed_time": "1:27:39", "remaining_time": "0:27:36", "throughput": 3599.02, "total_tokens": 18929368}
|
| 5820 |
+
{"current_steps": 29025, "total_steps": 38160, "loss": 0.4446, "lr": 8.23370194748782e-06, "epoch": 15.212264150943396, "percentage": 76.06, "elapsed_time": "1:27:40", "remaining_time": "0:27:35", "throughput": 3598.99, "total_tokens": 18932056}
|
| 5821 |
+
{"current_steps": 29030, "total_steps": 38160, "loss": 0.593, "lr": 8.22522206673262e-06, "epoch": 15.214884696016771, "percentage": 76.07, "elapsed_time": "1:27:41", "remaining_time": "0:27:34", "throughput": 3599.11, "total_tokens": 18935896}
|
| 5822 |
+
{"current_steps": 29035, "total_steps": 38160, "loss": 0.432, "lr": 8.216745695069589e-06, "epoch": 15.217505241090146, "percentage": 76.09, "elapsed_time": "1:27:42", "remaining_time": "0:27:33", "throughput": 3599.16, "total_tokens": 18938968}
|
| 5823 |
+
{"current_steps": 29040, "total_steps": 38160, "loss": 0.5405, "lr": 8.208272834271894e-06, "epoch": 15.220125786163521, "percentage": 76.1, "elapsed_time": "1:27:42", "remaining_time": "0:27:32", "throughput": 3599.28, "total_tokens": 18943000}
|
| 5824 |
+
{"current_steps": 29045, "total_steps": 38160, "loss": 0.5312, "lr": 8.19980348611194e-06, "epoch": 15.222746331236896, "percentage": 76.11, "elapsed_time": "1:27:43", "remaining_time": "0:27:31", "throughput": 3599.33, "total_tokens": 18946104}
|
| 5825 |
+
{"current_steps": 29050, "total_steps": 38160, "loss": 0.4615, "lr": 8.191337652361439e-06, "epoch": 15.225366876310272, "percentage": 76.13, "elapsed_time": "1:27:44", "remaining_time": "0:27:31", "throughput": 3599.43, "total_tokens": 18950328}
|
| 5826 |
+
{"current_steps": 29055, "total_steps": 38160, "loss": 0.4069, "lr": 8.182875334791332e-06, "epoch": 15.227987421383649, "percentage": 76.14, "elapsed_time": "1:27:45", "remaining_time": "0:27:30", "throughput": 3599.42, "total_tokens": 18953144}
|
| 5827 |
+
{"current_steps": 29060, "total_steps": 38160, "loss": 0.5555, "lr": 8.174416535171841e-06, "epoch": 15.230607966457024, "percentage": 76.15, "elapsed_time": "1:27:47", "remaining_time": "0:27:29", "throughput": 3599.59, "total_tokens": 18960248}
|
| 5828 |
+
{"current_steps": 29065, "total_steps": 38160, "loss": 0.4454, "lr": 8.165961255272467e-06, "epoch": 15.233228511530399, "percentage": 76.17, "elapsed_time": "1:27:48", "remaining_time": "0:27:28", "throughput": 3599.47, "total_tokens": 18962744}
|
| 5829 |
+
{"current_steps": 29070, "total_steps": 38160, "loss": 0.4332, "lr": 8.157509496861935e-06, "epoch": 15.235849056603774, "percentage": 76.18, "elapsed_time": "1:27:49", "remaining_time": "0:27:27", "throughput": 3599.45, "total_tokens": 18965816}
|
| 5830 |
+
{"current_steps": 29075, "total_steps": 38160, "loss": 0.4837, "lr": 8.149061261708266e-06, "epoch": 15.23846960167715, "percentage": 76.19, "elapsed_time": "1:27:50", "remaining_time": "0:27:26", "throughput": 3599.48, "total_tokens": 18969560}
|
| 5831 |
+
{"current_steps": 29080, "total_steps": 38160, "loss": 0.2982, "lr": 8.140616551578745e-06, "epoch": 15.241090146750524, "percentage": 76.21, "elapsed_time": "1:27:51", "remaining_time": "0:27:25", "throughput": 3599.46, "total_tokens": 18972792}
|
| 5832 |
+
{"current_steps": 29085, "total_steps": 38160, "loss": 0.3788, "lr": 8.13217536823989e-06, "epoch": 15.2437106918239, "percentage": 76.22, "elapsed_time": "1:27:51", "remaining_time": "0:27:24", "throughput": 3599.44, "total_tokens": 18975640}
|
| 5833 |
+
{"current_steps": 29090, "total_steps": 38160, "loss": 0.4215, "lr": 8.12373771345752e-06, "epoch": 15.246331236897275, "percentage": 76.23, "elapsed_time": "1:27:52", "remaining_time": "0:27:23", "throughput": 3599.45, "total_tokens": 18978840}
|
| 5834 |
+
{"current_steps": 29095, "total_steps": 38160, "loss": 0.4499, "lr": 8.115303588996676e-06, "epoch": 15.24895178197065, "percentage": 76.24, "elapsed_time": "1:27:53", "remaining_time": "0:27:23", "throughput": 3599.38, "total_tokens": 18981464}
|
| 5835 |
+
{"current_steps": 29100, "total_steps": 38160, "loss": 0.4197, "lr": 8.10687299662169e-06, "epoch": 15.251572327044025, "percentage": 76.26, "elapsed_time": "1:27:54", "remaining_time": "0:27:22", "throughput": 3599.48, "total_tokens": 18985368}
|
| 5836 |
+
{"current_steps": 29105, "total_steps": 38160, "loss": 0.5567, "lr": 8.098445938096147e-06, "epoch": 15.2541928721174, "percentage": 76.27, "elapsed_time": "1:27:55", "remaining_time": "0:27:21", "throughput": 3599.46, "total_tokens": 18988120}
|
| 5837 |
+
{"current_steps": 29110, "total_steps": 38160, "loss": 0.5829, "lr": 8.090022415182898e-06, "epoch": 15.256813417190775, "percentage": 76.28, "elapsed_time": "1:27:56", "remaining_time": "0:27:20", "throughput": 3599.55, "total_tokens": 18991640}
|
| 5838 |
+
{"current_steps": 29115, "total_steps": 38160, "loss": 0.5589, "lr": 8.081602429644033e-06, "epoch": 15.25943396226415, "percentage": 76.3, "elapsed_time": "1:27:56", "remaining_time": "0:27:19", "throughput": 3599.54, "total_tokens": 18994552}
|
| 5839 |
+
{"current_steps": 29120, "total_steps": 38160, "loss": 0.5996, "lr": 8.073185983240932e-06, "epoch": 15.262054507337526, "percentage": 76.31, "elapsed_time": "1:27:57", "remaining_time": "0:27:18", "throughput": 3599.72, "total_tokens": 18999224}
|
| 5840 |
+
{"current_steps": 29125, "total_steps": 38160, "loss": 0.7442, "lr": 8.064773077734206e-06, "epoch": 15.264675052410901, "percentage": 76.32, "elapsed_time": "1:27:58", "remaining_time": "0:27:17", "throughput": 3599.79, "total_tokens": 19002648}
|
| 5841 |
+
{"current_steps": 29130, "total_steps": 38160, "loss": 0.416, "lr": 8.05636371488374e-06, "epoch": 15.267295597484276, "percentage": 76.34, "elapsed_time": "1:27:59", "remaining_time": "0:27:16", "throughput": 3599.81, "total_tokens": 19005976}
|
| 5842 |
+
{"current_steps": 29135, "total_steps": 38160, "loss": 0.4823, "lr": 8.047957896448696e-06, "epoch": 15.269916142557651, "percentage": 76.35, "elapsed_time": "1:28:00", "remaining_time": "0:27:15", "throughput": 3599.87, "total_tokens": 19009144}
|
| 5843 |
+
{"current_steps": 29140, "total_steps": 38160, "loss": 0.3932, "lr": 8.039555624187451e-06, "epoch": 15.272536687631026, "percentage": 76.36, "elapsed_time": "1:28:01", "remaining_time": "0:27:14", "throughput": 3599.81, "total_tokens": 19011896}
|
| 5844 |
+
{"current_steps": 29145, "total_steps": 38160, "loss": 0.6121, "lr": 8.031156899857681e-06, "epoch": 15.275157232704402, "percentage": 76.38, "elapsed_time": "1:28:02", "remaining_time": "0:27:13", "throughput": 3599.83, "total_tokens": 19015256}
|
| 5845 |
+
{"current_steps": 29150, "total_steps": 38160, "loss": 0.578, "lr": 8.022761725216288e-06, "epoch": 15.277777777777779, "percentage": 76.39, "elapsed_time": "1:28:03", "remaining_time": "0:27:12", "throughput": 3599.85, "total_tokens": 19018360}
|
| 5846 |
+
{"current_steps": 29155, "total_steps": 38160, "loss": 0.324, "lr": 8.014370102019456e-06, "epoch": 15.280398322851154, "percentage": 76.4, "elapsed_time": "1:28:04", "remaining_time": "0:27:12", "throughput": 3599.92, "total_tokens": 19022136}
|
| 5847 |
+
{"current_steps": 29160, "total_steps": 38160, "loss": 0.5324, "lr": 8.005982032022616e-06, "epoch": 15.283018867924529, "percentage": 76.42, "elapsed_time": "1:28:04", "remaining_time": "0:27:11", "throughput": 3599.92, "total_tokens": 19024920}
|
| 5848 |
+
{"current_steps": 29165, "total_steps": 38160, "loss": 0.389, "lr": 7.997597516980467e-06, "epoch": 15.285639412997904, "percentage": 76.43, "elapsed_time": "1:28:05", "remaining_time": "0:27:10", "throughput": 3600.12, "total_tokens": 19029464}
|
| 5849 |
+
{"current_steps": 29170, "total_steps": 38160, "loss": 0.5293, "lr": 7.989216558646942e-06, "epoch": 15.28825995807128, "percentage": 76.44, "elapsed_time": "1:28:06", "remaining_time": "0:27:09", "throughput": 3600.21, "total_tokens": 19033272}
|
| 5850 |
+
{"current_steps": 29175, "total_steps": 38160, "loss": 0.5148, "lr": 7.980839158775235e-06, "epoch": 15.290880503144654, "percentage": 76.45, "elapsed_time": "1:28:07", "remaining_time": "0:27:08", "throughput": 3600.15, "total_tokens": 19035736}
|
| 5851 |
+
{"current_steps": 29180, "total_steps": 38160, "loss": 0.5262, "lr": 7.972465319117814e-06, "epoch": 15.29350104821803, "percentage": 76.47, "elapsed_time": "1:28:08", "remaining_time": "0:27:07", "throughput": 3600.19, "total_tokens": 19038904}
|
| 5852 |
+
{"current_steps": 29185, "total_steps": 38160, "loss": 0.4433, "lr": 7.964095041426386e-06, "epoch": 15.296121593291405, "percentage": 76.48, "elapsed_time": "1:28:09", "remaining_time": "0:27:06", "throughput": 3600.24, "total_tokens": 19042424}
|
| 5853 |
+
{"current_steps": 29190, "total_steps": 38160, "loss": 0.3429, "lr": 7.95572832745192e-06, "epoch": 15.29874213836478, "percentage": 76.49, "elapsed_time": "1:28:10", "remaining_time": "0:27:05", "throughput": 3600.31, "total_tokens": 19045784}
|
| 5854 |
+
{"current_steps": 29195, "total_steps": 38160, "loss": 0.4525, "lr": 7.947365178944643e-06, "epoch": 15.301362683438155, "percentage": 76.51, "elapsed_time": "1:28:10", "remaining_time": "0:27:04", "throughput": 3600.32, "total_tokens": 19049016}
|
| 5855 |
+
{"current_steps": 29200, "total_steps": 38160, "loss": 0.4057, "lr": 7.939005597654025e-06, "epoch": 15.30398322851153, "percentage": 76.52, "elapsed_time": "1:28:11", "remaining_time": "0:27:03", "throughput": 3600.36, "total_tokens": 19052536}
|
| 5856 |
+
{"current_steps": 29205, "total_steps": 38160, "loss": 0.3857, "lr": 7.930649585328787e-06, "epoch": 15.306603773584905, "percentage": 76.53, "elapsed_time": "1:28:12", "remaining_time": "0:27:02", "throughput": 3600.3, "total_tokens": 19055480}
|
| 5857 |
+
{"current_steps": 29210, "total_steps": 38160, "loss": 0.4954, "lr": 7.922297143716918e-06, "epoch": 15.30922431865828, "percentage": 76.55, "elapsed_time": "1:28:13", "remaining_time": "0:27:02", "throughput": 3600.39, "total_tokens": 19059416}
|
| 5858 |
+
{"current_steps": 29215, "total_steps": 38160, "loss": 0.3857, "lr": 7.913948274565652e-06, "epoch": 15.311844863731656, "percentage": 76.56, "elapsed_time": "1:28:14", "remaining_time": "0:27:01", "throughput": 3600.37, "total_tokens": 19062360}
|
| 5859 |
+
{"current_steps": 29220, "total_steps": 38160, "loss": 0.4377, "lr": 7.905602979621491e-06, "epoch": 15.314465408805031, "percentage": 76.57, "elapsed_time": "1:28:15", "remaining_time": "0:27:00", "throughput": 3600.33, "total_tokens": 19065400}
|
| 5860 |
+
{"current_steps": 29225, "total_steps": 38160, "loss": 0.4925, "lr": 7.897261260630159e-06, "epoch": 15.317085953878406, "percentage": 76.59, "elapsed_time": "1:28:16", "remaining_time": "0:26:59", "throughput": 3600.31, "total_tokens": 19068376}
|
| 5861 |
+
{"current_steps": 29230, "total_steps": 38160, "loss": 0.3572, "lr": 7.888923119336647e-06, "epoch": 15.319706498951781, "percentage": 76.6, "elapsed_time": "1:28:17", "remaining_time": "0:26:58", "throughput": 3600.24, "total_tokens": 19071032}
|
| 5862 |
+
{"current_steps": 29235, "total_steps": 38160, "loss": 0.4963, "lr": 7.880588557485203e-06, "epoch": 15.322327044025156, "percentage": 76.61, "elapsed_time": "1:28:18", "remaining_time": "0:26:57", "throughput": 3600.21, "total_tokens": 19073944}
|
| 5863 |
+
{"current_steps": 29240, "total_steps": 38160, "loss": 0.4892, "lr": 7.872257576819325e-06, "epoch": 15.324947589098532, "percentage": 76.62, "elapsed_time": "1:28:18", "remaining_time": "0:26:56", "throughput": 3600.13, "total_tokens": 19076760}
|
| 5864 |
+
{"current_steps": 29245, "total_steps": 38160, "loss": 0.6066, "lr": 7.863930179081769e-06, "epoch": 15.327568134171909, "percentage": 76.64, "elapsed_time": "1:28:19", "remaining_time": "0:26:55", "throughput": 3600.07, "total_tokens": 19079480}
|
| 5865 |
+
{"current_steps": 29250, "total_steps": 38160, "loss": 0.5747, "lr": 7.855606366014517e-06, "epoch": 15.330188679245284, "percentage": 76.65, "elapsed_time": "1:28:20", "remaining_time": "0:26:54", "throughput": 3599.97, "total_tokens": 19081944}
|
| 5866 |
+
{"current_steps": 29255, "total_steps": 38160, "loss": 0.4671, "lr": 7.847286139358814e-06, "epoch": 15.332809224318659, "percentage": 76.66, "elapsed_time": "1:28:21", "remaining_time": "0:26:53", "throughput": 3600.0, "total_tokens": 19085272}
|
| 5867 |
+
{"current_steps": 29260, "total_steps": 38160, "loss": 0.5267, "lr": 7.838969500855162e-06, "epoch": 15.335429769392034, "percentage": 76.68, "elapsed_time": "1:28:22", "remaining_time": "0:26:52", "throughput": 3599.91, "total_tokens": 19088152}
|
| 5868 |
+
{"current_steps": 29265, "total_steps": 38160, "loss": 0.5897, "lr": 7.830656452243307e-06, "epoch": 15.33805031446541, "percentage": 76.69, "elapsed_time": "1:28:23", "remaining_time": "0:26:51", "throughput": 3599.94, "total_tokens": 19091576}
|
| 5869 |
+
{"current_steps": 29270, "total_steps": 38160, "loss": 0.5452, "lr": 7.822346995262241e-06, "epoch": 15.340670859538784, "percentage": 76.7, "elapsed_time": "1:28:24", "remaining_time": "0:26:51", "throughput": 3600.04, "total_tokens": 19095672}
|
| 5870 |
+
{"current_steps": 29275, "total_steps": 38160, "loss": 0.5339, "lr": 7.814041131650223e-06, "epoch": 15.34329140461216, "percentage": 76.72, "elapsed_time": "1:28:25", "remaining_time": "0:26:50", "throughput": 3600.16, "total_tokens": 19099416}
|
| 5871 |
+
{"current_steps": 29280, "total_steps": 38160, "loss": 0.4667, "lr": 7.805738863144731e-06, "epoch": 15.345911949685535, "percentage": 76.73, "elapsed_time": "1:28:25", "remaining_time": "0:26:49", "throughput": 3600.15, "total_tokens": 19102296}
|
| 5872 |
+
{"current_steps": 29285, "total_steps": 38160, "loss": 0.4722, "lr": 7.797440191482496e-06, "epoch": 15.34853249475891, "percentage": 76.74, "elapsed_time": "1:28:26", "remaining_time": "0:26:48", "throughput": 3600.15, "total_tokens": 19105432}
|
| 5873 |
+
{"current_steps": 29290, "total_steps": 38160, "loss": 0.3914, "lr": 7.789145118399518e-06, "epoch": 15.351153039832285, "percentage": 76.76, "elapsed_time": "1:28:27", "remaining_time": "0:26:47", "throughput": 3600.15, "total_tokens": 19108632}
|
| 5874 |
+
{"current_steps": 29295, "total_steps": 38160, "loss": 0.6256, "lr": 7.780853645631032e-06, "epoch": 15.35377358490566, "percentage": 76.77, "elapsed_time": "1:28:28", "remaining_time": "0:26:46", "throughput": 3600.21, "total_tokens": 19111832}
|
| 5875 |
+
{"current_steps": 29300, "total_steps": 38160, "loss": 0.4231, "lr": 7.772565774911522e-06, "epoch": 15.356394129979035, "percentage": 76.78, "elapsed_time": "1:28:29", "remaining_time": "0:26:45", "throughput": 3600.3, "total_tokens": 19115288}
|
| 5876 |
+
{"current_steps": 29305, "total_steps": 38160, "loss": 0.4125, "lr": 7.76428150797471e-06, "epoch": 15.35901467505241, "percentage": 76.8, "elapsed_time": "1:28:30", "remaining_time": "0:26:44", "throughput": 3600.34, "total_tokens": 19118456}
|
| 5877 |
+
{"current_steps": 29310, "total_steps": 38160, "loss": 0.442, "lr": 7.756000846553562e-06, "epoch": 15.361635220125786, "percentage": 76.81, "elapsed_time": "1:28:31", "remaining_time": "0:26:43", "throughput": 3600.31, "total_tokens": 19121272}
|
| 5878 |
+
{"current_steps": 29315, "total_steps": 38160, "loss": 0.4294, "lr": 7.74772379238031e-06, "epoch": 15.364255765199161, "percentage": 76.82, "elapsed_time": "1:28:31", "remaining_time": "0:26:42", "throughput": 3600.26, "total_tokens": 19124440}
|
| 5879 |
+
{"current_steps": 29320, "total_steps": 38160, "loss": 0.5151, "lr": 7.739450347186417e-06, "epoch": 15.366876310272536, "percentage": 76.83, "elapsed_time": "1:28:32", "remaining_time": "0:26:41", "throughput": 3600.27, "total_tokens": 19127672}
|
| 5880 |
+
{"current_steps": 29325, "total_steps": 38160, "loss": 0.5611, "lr": 7.731180512702599e-06, "epoch": 15.369496855345911, "percentage": 76.85, "elapsed_time": "1:28:33", "remaining_time": "0:26:40", "throughput": 3600.31, "total_tokens": 19130776}
|
| 5881 |
+
{"current_steps": 29330, "total_steps": 38160, "loss": 0.5127, "lr": 7.722914290658795e-06, "epoch": 15.372117400419286, "percentage": 76.86, "elapsed_time": "1:28:34", "remaining_time": "0:26:39", "throughput": 3600.32, "total_tokens": 19133720}
|
| 5882 |
+
{"current_steps": 29335, "total_steps": 38160, "loss": 0.5066, "lr": 7.714651682784224e-06, "epoch": 15.374737945492662, "percentage": 76.87, "elapsed_time": "1:28:35", "remaining_time": "0:26:39", "throughput": 3600.39, "total_tokens": 19137112}
|
| 5883 |
+
{"current_steps": 29340, "total_steps": 38160, "loss": 0.481, "lr": 7.706392690807313e-06, "epoch": 15.377358490566039, "percentage": 76.89, "elapsed_time": "1:28:36", "remaining_time": "0:26:38", "throughput": 3600.4, "total_tokens": 19140408}
|
| 5884 |
+
{"current_steps": 29345, "total_steps": 38160, "loss": 0.4667, "lr": 7.69813731645575e-06, "epoch": 15.379979035639414, "percentage": 76.9, "elapsed_time": "1:28:37", "remaining_time": "0:26:37", "throughput": 3600.45, "total_tokens": 19143672}
|
| 5885 |
+
{"current_steps": 29350, "total_steps": 38160, "loss": 0.4254, "lr": 7.689885561456475e-06, "epoch": 15.382599580712789, "percentage": 76.91, "elapsed_time": "1:28:37", "remaining_time": "0:26:36", "throughput": 3600.56, "total_tokens": 19147288}
|
| 5886 |
+
{"current_steps": 29355, "total_steps": 38160, "loss": 0.5749, "lr": 7.681637427535663e-06, "epoch": 15.385220125786164, "percentage": 76.93, "elapsed_time": "1:28:38", "remaining_time": "0:26:35", "throughput": 3600.45, "total_tokens": 19149688}
|
| 5887 |
+
{"current_steps": 29360, "total_steps": 38160, "loss": 0.4832, "lr": 7.673392916418715e-06, "epoch": 15.38784067085954, "percentage": 76.94, "elapsed_time": "1:28:39", "remaining_time": "0:26:34", "throughput": 3600.6, "total_tokens": 19153752}
|
| 5888 |
+
{"current_steps": 29365, "total_steps": 38160, "loss": 0.5677, "lr": 7.665152029830303e-06, "epoch": 15.390461215932914, "percentage": 76.95, "elapsed_time": "1:28:40", "remaining_time": "0:26:33", "throughput": 3600.68, "total_tokens": 19157208}
|
| 5889 |
+
{"current_steps": 29370, "total_steps": 38160, "loss": 0.5659, "lr": 7.656914769494314e-06, "epoch": 15.39308176100629, "percentage": 76.97, "elapsed_time": "1:28:41", "remaining_time": "0:26:32", "throughput": 3600.82, "total_tokens": 19161656}
|
| 5890 |
+
{"current_steps": 29375, "total_steps": 38160, "loss": 0.4732, "lr": 7.648681137133892e-06, "epoch": 15.395702306079665, "percentage": 76.98, "elapsed_time": "1:28:42", "remaining_time": "0:26:31", "throughput": 3600.82, "total_tokens": 19164824}
|
| 5891 |
+
{"current_steps": 29380, "total_steps": 38160, "loss": 0.4566, "lr": 7.640451134471432e-06, "epoch": 15.39832285115304, "percentage": 76.99, "elapsed_time": "1:28:43", "remaining_time": "0:26:30", "throughput": 3600.74, "total_tokens": 19167480}
|
| 5892 |
+
{"current_steps": 29385, "total_steps": 38160, "loss": 0.4422, "lr": 7.632224763228538e-06, "epoch": 15.400943396226415, "percentage": 77.0, "elapsed_time": "1:28:44", "remaining_time": "0:26:29", "throughput": 3600.56, "total_tokens": 19169752}
|
| 5893 |
+
{"current_steps": 29390, "total_steps": 38160, "loss": 0.5685, "lr": 7.6240020251260896e-06, "epoch": 15.40356394129979, "percentage": 77.02, "elapsed_time": "1:28:44", "remaining_time": "0:26:28", "throughput": 3600.55, "total_tokens": 19172696}
|
| 5894 |
+
{"current_steps": 29395, "total_steps": 38160, "loss": 0.461, "lr": 7.615782921884174e-06, "epoch": 15.406184486373165, "percentage": 77.03, "elapsed_time": "1:28:45", "remaining_time": "0:26:28", "throughput": 3600.68, "total_tokens": 19176600}
|
| 5895 |
+
{"current_steps": 29400, "total_steps": 38160, "loss": 0.356, "lr": 7.607567455222142e-06, "epoch": 15.40880503144654, "percentage": 77.04, "elapsed_time": "1:28:46", "remaining_time": "0:26:27", "throughput": 3600.58, "total_tokens": 19179000}
|
| 5896 |
+
{"current_steps": 29405, "total_steps": 38160, "loss": 0.5318, "lr": 7.599355626858582e-06, "epoch": 15.411425576519916, "percentage": 77.06, "elapsed_time": "1:28:47", "remaining_time": "0:26:26", "throughput": 3600.6, "total_tokens": 19182264}
|
| 5897 |
+
{"current_steps": 29410, "total_steps": 38160, "loss": 0.4672, "lr": 7.591147438511298e-06, "epoch": 15.414046121593291, "percentage": 77.07, "elapsed_time": "1:28:48", "remaining_time": "0:26:25", "throughput": 3600.61, "total_tokens": 19185240}
|
| 5898 |
+
{"current_steps": 29415, "total_steps": 38160, "loss": 0.4064, "lr": 7.5829428918973625e-06, "epoch": 15.416666666666666, "percentage": 77.08, "elapsed_time": "1:28:49", "remaining_time": "0:26:24", "throughput": 3600.71, "total_tokens": 19188920}
|
| 5899 |
+
{"current_steps": 29420, "total_steps": 38160, "loss": 0.528, "lr": 7.574741988733075e-06, "epoch": 15.419287211740041, "percentage": 77.1, "elapsed_time": "1:28:50", "remaining_time": "0:26:23", "throughput": 3600.6, "total_tokens": 19191672}
|
| 5900 |
+
{"current_steps": 29425, "total_steps": 38160, "loss": 0.4396, "lr": 7.56654473073396e-06, "epoch": 15.421907756813416, "percentage": 77.11, "elapsed_time": "1:28:50", "remaining_time": "0:26:22", "throughput": 3600.61, "total_tokens": 19194744}
|
| 5901 |
+
{"current_steps": 29430, "total_steps": 38160, "loss": 0.569, "lr": 7.558351119614796e-06, "epoch": 15.424528301886792, "percentage": 77.12, "elapsed_time": "1:28:51", "remaining_time": "0:26:21", "throughput": 3600.61, "total_tokens": 19197848}
|
| 5902 |
+
{"current_steps": 29435, "total_steps": 38160, "loss": 0.3337, "lr": 7.5501611570896e-06, "epoch": 15.427148846960169, "percentage": 77.14, "elapsed_time": "1:28:52", "remaining_time": "0:26:20", "throughput": 3600.63, "total_tokens": 19201432}
|
| 5903 |
+
{"current_steps": 29440, "total_steps": 38160, "loss": 0.3943, "lr": 7.541974844871602e-06, "epoch": 15.429769392033544, "percentage": 77.15, "elapsed_time": "1:28:53", "remaining_time": "0:26:19", "throughput": 3600.68, "total_tokens": 19205304}
|
| 5904 |
+
{"current_steps": 29445, "total_steps": 38160, "loss": 0.5516, "lr": 7.5337921846732965e-06, "epoch": 15.432389937106919, "percentage": 77.16, "elapsed_time": "1:28:54", "remaining_time": "0:26:18", "throughput": 3600.75, "total_tokens": 19208888}
|
| 5905 |
+
{"current_steps": 29450, "total_steps": 38160, "loss": 0.4797, "lr": 7.525613178206409e-06, "epoch": 15.435010482180294, "percentage": 77.18, "elapsed_time": "1:28:55", "remaining_time": "0:26:18", "throughput": 3600.79, "total_tokens": 19212216}
|
| 5906 |
+
{"current_steps": 29455, "total_steps": 38160, "loss": 0.5597, "lr": 7.517437827181878e-06, "epoch": 15.43763102725367, "percentage": 77.19, "elapsed_time": "1:28:56", "remaining_time": "0:26:17", "throughput": 3600.86, "total_tokens": 19215736}
|
| 5907 |
+
{"current_steps": 29460, "total_steps": 38160, "loss": 0.4735, "lr": 7.509266133309908e-06, "epoch": 15.440251572327044, "percentage": 77.2, "elapsed_time": "1:28:57", "remaining_time": "0:26:16", "throughput": 3600.88, "total_tokens": 19219224}
|
| 5908 |
+
{"current_steps": 29465, "total_steps": 38160, "loss": 0.5361, "lr": 7.501098098299914e-06, "epoch": 15.44287211740042, "percentage": 77.21, "elapsed_time": "1:28:58", "remaining_time": "0:26:15", "throughput": 3600.88, "total_tokens": 19222424}
|
| 5909 |
+
{"current_steps": 29470, "total_steps": 38160, "loss": 0.5405, "lr": 7.492933723860557e-06, "epoch": 15.445492662473795, "percentage": 77.23, "elapsed_time": "1:28:59", "remaining_time": "0:26:14", "throughput": 3600.91, "total_tokens": 19225784}
|
| 5910 |
+
{"current_steps": 29475, "total_steps": 38160, "loss": 0.3442, "lr": 7.4847730116997334e-06, "epoch": 15.44811320754717, "percentage": 77.24, "elapsed_time": "1:29:00", "remaining_time": "0:26:13", "throughput": 3600.83, "total_tokens": 19228600}
|
| 5911 |
+
{"current_steps": 29480, "total_steps": 38160, "loss": 0.4708, "lr": 7.476615963524583e-06, "epoch": 15.450733752620545, "percentage": 77.25, "elapsed_time": "1:29:00", "remaining_time": "0:26:12", "throughput": 3600.84, "total_tokens": 19231864}
|
| 5912 |
+
{"current_steps": 29485, "total_steps": 38160, "loss": 0.5575, "lr": 7.468462581041452e-06, "epoch": 15.45335429769392, "percentage": 77.27, "elapsed_time": "1:29:01", "remaining_time": "0:26:11", "throughput": 3600.93, "total_tokens": 19235480}
|
| 5913 |
+
{"current_steps": 29490, "total_steps": 38160, "loss": 0.5389, "lr": 7.460312865955932e-06, "epoch": 15.455974842767295, "percentage": 77.28, "elapsed_time": "1:29:02", "remaining_time": "0:26:10", "throughput": 3601.03, "total_tokens": 19238904}
|
| 5914 |
+
{"current_steps": 29495, "total_steps": 38160, "loss": 0.4022, "lr": 7.4521668199728584e-06, "epoch": 15.45859538784067, "percentage": 77.29, "elapsed_time": "1:29:03", "remaining_time": "0:26:09", "throughput": 3601.07, "total_tokens": 19242040}
|
| 5915 |
+
{"current_steps": 29500, "total_steps": 38160, "loss": 0.4802, "lr": 7.4440244447962884e-06, "epoch": 15.461215932914046, "percentage": 77.31, "elapsed_time": "1:29:04", "remaining_time": "0:26:08", "throughput": 3601.1, "total_tokens": 19245336}
|
| 5916 |
+
{"current_steps": 29505, "total_steps": 38160, "loss": 0.4519, "lr": 7.435885742129523e-06, "epoch": 15.463836477987421, "percentage": 77.32, "elapsed_time": "1:29:05", "remaining_time": "0:26:08", "throughput": 3601.31, "total_tokens": 19250264}
|
| 5917 |
+
{"current_steps": 29510, "total_steps": 38160, "loss": 0.8246, "lr": 7.427750713675071e-06, "epoch": 15.466457023060796, "percentage": 77.33, "elapsed_time": "1:29:06", "remaining_time": "0:26:07", "throughput": 3601.36, "total_tokens": 19253592}
|
| 5918 |
+
{"current_steps": 29515, "total_steps": 38160, "loss": 0.4286, "lr": 7.419619361134702e-06, "epoch": 15.469077568134171, "percentage": 77.35, "elapsed_time": "1:29:07", "remaining_time": "0:26:06", "throughput": 3601.37, "total_tokens": 19256728}
|
| 5919 |
+
{"current_steps": 29520, "total_steps": 38160, "loss": 0.5065, "lr": 7.411491686209387e-06, "epoch": 15.471698113207546, "percentage": 77.36, "elapsed_time": "1:29:07", "remaining_time": "0:26:05", "throughput": 3601.33, "total_tokens": 19259640}
|
| 5920 |
+
{"current_steps": 29525, "total_steps": 38160, "loss": 0.4048, "lr": 7.4033676905993525e-06, "epoch": 15.474318658280922, "percentage": 77.37, "elapsed_time": "1:29:08", "remaining_time": "0:26:04", "throughput": 3601.4, "total_tokens": 19263128}
|
| 5921 |
+
{"current_steps": 29530, "total_steps": 38160, "loss": 0.4146, "lr": 7.3952473760040444e-06, "epoch": 15.476939203354299, "percentage": 77.38, "elapsed_time": "1:29:09", "remaining_time": "0:26:03", "throughput": 3601.58, "total_tokens": 19267928}
|
| 5922 |
+
{"current_steps": 29535, "total_steps": 38160, "loss": 0.492, "lr": 7.387130744122148e-06, "epoch": 15.479559748427674, "percentage": 77.4, "elapsed_time": "1:29:10", "remaining_time": "0:26:02", "throughput": 3601.55, "total_tokens": 19270552}
|
| 5923 |
+
{"current_steps": 29540, "total_steps": 38160, "loss": 0.4676, "lr": 7.379017796651558e-06, "epoch": 15.482180293501049, "percentage": 77.41, "elapsed_time": "1:29:11", "remaining_time": "0:26:01", "throughput": 3601.62, "total_tokens": 19274072}
|
| 5924 |
+
{"current_steps": 29545, "total_steps": 38160, "loss": 0.4425, "lr": 7.3709085352894085e-06, "epoch": 15.484800838574424, "percentage": 77.42, "elapsed_time": "1:29:12", "remaining_time": "0:26:00", "throughput": 3601.6, "total_tokens": 19277048}
|
| 5925 |
+
{"current_steps": 29550, "total_steps": 38160, "loss": 0.4754, "lr": 7.362802961732071e-06, "epoch": 15.4874213836478, "percentage": 77.44, "elapsed_time": "1:29:13", "remaining_time": "0:25:59", "throughput": 3601.55, "total_tokens": 19279576}
|
| 5926 |
+
{"current_steps": 29555, "total_steps": 38160, "loss": 0.4671, "lr": 7.354701077675136e-06, "epoch": 15.490041928721174, "percentage": 77.45, "elapsed_time": "1:29:13", "remaining_time": "0:25:58", "throughput": 3601.52, "total_tokens": 19282200}
|
| 5927 |
+
{"current_steps": 29560, "total_steps": 38160, "loss": 0.5906, "lr": 7.346602884813439e-06, "epoch": 15.49266247379455, "percentage": 77.46, "elapsed_time": "1:29:14", "remaining_time": "0:25:57", "throughput": 3601.5, "total_tokens": 19285336}
|
| 5928 |
+
{"current_steps": 29565, "total_steps": 38160, "loss": 0.5064, "lr": 7.338508384841014e-06, "epoch": 15.495283018867925, "percentage": 77.48, "elapsed_time": "1:29:15", "remaining_time": "0:25:56", "throughput": 3601.51, "total_tokens": 19288344}
|
| 5929 |
+
{"current_steps": 29570, "total_steps": 38160, "loss": 0.4053, "lr": 7.3304175794511345e-06, "epoch": 15.4979035639413, "percentage": 77.49, "elapsed_time": "1:29:16", "remaining_time": "0:25:56", "throughput": 3601.54, "total_tokens": 19291928}
|
| 5930 |
+
{"current_steps": 29575, "total_steps": 38160, "loss": 0.4298, "lr": 7.3223304703363135e-06, "epoch": 15.500524109014675, "percentage": 77.5, "elapsed_time": "1:29:17", "remaining_time": "0:25:55", "throughput": 3601.59, "total_tokens": 19295736}
|
| 5931 |
+
{"current_steps": 29580, "total_steps": 38160, "loss": 0.4629, "lr": 7.314247059188281e-06, "epoch": 15.50314465408805, "percentage": 77.52, "elapsed_time": "1:29:18", "remaining_time": "0:25:54", "throughput": 3601.58, "total_tokens": 19298744}
|
| 5932 |
+
{"current_steps": 29585, "total_steps": 38160, "loss": 0.4502, "lr": 7.306167347697992e-06, "epoch": 15.505765199161425, "percentage": 77.53, "elapsed_time": "1:29:19", "remaining_time": "0:25:53", "throughput": 3601.5, "total_tokens": 19301368}
|
| 5933 |
+
{"current_steps": 29590, "total_steps": 38160, "loss": 0.4067, "lr": 7.29809133755564e-06, "epoch": 15.5083857442348, "percentage": 77.54, "elapsed_time": "1:29:20", "remaining_time": "0:25:52", "throughput": 3601.52, "total_tokens": 19304952}
|
| 5934 |
+
{"current_steps": 29595, "total_steps": 38160, "loss": 0.5529, "lr": 7.290019030450629e-06, "epoch": 15.511006289308176, "percentage": 77.56, "elapsed_time": "1:29:21", "remaining_time": "0:25:51", "throughput": 3601.55, "total_tokens": 19308088}
|
| 5935 |
+
{"current_steps": 29600, "total_steps": 38160, "loss": 0.4749, "lr": 7.2819504280715794e-06, "epoch": 15.51362683438155, "percentage": 77.57, "elapsed_time": "1:29:21", "remaining_time": "0:25:50", "throughput": 3601.51, "total_tokens": 19310840}
|
| 5936 |
+
{"current_steps": 29605, "total_steps": 38160, "loss": 0.5127, "lr": 7.273885532106364e-06, "epoch": 15.516247379454926, "percentage": 77.58, "elapsed_time": "1:29:22", "remaining_time": "0:25:49", "throughput": 3601.47, "total_tokens": 19313720}
|
| 5937 |
+
{"current_steps": 29610, "total_steps": 38160, "loss": 0.5478, "lr": 7.265824344242064e-06, "epoch": 15.518867924528301, "percentage": 77.59, "elapsed_time": "1:29:23", "remaining_time": "0:25:48", "throughput": 3601.62, "total_tokens": 19318424}
|
| 5938 |
+
{"current_steps": 29615, "total_steps": 38160, "loss": 0.3584, "lr": 7.257766866165e-06, "epoch": 15.521488469601676, "percentage": 77.61, "elapsed_time": "1:29:24", "remaining_time": "0:25:47", "throughput": 3601.69, "total_tokens": 19322488}
|
| 5939 |
+
{"current_steps": 29620, "total_steps": 38160, "loss": 0.6969, "lr": 7.24971309956069e-06, "epoch": 15.524109014675052, "percentage": 77.62, "elapsed_time": "1:29:25", "remaining_time": "0:25:47", "throughput": 3601.71, "total_tokens": 19325592}
|
| 5940 |
+
{"current_steps": 29625, "total_steps": 38160, "loss": 0.4951, "lr": 7.241663046113887e-06, "epoch": 15.526729559748428, "percentage": 77.63, "elapsed_time": "1:29:26", "remaining_time": "0:25:46", "throughput": 3601.65, "total_tokens": 19328408}
|
| 5941 |
+
{"current_steps": 29630, "total_steps": 38160, "loss": 0.615, "lr": 7.233616707508576e-06, "epoch": 15.529350104821804, "percentage": 77.65, "elapsed_time": "1:29:27", "remaining_time": "0:25:45", "throughput": 3601.54, "total_tokens": 19330776}
|
| 5942 |
+
{"current_steps": 29635, "total_steps": 38160, "loss": 0.4981, "lr": 7.225574085427961e-06, "epoch": 15.531970649895179, "percentage": 77.66, "elapsed_time": "1:29:28", "remaining_time": "0:25:44", "throughput": 3601.57, "total_tokens": 19334360}
|
| 5943 |
+
{"current_steps": 29640, "total_steps": 38160, "loss": 0.4264, "lr": 7.217535181554474e-06, "epoch": 15.534591194968554, "percentage": 77.67, "elapsed_time": "1:29:29", "remaining_time": "0:25:43", "throughput": 3601.43, "total_tokens": 19336664}
|
| 5944 |
+
{"current_steps": 29645, "total_steps": 38160, "loss": 0.4418, "lr": 7.209499997569747e-06, "epoch": 15.53721174004193, "percentage": 77.69, "elapsed_time": "1:29:30", "remaining_time": "0:25:42", "throughput": 3601.38, "total_tokens": 19339608}
|
| 5945 |
+
{"current_steps": 29650, "total_steps": 38160, "loss": 0.4657, "lr": 7.201468535154663e-06, "epoch": 15.539832285115304, "percentage": 77.7, "elapsed_time": "1:29:30", "remaining_time": "0:25:41", "throughput": 3601.37, "total_tokens": 19342712}
|
| 5946 |
+
{"current_steps": 29655, "total_steps": 38160, "loss": 0.4298, "lr": 7.193440795989295e-06, "epoch": 15.54245283018868, "percentage": 77.71, "elapsed_time": "1:29:31", "remaining_time": "0:25:40", "throughput": 3601.44, "total_tokens": 19346296}
|
| 5947 |
+
{"current_steps": 29660, "total_steps": 38160, "loss": 0.348, "lr": 7.185416781752965e-06, "epoch": 15.545073375262055, "percentage": 77.73, "elapsed_time": "1:29:32", "remaining_time": "0:25:39", "throughput": 3601.5, "total_tokens": 19349656}
|
| 5948 |
+
{"current_steps": 29665, "total_steps": 38160, "loss": 0.4729, "lr": 7.177396494124206e-06, "epoch": 15.54769392033543, "percentage": 77.74, "elapsed_time": "1:29:33", "remaining_time": "0:25:38", "throughput": 3601.46, "total_tokens": 19352280}
|
| 5949 |
+
{"current_steps": 29670, "total_steps": 38160, "loss": 0.5095, "lr": 7.169379934780779e-06, "epoch": 15.550314465408805, "percentage": 77.75, "elapsed_time": "1:29:34", "remaining_time": "0:25:37", "throughput": 3601.42, "total_tokens": 19355128}
|
| 5950 |
+
{"current_steps": 29675, "total_steps": 38160, "loss": 0.6255, "lr": 7.161367105399644e-06, "epoch": 15.55293501048218, "percentage": 77.76, "elapsed_time": "1:29:35", "remaining_time": "0:25:36", "throughput": 3601.5, "total_tokens": 19359096}
|
| 5951 |
+
{"current_steps": 29680, "total_steps": 38160, "loss": 0.4712, "lr": 7.153358007656991e-06, "epoch": 15.555555555555555, "percentage": 77.78, "elapsed_time": "1:29:36", "remaining_time": "0:25:36", "throughput": 3601.38, "total_tokens": 19361784}
|
| 5952 |
+
{"current_steps": 29685, "total_steps": 38160, "loss": 0.3985, "lr": 7.145352643228237e-06, "epoch": 15.55817610062893, "percentage": 77.79, "elapsed_time": "1:29:37", "remaining_time": "0:25:35", "throughput": 3601.34, "total_tokens": 19364824}
|
| 5953 |
+
{"current_steps": 29690, "total_steps": 38160, "loss": 0.6233, "lr": 7.137351013788013e-06, "epoch": 15.560796645702306, "percentage": 77.8, "elapsed_time": "1:29:38", "remaining_time": "0:25:34", "throughput": 3601.31, "total_tokens": 19368056}
|
| 5954 |
+
{"current_steps": 29695, "total_steps": 38160, "loss": 0.4228, "lr": 7.129353121010177e-06, "epoch": 15.56341719077568, "percentage": 77.82, "elapsed_time": "1:29:38", "remaining_time": "0:25:33", "throughput": 3601.34, "total_tokens": 19371448}
|
| 5955 |
+
{"current_steps": 29700, "total_steps": 38160, "loss": 0.6125, "lr": 7.121358966567779e-06, "epoch": 15.566037735849056, "percentage": 77.83, "elapsed_time": "1:29:39", "remaining_time": "0:25:32", "throughput": 3601.41, "total_tokens": 19375448}
|
| 5956 |
+
{"current_steps": 29705, "total_steps": 38160, "loss": 0.4348, "lr": 7.113368552133126e-06, "epoch": 15.568658280922431, "percentage": 77.84, "elapsed_time": "1:29:40", "remaining_time": "0:25:31", "throughput": 3601.47, "total_tokens": 19379256}
|
| 5957 |
+
{"current_steps": 29710, "total_steps": 38160, "loss": 0.4748, "lr": 7.105381879377701e-06, "epoch": 15.571278825995806, "percentage": 77.86, "elapsed_time": "1:29:41", "remaining_time": "0:25:30", "throughput": 3601.55, "total_tokens": 19383032}
|
| 5958 |
+
{"current_steps": 29715, "total_steps": 38160, "loss": 0.4655, "lr": 7.097398949972234e-06, "epoch": 15.573899371069182, "percentage": 77.87, "elapsed_time": "1:29:42", "remaining_time": "0:25:29", "throughput": 3601.75, "total_tokens": 19387640}
|
| 5959 |
+
{"current_steps": 29720, "total_steps": 38160, "loss": 0.5403, "lr": 7.089419765586672e-06, "epoch": 15.576519916142558, "percentage": 77.88, "elapsed_time": "1:29:43", "remaining_time": "0:25:28", "throughput": 3601.64, "total_tokens": 19389944}
|
| 5960 |
+
{"current_steps": 29725, "total_steps": 38160, "loss": 0.5234, "lr": 7.081444327890152e-06, "epoch": 15.579140461215934, "percentage": 77.9, "elapsed_time": "1:29:44", "remaining_time": "0:25:27", "throughput": 3601.7, "total_tokens": 19393464}
|
| 5961 |
+
{"current_steps": 29730, "total_steps": 38160, "loss": 0.3769, "lr": 7.073472638551054e-06, "epoch": 15.581761006289309, "percentage": 77.91, "elapsed_time": "1:29:45", "remaining_time": "0:25:27", "throughput": 3601.6, "total_tokens": 19395864}
|
| 5962 |
+
{"current_steps": 29735, "total_steps": 38160, "loss": 0.5321, "lr": 7.065504699236969e-06, "epoch": 15.584381551362684, "percentage": 77.92, "elapsed_time": "1:29:46", "remaining_time": "0:25:26", "throughput": 3601.57, "total_tokens": 19398552}
|
| 5963 |
+
{"current_steps": 29740, "total_steps": 38160, "loss": 0.4195, "lr": 7.057540511614688e-06, "epoch": 15.58700209643606, "percentage": 77.94, "elapsed_time": "1:29:46", "remaining_time": "0:25:25", "throughput": 3601.59, "total_tokens": 19401688}
|
| 5964 |
+
{"current_steps": 29745, "total_steps": 38160, "loss": 0.4192, "lr": 7.049580077350232e-06, "epoch": 15.589622641509434, "percentage": 77.95, "elapsed_time": "1:29:47", "remaining_time": "0:25:24", "throughput": 3601.58, "total_tokens": 19404472}
|
| 5965 |
+
{"current_steps": 29750, "total_steps": 38160, "loss": 0.5013, "lr": 7.041623398108843e-06, "epoch": 15.59224318658281, "percentage": 77.96, "elapsed_time": "1:29:48", "remaining_time": "0:25:23", "throughput": 3601.56, "total_tokens": 19407544}
|
| 5966 |
+
{"current_steps": 29755, "total_steps": 38160, "loss": 0.4808, "lr": 7.033670475554949e-06, "epoch": 15.594863731656185, "percentage": 77.97, "elapsed_time": "1:29:49", "remaining_time": "0:25:22", "throughput": 3601.56, "total_tokens": 19410488}
|
| 5967 |
+
{"current_steps": 29760, "total_steps": 38160, "loss": 0.6649, "lr": 7.025721311352224e-06, "epoch": 15.59748427672956, "percentage": 77.99, "elapsed_time": "1:29:50", "remaining_time": "0:25:21", "throughput": 3601.61, "total_tokens": 19414360}
|
| 5968 |
+
{"current_steps": 29765, "total_steps": 38160, "loss": 0.4737, "lr": 7.017775907163546e-06, "epoch": 15.600104821802935, "percentage": 78.0, "elapsed_time": "1:29:51", "remaining_time": "0:25:20", "throughput": 3601.66, "total_tokens": 19418072}
|
| 5969 |
+
{"current_steps": 29770, "total_steps": 38160, "loss": 0.4625, "lr": 7.009834264650989e-06, "epoch": 15.60272536687631, "percentage": 78.01, "elapsed_time": "1:29:52", "remaining_time": "0:25:19", "throughput": 3601.64, "total_tokens": 19421016}
|
| 5970 |
+
{"current_steps": 29775, "total_steps": 38160, "loss": 0.5222, "lr": 7.001896385475867e-06, "epoch": 15.605345911949685, "percentage": 78.03, "elapsed_time": "1:29:53", "remaining_time": "0:25:18", "throughput": 3601.71, "total_tokens": 19424568}
|
| 5971 |
+
{"current_steps": 29780, "total_steps": 38160, "loss": 0.4857, "lr": 6.993962271298682e-06, "epoch": 15.60796645702306, "percentage": 78.04, "elapsed_time": "1:29:53", "remaining_time": "0:25:17", "throughput": 3601.68, "total_tokens": 19427352}
|
| 5972 |
+
{"current_steps": 29785, "total_steps": 38160, "loss": 0.4131, "lr": 6.986031923779166e-06, "epoch": 15.610587002096436, "percentage": 78.05, "elapsed_time": "1:29:54", "remaining_time": "0:25:16", "throughput": 3601.66, "total_tokens": 19430328}
|
| 5973 |
+
{"current_steps": 29790, "total_steps": 38160, "loss": 0.3899, "lr": 6.978105344576264e-06, "epoch": 15.61320754716981, "percentage": 78.07, "elapsed_time": "1:29:55", "remaining_time": "0:25:16", "throughput": 3601.72, "total_tokens": 19433848}
|
| 5974 |
+
{"current_steps": 29795, "total_steps": 38160, "loss": 0.5601, "lr": 6.970182535348111e-06, "epoch": 15.615828092243186, "percentage": 78.08, "elapsed_time": "1:29:56", "remaining_time": "0:25:15", "throughput": 3601.67, "total_tokens": 19436888}
|
| 5975 |
+
{"current_steps": 29800, "total_steps": 38160, "loss": 0.5961, "lr": 6.962263497752086e-06, "epoch": 15.618448637316561, "percentage": 78.09, "elapsed_time": "1:29:57", "remaining_time": "0:25:14", "throughput": 3601.81, "total_tokens": 19441176}
|
| 5976 |
+
{"current_steps": 29805, "total_steps": 38160, "loss": 0.4834, "lr": 6.9543482334447436e-06, "epoch": 15.621069182389936, "percentage": 78.11, "elapsed_time": "1:29:58", "remaining_time": "0:25:13", "throughput": 3601.87, "total_tokens": 19444664}
|
| 5977 |
+
{"current_steps": 29810, "total_steps": 38160, "loss": 0.4217, "lr": 6.946436744081875e-06, "epoch": 15.623689727463312, "percentage": 78.12, "elapsed_time": "1:29:59", "remaining_time": "0:25:12", "throughput": 3601.82, "total_tokens": 19447736}
|
| 5978 |
+
{"current_steps": 29815, "total_steps": 38160, "loss": 0.5035, "lr": 6.938529031318472e-06, "epoch": 15.626310272536688, "percentage": 78.13, "elapsed_time": "1:30:00", "remaining_time": "0:25:11", "throughput": 3601.88, "total_tokens": 19451384}
|
| 5979 |
+
{"current_steps": 29820, "total_steps": 38160, "loss": 0.4732, "lr": 6.9306250968087485e-06, "epoch": 15.628930817610064, "percentage": 78.14, "elapsed_time": "1:30:01", "remaining_time": "0:25:10", "throughput": 3601.88, "total_tokens": 19454552}
|
| 5980 |
+
{"current_steps": 29825, "total_steps": 38160, "loss": 0.4451, "lr": 6.922724942206102e-06, "epoch": 15.631551362683439, "percentage": 78.16, "elapsed_time": "1:30:02", "remaining_time": "0:25:09", "throughput": 3601.79, "total_tokens": 19457336}
|
| 5981 |
+
{"current_steps": 29830, "total_steps": 38160, "loss": 0.4662, "lr": 6.914828569163167e-06, "epoch": 15.634171907756814, "percentage": 78.17, "elapsed_time": "1:30:03", "remaining_time": "0:25:08", "throughput": 3601.7, "total_tokens": 19460344}
|
| 5982 |
+
{"current_steps": 29835, "total_steps": 38160, "loss": 0.5315, "lr": 6.906935979331763e-06, "epoch": 15.63679245283019, "percentage": 78.18, "elapsed_time": "1:30:03", "remaining_time": "0:25:07", "throughput": 3601.59, "total_tokens": 19462744}
|
| 5983 |
+
{"current_steps": 29840, "total_steps": 38160, "loss": 0.4569, "lr": 6.8990471743629356e-06, "epoch": 15.639412997903564, "percentage": 78.2, "elapsed_time": "1:30:04", "remaining_time": "0:25:06", "throughput": 3601.59, "total_tokens": 19465880}
|
| 5984 |
+
{"current_steps": 29845, "total_steps": 38160, "loss": 0.5444, "lr": 6.89116215590693e-06, "epoch": 15.64203354297694, "percentage": 78.21, "elapsed_time": "1:30:05", "remaining_time": "0:25:06", "throughput": 3601.55, "total_tokens": 19468632}
|
| 5985 |
+
{"current_steps": 29850, "total_steps": 38160, "loss": 0.4499, "lr": 6.8832809256132146e-06, "epoch": 15.644654088050315, "percentage": 78.22, "elapsed_time": "1:30:06", "remaining_time": "0:25:05", "throughput": 3601.52, "total_tokens": 19471736}
|
| 5986 |
+
{"current_steps": 29855, "total_steps": 38160, "loss": 0.4562, "lr": 6.875403485130444e-06, "epoch": 15.64727463312369, "percentage": 78.24, "elapsed_time": "1:30:07", "remaining_time": "0:25:04", "throughput": 3601.41, "total_tokens": 19474424}
|
| 5987 |
+
{"current_steps": 29860, "total_steps": 38160, "loss": 0.4051, "lr": 6.867529836106479e-06, "epoch": 15.649895178197065, "percentage": 78.25, "elapsed_time": "1:30:08", "remaining_time": "0:25:03", "throughput": 3601.38, "total_tokens": 19477400}
|
| 5988 |
+
{"current_steps": 29865, "total_steps": 38160, "loss": 0.4578, "lr": 6.8596599801884045e-06, "epoch": 15.65251572327044, "percentage": 78.26, "elapsed_time": "1:30:09", "remaining_time": "0:25:02", "throughput": 3601.25, "total_tokens": 19480024}
|
| 5989 |
+
{"current_steps": 29870, "total_steps": 38160, "loss": 0.4855, "lr": 6.851793919022509e-06, "epoch": 15.655136268343815, "percentage": 78.28, "elapsed_time": "1:30:10", "remaining_time": "0:25:01", "throughput": 3601.27, "total_tokens": 19483256}
|
| 5990 |
+
{"current_steps": 29875, "total_steps": 38160, "loss": 0.4331, "lr": 6.843931654254285e-06, "epoch": 15.65775681341719, "percentage": 78.29, "elapsed_time": "1:30:10", "remaining_time": "0:25:00", "throughput": 3601.3, "total_tokens": 19486392}
|
| 5991 |
+
{"current_steps": 29880, "total_steps": 38160, "loss": 0.4733, "lr": 6.836073187528425e-06, "epoch": 15.660377358490566, "percentage": 78.3, "elapsed_time": "1:30:11", "remaining_time": "0:24:59", "throughput": 3601.27, "total_tokens": 19489464}
|
| 5992 |
+
{"current_steps": 29885, "total_steps": 38160, "loss": 0.4666, "lr": 6.828218520488821e-06, "epoch": 15.66299790356394, "percentage": 78.31, "elapsed_time": "1:30:12", "remaining_time": "0:24:58", "throughput": 3601.33, "total_tokens": 19492952}
|
| 5993 |
+
{"current_steps": 29890, "total_steps": 38160, "loss": 0.5853, "lr": 6.820367654778589e-06, "epoch": 15.665618448637316, "percentage": 78.33, "elapsed_time": "1:30:13", "remaining_time": "0:24:57", "throughput": 3601.26, "total_tokens": 19495928}
|
| 5994 |
+
{"current_steps": 29895, "total_steps": 38160, "loss": 0.4804, "lr": 6.812520592040039e-06, "epoch": 15.668238993710691, "percentage": 78.34, "elapsed_time": "1:30:14", "remaining_time": "0:24:56", "throughput": 3601.3, "total_tokens": 19499800}
|
| 5995 |
+
{"current_steps": 29900, "total_steps": 38160, "loss": 0.3885, "lr": 6.804677333914689e-06, "epoch": 15.670859538784066, "percentage": 78.35, "elapsed_time": "1:30:15", "remaining_time": "0:24:56", "throughput": 3601.49, "total_tokens": 19504408}
|
| 5996 |
+
{"current_steps": 29905, "total_steps": 38160, "loss": 0.4081, "lr": 6.796837882043261e-06, "epoch": 15.673480083857442, "percentage": 78.37, "elapsed_time": "1:30:16", "remaining_time": "0:24:55", "throughput": 3601.52, "total_tokens": 19507576}
|
| 5997 |
+
{"current_steps": 29910, "total_steps": 38160, "loss": 0.6589, "lr": 6.7890022380656785e-06, "epoch": 15.676100628930818, "percentage": 78.38, "elapsed_time": "1:30:17", "remaining_time": "0:24:54", "throughput": 3601.55, "total_tokens": 19510712}
|
| 5998 |
+
{"current_steps": 29915, "total_steps": 38160, "loss": 0.4244, "lr": 6.781170403621056e-06, "epoch": 15.678721174004194, "percentage": 78.39, "elapsed_time": "1:30:18", "remaining_time": "0:24:53", "throughput": 3601.6, "total_tokens": 19514168}
|
| 5999 |
+
{"current_steps": 29920, "total_steps": 38160, "loss": 0.4213, "lr": 6.773342380347736e-06, "epoch": 15.681341719077569, "percentage": 78.41, "elapsed_time": "1:30:19", "remaining_time": "0:24:52", "throughput": 3601.56, "total_tokens": 19517304}
|
| 6000 |
+
{"current_steps": 29925, "total_steps": 38160, "loss": 0.474, "lr": 6.765518169883248e-06, "epoch": 15.683962264150944, "percentage": 78.42, "elapsed_time": "1:30:19", "remaining_time": "0:24:51", "throughput": 3601.58, "total_tokens": 19520504}
|
| 6001 |
+
{"current_steps": 29930, "total_steps": 38160, "loss": 0.407, "lr": 6.757697773864338e-06, "epoch": 15.68658280922432, "percentage": 78.43, "elapsed_time": "1:30:21", "remaining_time": "0:24:50", "throughput": 3601.82, "total_tokens": 19527000}
|
| 6002 |
+
{"current_steps": 29935, "total_steps": 38160, "loss": 0.487, "lr": 6.749881193926932e-06, "epoch": 15.689203354297694, "percentage": 78.45, "elapsed_time": "1:30:22", "remaining_time": "0:24:49", "throughput": 3601.83, "total_tokens": 19530008}
|
| 6003 |
+
{"current_steps": 29940, "total_steps": 38160, "loss": 0.3558, "lr": 6.742068431706167e-06, "epoch": 15.69182389937107, "percentage": 78.46, "elapsed_time": "1:30:23", "remaining_time": "0:24:48", "throughput": 3601.82, "total_tokens": 19533208}
|
| 6004 |
+
{"current_steps": 29945, "total_steps": 38160, "loss": 0.6988, "lr": 6.734259488836386e-06, "epoch": 15.694444444444445, "percentage": 78.47, "elapsed_time": "1:30:24", "remaining_time": "0:24:48", "throughput": 3601.77, "total_tokens": 19536248}
|
| 6005 |
+
{"current_steps": 29950, "total_steps": 38160, "loss": 0.4437, "lr": 6.726454366951135e-06, "epoch": 15.69706498951782, "percentage": 78.49, "elapsed_time": "1:30:24", "remaining_time": "0:24:47", "throughput": 3601.81, "total_tokens": 19539480}
|
| 6006 |
+
{"current_steps": 29955, "total_steps": 38160, "loss": 0.5373, "lr": 6.7186530676831616e-06, "epoch": 15.699685534591195, "percentage": 78.5, "elapsed_time": "1:30:25", "remaining_time": "0:24:46", "throughput": 3601.82, "total_tokens": 19542584}
|
| 6007 |
+
{"current_steps": 29960, "total_steps": 38160, "loss": 0.6184, "lr": 6.710855592664403e-06, "epoch": 15.70230607966457, "percentage": 78.51, "elapsed_time": "1:30:26", "remaining_time": "0:24:45", "throughput": 3601.89, "total_tokens": 19546296}
|
| 6008 |
+
{"current_steps": 29965, "total_steps": 38160, "loss": 0.5122, "lr": 6.703061943525993e-06, "epoch": 15.704926624737945, "percentage": 78.52, "elapsed_time": "1:30:27", "remaining_time": "0:24:44", "throughput": 3601.91, "total_tokens": 19549400}
|
| 6009 |
+
{"current_steps": 29970, "total_steps": 38160, "loss": 0.5185, "lr": 6.69527212189828e-06, "epoch": 15.70754716981132, "percentage": 78.54, "elapsed_time": "1:30:28", "remaining_time": "0:24:43", "throughput": 3602.02, "total_tokens": 19552984}
|
| 6010 |
+
{"current_steps": 29975, "total_steps": 38160, "loss": 0.6384, "lr": 6.687486129410811e-06, "epoch": 15.710167714884696, "percentage": 78.55, "elapsed_time": "1:30:29", "remaining_time": "0:24:42", "throughput": 3601.9, "total_tokens": 19555448}
|
| 6011 |
+
{"current_steps": 29980, "total_steps": 38160, "loss": 0.4735, "lr": 6.679703967692322e-06, "epoch": 15.71278825995807, "percentage": 78.56, "elapsed_time": "1:30:30", "remaining_time": "0:24:41", "throughput": 3601.84, "total_tokens": 19558136}
|
| 6012 |
+
{"current_steps": 29985, "total_steps": 38160, "loss": 0.4058, "lr": 6.67192563837076e-06, "epoch": 15.715408805031446, "percentage": 78.58, "elapsed_time": "1:30:30", "remaining_time": "0:24:40", "throughput": 3601.8, "total_tokens": 19560824}
|
| 6013 |
+
{"current_steps": 29990, "total_steps": 38160, "loss": 0.3725, "lr": 6.664151143073258e-06, "epoch": 15.718029350104821, "percentage": 78.59, "elapsed_time": "1:30:31", "remaining_time": "0:24:39", "throughput": 3601.89, "total_tokens": 19564472}
|
| 6014 |
+
{"current_steps": 29995, "total_steps": 38160, "loss": 0.5922, "lr": 6.656380483426141e-06, "epoch": 15.720649895178196, "percentage": 78.6, "elapsed_time": "1:30:32", "remaining_time": "0:24:38", "throughput": 3601.92, "total_tokens": 19567960}
|
| 6015 |
+
{"current_steps": 30000, "total_steps": 38160, "loss": 0.4761, "lr": 6.648613661054956e-06, "epoch": 15.723270440251572, "percentage": 78.62, "elapsed_time": "1:30:33", "remaining_time": "0:24:37", "throughput": 3601.84, "total_tokens": 19570616}
|
| 6016 |
+
{"current_steps": 30005, "total_steps": 38160, "loss": 0.6293, "lr": 6.6408506775844256e-06, "epoch": 15.725890985324948, "percentage": 78.63, "elapsed_time": "1:30:34", "remaining_time": "0:24:37", "throughput": 3601.85, "total_tokens": 19573816}
|
| 6017 |
+
{"current_steps": 30010, "total_steps": 38160, "loss": 0.5287, "lr": 6.633091534638492e-06, "epoch": 15.728511530398324, "percentage": 78.64, "elapsed_time": "1:30:35", "remaining_time": "0:24:36", "throughput": 3601.93, "total_tokens": 19577272}
|
| 6018 |
+
{"current_steps": 30015, "total_steps": 38160, "loss": 0.3903, "lr": 6.625336233840257e-06, "epoch": 15.731132075471699, "percentage": 78.66, "elapsed_time": "1:30:36", "remaining_time": "0:24:35", "throughput": 3601.96, "total_tokens": 19580408}
|
| 6019 |
+
{"current_steps": 30020, "total_steps": 38160, "loss": 0.4584, "lr": 6.617584776812064e-06, "epoch": 15.733752620545074, "percentage": 78.67, "elapsed_time": "1:30:36", "remaining_time": "0:24:34", "throughput": 3601.95, "total_tokens": 19583640}
|
| 6020 |
+
{"current_steps": 30025, "total_steps": 38160, "loss": 0.436, "lr": 6.6098371651754085e-06, "epoch": 15.73637316561845, "percentage": 78.68, "elapsed_time": "1:30:37", "remaining_time": "0:24:33", "throughput": 3602.02, "total_tokens": 19587128}
|
| 6021 |
+
{"current_steps": 30030, "total_steps": 38160, "loss": 0.488, "lr": 6.602093400551012e-06, "epoch": 15.738993710691824, "percentage": 78.69, "elapsed_time": "1:30:38", "remaining_time": "0:24:32", "throughput": 3602.06, "total_tokens": 19590584}
|
| 6022 |
+
{"current_steps": 30035, "total_steps": 38160, "loss": 0.4739, "lr": 6.59435348455879e-06, "epoch": 15.7416142557652, "percentage": 78.71, "elapsed_time": "1:30:39", "remaining_time": "0:24:31", "throughput": 3602.15, "total_tokens": 19594264}
|
| 6023 |
+
{"current_steps": 30040, "total_steps": 38160, "loss": 0.538, "lr": 6.586617418817828e-06, "epoch": 15.744234800838575, "percentage": 78.72, "elapsed_time": "1:30:40", "remaining_time": "0:24:30", "throughput": 3602.07, "total_tokens": 19597176}
|
| 6024 |
+
{"current_steps": 30045, "total_steps": 38160, "loss": 0.5906, "lr": 6.578885204946439e-06, "epoch": 15.74685534591195, "percentage": 78.73, "elapsed_time": "1:30:41", "remaining_time": "0:24:29", "throughput": 3602.02, "total_tokens": 19599896}
|
| 6025 |
+
{"current_steps": 30050, "total_steps": 38160, "loss": 0.453, "lr": 6.571156844562098e-06, "epoch": 15.749475890985325, "percentage": 78.75, "elapsed_time": "1:30:42", "remaining_time": "0:24:28", "throughput": 3602.01, "total_tokens": 19602904}
|
| 6026 |
+
{"current_steps": 30055, "total_steps": 38160, "loss": 0.4918, "lr": 6.563432339281497e-06, "epoch": 15.7520964360587, "percentage": 78.76, "elapsed_time": "1:30:43", "remaining_time": "0:24:27", "throughput": 3602.13, "total_tokens": 19606456}
|
| 6027 |
+
{"current_steps": 30060, "total_steps": 38160, "loss": 0.3694, "lr": 6.555711690720517e-06, "epoch": 15.754716981132075, "percentage": 78.77, "elapsed_time": "1:30:43", "remaining_time": "0:24:26", "throughput": 3602.1, "total_tokens": 19609304}
|
| 6028 |
+
{"current_steps": 30065, "total_steps": 38160, "loss": 0.448, "lr": 6.547994900494234e-06, "epoch": 15.75733752620545, "percentage": 78.79, "elapsed_time": "1:30:44", "remaining_time": "0:24:25", "throughput": 3602.24, "total_tokens": 19613304}
|
| 6029 |
+
{"current_steps": 30070, "total_steps": 38160, "loss": 0.4928, "lr": 6.540281970216899e-06, "epoch": 15.759958071278826, "percentage": 78.8, "elapsed_time": "1:30:45", "remaining_time": "0:24:25", "throughput": 3602.34, "total_tokens": 19616952}
|
| 6030 |
+
{"current_steps": 30075, "total_steps": 38160, "loss": 0.4492, "lr": 6.5325729015019845e-06, "epoch": 15.7625786163522, "percentage": 78.81, "elapsed_time": "1:30:46", "remaining_time": "0:24:24", "throughput": 3602.38, "total_tokens": 19620056}
|
| 6031 |
+
{"current_steps": 30080, "total_steps": 38160, "loss": 0.4203, "lr": 6.524867695962122e-06, "epoch": 15.765199161425576, "percentage": 78.83, "elapsed_time": "1:30:47", "remaining_time": "0:24:23", "throughput": 3602.35, "total_tokens": 19623096}
|
| 6032 |
+
{"current_steps": 30085, "total_steps": 38160, "loss": 0.7025, "lr": 6.517166355209165e-06, "epoch": 15.767819706498951, "percentage": 78.84, "elapsed_time": "1:30:48", "remaining_time": "0:24:22", "throughput": 3602.21, "total_tokens": 19625624}
|
| 6033 |
+
{"current_steps": 30090, "total_steps": 38160, "loss": 0.5253, "lr": 6.50946888085415e-06, "epoch": 15.770440251572326, "percentage": 78.85, "elapsed_time": "1:30:49", "remaining_time": "0:24:21", "throughput": 3602.14, "total_tokens": 19628312}
|
| 6034 |
+
{"current_steps": 30095, "total_steps": 38160, "loss": 0.4087, "lr": 6.5017752745072855e-06, "epoch": 15.773060796645701, "percentage": 78.87, "elapsed_time": "1:30:50", "remaining_time": "0:24:20", "throughput": 3602.17, "total_tokens": 19632056}
|
| 6035 |
+
{"current_steps": 30100, "total_steps": 38160, "loss": 0.4748, "lr": 6.494085537777994e-06, "epoch": 15.775681341719078, "percentage": 78.88, "elapsed_time": "1:30:51", "remaining_time": "0:24:19", "throughput": 3602.33, "total_tokens": 19637560}
|
| 6036 |
+
{"current_steps": 30105, "total_steps": 38160, "loss": 0.4875, "lr": 6.48639967227489e-06, "epoch": 15.778301886792454, "percentage": 78.89, "elapsed_time": "1:30:52", "remaining_time": "0:24:18", "throughput": 3602.33, "total_tokens": 19640600}
|
| 6037 |
+
{"current_steps": 30110, "total_steps": 38160, "loss": 0.4653, "lr": 6.47871767960575e-06, "epoch": 15.780922431865829, "percentage": 78.9, "elapsed_time": "1:30:53", "remaining_time": "0:24:17", "throughput": 3602.3, "total_tokens": 19643608}
|
| 6038 |
+
{"current_steps": 30115, "total_steps": 38160, "loss": 0.5786, "lr": 6.471039561377581e-06, "epoch": 15.783542976939204, "percentage": 78.92, "elapsed_time": "1:30:53", "remaining_time": "0:24:16", "throughput": 3602.23, "total_tokens": 19646040}
|
| 6039 |
+
{"current_steps": 30120, "total_steps": 38160, "loss": 0.5565, "lr": 6.463365319196538e-06, "epoch": 15.786163522012579, "percentage": 78.93, "elapsed_time": "1:30:54", "remaining_time": "0:24:16", "throughput": 3602.16, "total_tokens": 19648600}
|
| 6040 |
+
{"current_steps": 30125, "total_steps": 38160, "loss": 0.3523, "lr": 6.4556949546679905e-06, "epoch": 15.788784067085954, "percentage": 78.94, "elapsed_time": "1:30:55", "remaining_time": "0:24:15", "throughput": 3602.22, "total_tokens": 19652280}
|
| 6041 |
+
{"current_steps": 30130, "total_steps": 38160, "loss": 0.51, "lr": 6.448028469396497e-06, "epoch": 15.79140461215933, "percentage": 78.96, "elapsed_time": "1:30:56", "remaining_time": "0:24:14", "throughput": 3602.25, "total_tokens": 19655736}
|
| 6042 |
+
{"current_steps": 30135, "total_steps": 38160, "loss": 0.535, "lr": 6.440365864985801e-06, "epoch": 15.794025157232705, "percentage": 78.97, "elapsed_time": "1:30:57", "remaining_time": "0:24:13", "throughput": 3602.12, "total_tokens": 19658456}
|
| 6043 |
+
{"current_steps": 30140, "total_steps": 38160, "loss": 0.4436, "lr": 6.432707143038818e-06, "epoch": 15.79664570230608, "percentage": 78.98, "elapsed_time": "1:30:58", "remaining_time": "0:24:12", "throughput": 3602.13, "total_tokens": 19661688}
|
| 6044 |
+
{"current_steps": 30145, "total_steps": 38160, "loss": 0.493, "lr": 6.4250523051576834e-06, "epoch": 15.799266247379455, "percentage": 79.0, "elapsed_time": "1:30:59", "remaining_time": "0:24:11", "throughput": 3602.13, "total_tokens": 19665400}
|
| 6045 |
+
{"current_steps": 30150, "total_steps": 38160, "loss": 0.4442, "lr": 6.417401352943686e-06, "epoch": 15.80188679245283, "percentage": 79.01, "elapsed_time": "1:31:00", "remaining_time": "0:24:10", "throughput": 3602.09, "total_tokens": 19668472}
|
| 6046 |
+
{"current_steps": 30155, "total_steps": 38160, "loss": 0.3789, "lr": 6.409754287997322e-06, "epoch": 15.804507337526205, "percentage": 79.02, "elapsed_time": "1:31:01", "remaining_time": "0:24:09", "throughput": 3602.16, "total_tokens": 19672088}
|
| 6047 |
+
{"current_steps": 30160, "total_steps": 38160, "loss": 0.4447, "lr": 6.40211111191828e-06, "epoch": 15.80712788259958, "percentage": 79.04, "elapsed_time": "1:31:02", "remaining_time": "0:24:08", "throughput": 3602.22, "total_tokens": 19675864}
|
| 6048 |
+
{"current_steps": 30165, "total_steps": 38160, "loss": 0.414, "lr": 6.394471826305409e-06, "epoch": 15.809748427672956, "percentage": 79.05, "elapsed_time": "1:31:03", "remaining_time": "0:24:07", "throughput": 3602.32, "total_tokens": 19680216}
|
| 6049 |
+
{"current_steps": 30170, "total_steps": 38160, "loss": 0.6131, "lr": 6.3868364327567795e-06, "epoch": 15.81236897274633, "percentage": 79.06, "elapsed_time": "1:31:04", "remaining_time": "0:24:07", "throughput": 3602.38, "total_tokens": 19683480}
|
| 6050 |
+
{"current_steps": 30175, "total_steps": 38160, "loss": 0.5721, "lr": 6.379204932869606e-06, "epoch": 15.814989517819706, "percentage": 79.07, "elapsed_time": "1:31:04", "remaining_time": "0:24:06", "throughput": 3602.48, "total_tokens": 19687160}
|
| 6051 |
+
{"current_steps": 30180, "total_steps": 38160, "loss": 0.569, "lr": 6.371577328240327e-06, "epoch": 15.817610062893081, "percentage": 79.09, "elapsed_time": "1:31:05", "remaining_time": "0:24:05", "throughput": 3602.46, "total_tokens": 19690424}
|
| 6052 |
+
{"current_steps": 30185, "total_steps": 38160, "loss": 0.4772, "lr": 6.363953620464547e-06, "epoch": 15.820230607966456, "percentage": 79.1, "elapsed_time": "1:31:06", "remaining_time": "0:24:04", "throughput": 3602.47, "total_tokens": 19693656}
|
| 6053 |
+
{"current_steps": 30190, "total_steps": 38160, "loss": 0.615, "lr": 6.356333811137064e-06, "epoch": 15.822851153039831, "percentage": 79.11, "elapsed_time": "1:31:07", "remaining_time": "0:24:03", "throughput": 3602.47, "total_tokens": 19696920}
|
| 6054 |
+
{"current_steps": 30195, "total_steps": 38160, "loss": 0.3708, "lr": 6.34871790185185e-06, "epoch": 15.825471698113208, "percentage": 79.13, "elapsed_time": "1:31:08", "remaining_time": "0:24:02", "throughput": 3602.47, "total_tokens": 19700088}
|
| 6055 |
+
{"current_steps": 30200, "total_steps": 38160, "loss": 0.4524, "lr": 6.341105894202057e-06, "epoch": 15.828092243186584, "percentage": 79.14, "elapsed_time": "1:31:09", "remaining_time": "0:24:01", "throughput": 3602.52, "total_tokens": 19703320}
|
| 6056 |
+
{"current_steps": 30205, "total_steps": 38160, "loss": 0.5069, "lr": 6.333497789780041e-06, "epoch": 15.830712788259959, "percentage": 79.15, "elapsed_time": "1:31:10", "remaining_time": "0:24:00", "throughput": 3602.5, "total_tokens": 19706168}
|
| 6057 |
+
{"current_steps": 30210, "total_steps": 38160, "loss": 0.3864, "lr": 6.325893590177329e-06, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "1:31:11", "remaining_time": "0:23:59", "throughput": 3602.4, "total_tokens": 19708984}
|
| 6058 |
+
{"current_steps": 30215, "total_steps": 38160, "loss": 0.4971, "lr": 6.318293296984631e-06, "epoch": 15.835953878406709, "percentage": 79.18, "elapsed_time": "1:31:11", "remaining_time": "0:23:58", "throughput": 3602.39, "total_tokens": 19711928}
|
| 6059 |
+
{"current_steps": 30220, "total_steps": 38160, "loss": 0.467, "lr": 6.3106969117918495e-06, "epoch": 15.838574423480084, "percentage": 79.19, "elapsed_time": "1:31:12", "remaining_time": "0:23:57", "throughput": 3602.46, "total_tokens": 19715640}
|
| 6060 |
+
{"current_steps": 30225, "total_steps": 38160, "loss": 0.444, "lr": 6.303104436188057e-06, "epoch": 15.84119496855346, "percentage": 79.21, "elapsed_time": "1:31:13", "remaining_time": "0:23:57", "throughput": 3602.46, "total_tokens": 19718616}
|
| 6061 |
+
{"current_steps": 30230, "total_steps": 38160, "loss": 0.4401, "lr": 6.2955158717615036e-06, "epoch": 15.843815513626835, "percentage": 79.22, "elapsed_time": "1:31:14", "remaining_time": "0:23:56", "throughput": 3602.42, "total_tokens": 19721688}
|
| 6062 |
+
{"current_steps": 30235, "total_steps": 38160, "loss": 0.483, "lr": 6.287931220099638e-06, "epoch": 15.84643605870021, "percentage": 79.23, "elapsed_time": "1:31:15", "remaining_time": "0:23:55", "throughput": 3602.47, "total_tokens": 19725272}
|
| 6063 |
+
{"current_steps": 30240, "total_steps": 38160, "loss": 0.3995, "lr": 6.280350482789082e-06, "epoch": 15.849056603773585, "percentage": 79.25, "elapsed_time": "1:31:16", "remaining_time": "0:23:54", "throughput": 3602.42, "total_tokens": 19727960}
|
| 6064 |
+
{"current_steps": 30245, "total_steps": 38160, "loss": 0.5543, "lr": 6.272773661415645e-06, "epoch": 15.85167714884696, "percentage": 79.26, "elapsed_time": "1:31:17", "remaining_time": "0:23:53", "throughput": 3602.37, "total_tokens": 19730904}
|
| 6065 |
+
{"current_steps": 30250, "total_steps": 38160, "loss": 0.5555, "lr": 6.26520075756431e-06, "epoch": 15.854297693920335, "percentage": 79.27, "elapsed_time": "1:31:18", "remaining_time": "0:23:52", "throughput": 3602.47, "total_tokens": 19734552}
|
| 6066 |
+
{"current_steps": 30255, "total_steps": 38160, "loss": 0.4107, "lr": 6.2576317728192304e-06, "epoch": 15.85691823899371, "percentage": 79.28, "elapsed_time": "1:31:18", "remaining_time": "0:23:51", "throughput": 3602.44, "total_tokens": 19737400}
|
| 6067 |
+
{"current_steps": 30260, "total_steps": 38160, "loss": 0.434, "lr": 6.250066708763761e-06, "epoch": 15.859538784067086, "percentage": 79.3, "elapsed_time": "1:31:19", "remaining_time": "0:23:50", "throughput": 3602.51, "total_tokens": 19740792}
|
| 6068 |
+
{"current_steps": 30265, "total_steps": 38160, "loss": 0.5105, "lr": 6.242505566980422e-06, "epoch": 15.86215932914046, "percentage": 79.31, "elapsed_time": "1:31:20", "remaining_time": "0:23:49", "throughput": 3602.59, "total_tokens": 19744248}
|
| 6069 |
+
{"current_steps": 30270, "total_steps": 38160, "loss": 0.3727, "lr": 6.234948349050931e-06, "epoch": 15.864779874213836, "percentage": 79.32, "elapsed_time": "1:31:21", "remaining_time": "0:23:48", "throughput": 3602.49, "total_tokens": 19746744}
|
| 6070 |
+
{"current_steps": 30275, "total_steps": 38160, "loss": 0.4765, "lr": 6.227395056556162e-06, "epoch": 15.867400419287211, "percentage": 79.34, "elapsed_time": "1:31:22", "remaining_time": "0:23:47", "throughput": 3602.49, "total_tokens": 19749688}
|
| 6071 |
+
{"current_steps": 30280, "total_steps": 38160, "loss": 0.401, "lr": 6.219845691076173e-06, "epoch": 15.870020964360586, "percentage": 79.35, "elapsed_time": "1:31:23", "remaining_time": "0:23:46", "throughput": 3602.59, "total_tokens": 19753208}
|
| 6072 |
+
{"current_steps": 30285, "total_steps": 38160, "loss": 0.5554, "lr": 6.212300254190206e-06, "epoch": 15.872641509433961, "percentage": 79.36, "elapsed_time": "1:31:23", "remaining_time": "0:23:45", "throughput": 3602.67, "total_tokens": 19756568}
|
| 6073 |
+
{"current_steps": 30290, "total_steps": 38160, "loss": 0.4656, "lr": 6.204758747476688e-06, "epoch": 15.875262054507338, "percentage": 79.38, "elapsed_time": "1:31:24", "remaining_time": "0:23:45", "throughput": 3602.65, "total_tokens": 19759576}
|
| 6074 |
+
{"current_steps": 30295, "total_steps": 38160, "loss": 0.3664, "lr": 6.1972211725132095e-06, "epoch": 15.877882599580714, "percentage": 79.39, "elapsed_time": "1:31:25", "remaining_time": "0:23:44", "throughput": 3602.8, "total_tokens": 19763608}
|
| 6075 |
+
{"current_steps": 30300, "total_steps": 38160, "loss": 0.6658, "lr": 6.189687530876559e-06, "epoch": 15.880503144654089, "percentage": 79.4, "elapsed_time": "1:31:26", "remaining_time": "0:23:43", "throughput": 3602.86, "total_tokens": 19766808}
|
| 6076 |
+
{"current_steps": 30305, "total_steps": 38160, "loss": 0.3446, "lr": 6.182157824142676e-06, "epoch": 15.883123689727464, "percentage": 79.42, "elapsed_time": "1:31:27", "remaining_time": "0:23:42", "throughput": 3602.83, "total_tokens": 19769752}
|
| 6077 |
+
{"current_steps": 30310, "total_steps": 38160, "loss": 0.6795, "lr": 6.174632053886681e-06, "epoch": 15.885744234800839, "percentage": 79.43, "elapsed_time": "1:31:28", "remaining_time": "0:23:41", "throughput": 3602.87, "total_tokens": 19772920}
|
| 6078 |
+
{"current_steps": 30315, "total_steps": 38160, "loss": 0.4636, "lr": 6.167110221682893e-06, "epoch": 15.888364779874214, "percentage": 79.44, "elapsed_time": "1:31:28", "remaining_time": "0:23:40", "throughput": 3602.83, "total_tokens": 19775512}
|
| 6079 |
+
{"current_steps": 30320, "total_steps": 38160, "loss": 0.5671, "lr": 6.159592329104788e-06, "epoch": 15.89098532494759, "percentage": 79.45, "elapsed_time": "1:31:29", "remaining_time": "0:23:39", "throughput": 3602.9, "total_tokens": 19778968}
|
| 6080 |
+
{"current_steps": 30325, "total_steps": 38160, "loss": 0.426, "lr": 6.152078377725032e-06, "epoch": 15.893605870020965, "percentage": 79.47, "elapsed_time": "1:31:30", "remaining_time": "0:23:38", "throughput": 3602.85, "total_tokens": 19781656}
|
| 6081 |
+
{"current_steps": 30330, "total_steps": 38160, "loss": 0.5325, "lr": 6.144568369115453e-06, "epoch": 15.89622641509434, "percentage": 79.48, "elapsed_time": "1:31:31", "remaining_time": "0:23:37", "throughput": 3602.92, "total_tokens": 19785112}
|
| 6082 |
+
{"current_steps": 30335, "total_steps": 38160, "loss": 0.5056, "lr": 6.137062304847046e-06, "epoch": 15.898846960167715, "percentage": 79.49, "elapsed_time": "1:31:32", "remaining_time": "0:23:36", "throughput": 3602.93, "total_tokens": 19788088}
|
| 6083 |
+
{"current_steps": 30340, "total_steps": 38160, "loss": 0.3467, "lr": 6.129560186490008e-06, "epoch": 15.90146750524109, "percentage": 79.51, "elapsed_time": "1:31:33", "remaining_time": "0:23:35", "throughput": 3602.98, "total_tokens": 19791352}
|
| 6084 |
+
{"current_steps": 30345, "total_steps": 38160, "loss": 0.4621, "lr": 6.122062015613694e-06, "epoch": 15.904088050314465, "percentage": 79.52, "elapsed_time": "1:31:34", "remaining_time": "0:23:34", "throughput": 3603.06, "total_tokens": 19795288}
|
| 6085 |
+
{"current_steps": 30350, "total_steps": 38160, "loss": 0.5108, "lr": 6.114567793786641e-06, "epoch": 15.90670859538784, "percentage": 79.53, "elapsed_time": "1:31:34", "remaining_time": "0:23:33", "throughput": 3603.07, "total_tokens": 19798328}
|
| 6086 |
+
{"current_steps": 30355, "total_steps": 38160, "loss": 0.3631, "lr": 6.107077522576543e-06, "epoch": 15.909329140461216, "percentage": 79.55, "elapsed_time": "1:31:35", "remaining_time": "0:23:33", "throughput": 3603.13, "total_tokens": 19801432}
|
| 6087 |
+
{"current_steps": 30360, "total_steps": 38160, "loss": 0.568, "lr": 6.099591203550292e-06, "epoch": 15.91194968553459, "percentage": 79.56, "elapsed_time": "1:31:36", "remaining_time": "0:23:32", "throughput": 3603.11, "total_tokens": 19804280}
|
| 6088 |
+
{"current_steps": 30365, "total_steps": 38160, "loss": 0.4187, "lr": 6.092108838273927e-06, "epoch": 15.914570230607966, "percentage": 79.57, "elapsed_time": "1:31:37", "remaining_time": "0:23:31", "throughput": 3603.17, "total_tokens": 19807768}
|
| 6089 |
+
{"current_steps": 30370, "total_steps": 38160, "loss": 0.4557, "lr": 6.08463042831268e-06, "epoch": 15.917190775681341, "percentage": 79.59, "elapsed_time": "1:31:38", "remaining_time": "0:23:30", "throughput": 3603.17, "total_tokens": 19810616}
|
| 6090 |
+
{"current_steps": 30375, "total_steps": 38160, "loss": 0.4015, "lr": 6.0771559752309496e-06, "epoch": 15.919811320754716, "percentage": 79.6, "elapsed_time": "1:31:39", "remaining_time": "0:23:29", "throughput": 3603.3, "total_tokens": 19816472}
|
| 6091 |
+
{"current_steps": 30380, "total_steps": 38160, "loss": 0.4054, "lr": 6.069685480592313e-06, "epoch": 15.922431865828091, "percentage": 79.61, "elapsed_time": "1:31:40", "remaining_time": "0:23:28", "throughput": 3603.33, "total_tokens": 19819928}
|
| 6092 |
+
{"current_steps": 30385, "total_steps": 38160, "loss": 0.4584, "lr": 6.062218945959497e-06, "epoch": 15.925052410901468, "percentage": 79.63, "elapsed_time": "1:31:41", "remaining_time": "0:23:27", "throughput": 3603.44, "total_tokens": 19823704}
|
| 6093 |
+
{"current_steps": 30390, "total_steps": 38160, "loss": 0.5188, "lr": 6.054756372894435e-06, "epoch": 15.927672955974844, "percentage": 79.64, "elapsed_time": "1:31:42", "remaining_time": "0:23:26", "throughput": 3603.44, "total_tokens": 19826456}
|
| 6094 |
+
{"current_steps": 30395, "total_steps": 38160, "loss": 0.4431, "lr": 6.04729776295819e-06, "epoch": 15.930293501048219, "percentage": 79.65, "elapsed_time": "1:31:42", "remaining_time": "0:23:25", "throughput": 3603.51, "total_tokens": 19829848}
|
| 6095 |
+
{"current_steps": 30400, "total_steps": 38160, "loss": 0.4965, "lr": 6.0398431177110306e-06, "epoch": 15.932914046121594, "percentage": 79.66, "elapsed_time": "1:31:43", "remaining_time": "0:23:24", "throughput": 3603.5, "total_tokens": 19832856}
|
| 6096 |
+
{"current_steps": 30405, "total_steps": 38160, "loss": 0.4197, "lr": 6.032392438712389e-06, "epoch": 15.935534591194969, "percentage": 79.68, "elapsed_time": "1:31:44", "remaining_time": "0:23:23", "throughput": 3603.49, "total_tokens": 19835704}
|
| 6097 |
+
{"current_steps": 30410, "total_steps": 38160, "loss": 0.4461, "lr": 6.024945727520847e-06, "epoch": 15.938155136268344, "percentage": 79.69, "elapsed_time": "1:31:45", "remaining_time": "0:23:23", "throughput": 3603.57, "total_tokens": 19839352}
|
| 6098 |
+
{"current_steps": 30415, "total_steps": 38160, "loss": 0.4668, "lr": 6.01750298569419e-06, "epoch": 15.94077568134172, "percentage": 79.7, "elapsed_time": "1:31:46", "remaining_time": "0:23:22", "throughput": 3603.62, "total_tokens": 19842840}
|
| 6099 |
+
{"current_steps": 30420, "total_steps": 38160, "loss": 0.4105, "lr": 6.010064214789335e-06, "epoch": 15.943396226415095, "percentage": 79.72, "elapsed_time": "1:31:47", "remaining_time": "0:23:21", "throughput": 3603.74, "total_tokens": 19846808}
|
| 6100 |
+
{"current_steps": 30425, "total_steps": 38160, "loss": 0.4568, "lr": 6.002629416362399e-06, "epoch": 15.94601677148847, "percentage": 79.73, "elapsed_time": "1:31:48", "remaining_time": "0:23:20", "throughput": 3603.78, "total_tokens": 19849816}
|
| 6101 |
+
{"current_steps": 30430, "total_steps": 38160, "loss": 0.3543, "lr": 5.995198591968662e-06, "epoch": 15.948637316561845, "percentage": 79.74, "elapsed_time": "1:31:48", "remaining_time": "0:23:19", "throughput": 3603.78, "total_tokens": 19852568}
|
| 6102 |
+
{"current_steps": 30435, "total_steps": 38160, "loss": 0.3172, "lr": 5.987771743162554e-06, "epoch": 15.95125786163522, "percentage": 79.76, "elapsed_time": "1:31:50", "remaining_time": "0:23:18", "throughput": 3603.98, "total_tokens": 19857976}
|
| 6103 |
+
{"current_steps": 30440, "total_steps": 38160, "loss": 0.4962, "lr": 5.9803488714976955e-06, "epoch": 15.953878406708595, "percentage": 79.77, "elapsed_time": "1:31:50", "remaining_time": "0:23:17", "throughput": 3604.04, "total_tokens": 19861432}
|
| 6104 |
+
{"current_steps": 30445, "total_steps": 38160, "loss": 0.5323, "lr": 5.972929978526872e-06, "epoch": 15.95649895178197, "percentage": 79.78, "elapsed_time": "1:31:51", "remaining_time": "0:23:16", "throughput": 3604.12, "total_tokens": 19865144}
|
| 6105 |
+
{"current_steps": 30450, "total_steps": 38160, "loss": 0.4145, "lr": 5.965515065802019e-06, "epoch": 15.959119496855346, "percentage": 79.8, "elapsed_time": "1:31:52", "remaining_time": "0:23:15", "throughput": 3604.05, "total_tokens": 19867928}
|
| 6106 |
+
{"current_steps": 30455, "total_steps": 38160, "loss": 0.4547, "lr": 5.958104134874254e-06, "epoch": 15.96174004192872, "percentage": 79.81, "elapsed_time": "1:31:53", "remaining_time": "0:23:14", "throughput": 3603.98, "total_tokens": 19870456}
|
| 6107 |
+
{"current_steps": 30460, "total_steps": 38160, "loss": 0.4334, "lr": 5.950697187293872e-06, "epoch": 15.964360587002096, "percentage": 79.82, "elapsed_time": "1:31:54", "remaining_time": "0:23:13", "throughput": 3603.83, "total_tokens": 19872824}
|
| 6108 |
+
{"current_steps": 30465, "total_steps": 38160, "loss": 0.3867, "lr": 5.943294224610305e-06, "epoch": 15.966981132075471, "percentage": 79.83, "elapsed_time": "1:31:55", "remaining_time": "0:23:13", "throughput": 3603.68, "total_tokens": 19875256}
|
| 6109 |
+
{"current_steps": 30470, "total_steps": 38160, "loss": 0.5586, "lr": 5.935895248372175e-06, "epoch": 15.969601677148846, "percentage": 79.85, "elapsed_time": "1:31:56", "remaining_time": "0:23:12", "throughput": 3603.7, "total_tokens": 19878328}
|
| 6110 |
+
{"current_steps": 30475, "total_steps": 38160, "loss": 0.5539, "lr": 5.928500260127273e-06, "epoch": 15.972222222222221, "percentage": 79.86, "elapsed_time": "1:31:56", "remaining_time": "0:23:11", "throughput": 3603.74, "total_tokens": 19881784}
|
| 6111 |
+
{"current_steps": 30480, "total_steps": 38160, "loss": 0.5036, "lr": 5.921109261422531e-06, "epoch": 15.974842767295598, "percentage": 79.87, "elapsed_time": "1:31:57", "remaining_time": "0:23:10", "throughput": 3603.76, "total_tokens": 19885144}
|
| 6112 |
+
{"current_steps": 30485, "total_steps": 38160, "loss": 0.5554, "lr": 5.913722253804071e-06, "epoch": 15.977463312368974, "percentage": 79.89, "elapsed_time": "1:31:58", "remaining_time": "0:23:09", "throughput": 3603.74, "total_tokens": 19888120}
|
| 6113 |
+
{"current_steps": 30490, "total_steps": 38160, "loss": 0.5611, "lr": 5.906339238817163e-06, "epoch": 15.980083857442349, "percentage": 79.9, "elapsed_time": "1:31:59", "remaining_time": "0:23:08", "throughput": 3603.74, "total_tokens": 19891352}
|
| 6114 |
+
{"current_steps": 30495, "total_steps": 38160, "loss": 0.558, "lr": 5.8989602180062554e-06, "epoch": 15.982704402515724, "percentage": 79.91, "elapsed_time": "1:32:00", "remaining_time": "0:23:07", "throughput": 3603.79, "total_tokens": 19894872}
|
| 6115 |
+
{"current_steps": 30500, "total_steps": 38160, "loss": 0.4288, "lr": 5.891585192914953e-06, "epoch": 15.985324947589099, "percentage": 79.93, "elapsed_time": "1:32:01", "remaining_time": "0:23:06", "throughput": 3603.91, "total_tokens": 19898904}
|
| 6116 |
+
{"current_steps": 30505, "total_steps": 38160, "loss": 0.519, "lr": 5.884214165086036e-06, "epoch": 15.987945492662474, "percentage": 79.94, "elapsed_time": "1:32:02", "remaining_time": "0:23:05", "throughput": 3604.03, "total_tokens": 19902840}
|
| 6117 |
+
{"current_steps": 30510, "total_steps": 38160, "loss": 0.4166, "lr": 5.876847136061428e-06, "epoch": 15.99056603773585, "percentage": 79.95, "elapsed_time": "1:32:03", "remaining_time": "0:23:04", "throughput": 3604.09, "total_tokens": 19906168}
|
| 6118 |
+
{"current_steps": 30515, "total_steps": 38160, "loss": 0.7079, "lr": 5.869484107382228e-06, "epoch": 15.993186582809225, "percentage": 79.97, "elapsed_time": "1:32:04", "remaining_time": "0:23:03", "throughput": 3604.03, "total_tokens": 19908920}
|
| 6119 |
+
{"current_steps": 30520, "total_steps": 38160, "loss": 0.5732, "lr": 5.862125080588696e-06, "epoch": 15.9958071278826, "percentage": 79.98, "elapsed_time": "1:32:04", "remaining_time": "0:23:03", "throughput": 3604.07, "total_tokens": 19912216}
|
| 6120 |
+
{"current_steps": 30525, "total_steps": 38160, "loss": 0.5009, "lr": 5.854770057220263e-06, "epoch": 15.998427672955975, "percentage": 79.99, "elapsed_time": "1:32:05", "remaining_time": "0:23:02", "throughput": 3604.03, "total_tokens": 19914904}
|
| 6121 |
+
{"current_steps": 30528, "total_steps": 38160, "eval_loss": 0.4689185619354248, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "1:32:16", "remaining_time": "0:23:04", "throughput": 3597.37, "total_tokens": 19916008}
|
| 6122 |
+
{"current_steps": 30530, "total_steps": 38160, "loss": 0.5205, "lr": 5.8474190388155216e-06, "epoch": 16.00104821802935, "percentage": 80.01, "elapsed_time": "1:32:18", "remaining_time": "0:23:04", "throughput": 3596.3, "total_tokens": 19917032}
|
| 6123 |
+
{"current_steps": 30535, "total_steps": 38160, "loss": 0.6343, "lr": 5.840072026912205e-06, "epoch": 16.003668763102727, "percentage": 80.02, "elapsed_time": "1:32:19", "remaining_time": "0:23:03", "throughput": 3596.29, "total_tokens": 19919976}
|
| 6124 |
+
{"current_steps": 30540, "total_steps": 38160, "loss": 0.4998, "lr": 5.83272902304724e-06, "epoch": 16.0062893081761, "percentage": 80.03, "elapsed_time": "1:32:19", "remaining_time": "0:23:02", "throughput": 3596.09, "total_tokens": 19922312}
|