rbelanec commited on
Commit
e1248fe
verified
1 Parent(s): 47b6e0b

Training in progress, step 12940

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +129 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d834435a5774f261eff8e00b29084e06208e0e69547dc2b69ac0d89a18b251b
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5deb7db2ca033fe4e5d66be880c3b5cbee1b796617bf40daf21c9952d42fa8f2
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -2478,3 +2478,132 @@
2478
  {"current_steps": 12295, "total_steps": 12940, "loss": 0.2858, "lr": 3.7863599923442516e-07, "epoch": 9.501545595054095, "percentage": 95.02, "elapsed_time": "0:41:24", "remaining_time": "0:02:10", "throughput": 1669.79, "total_tokens": 4148032}
2479
  {"current_steps": 12300, "total_steps": 12940, "loss": 0.3949, "lr": 3.728119986970147e-07, "epoch": 9.505409582689335, "percentage": 95.05, "elapsed_time": "0:41:25", "remaining_time": "0:02:09", "throughput": 1669.85, "total_tokens": 4149664}
2480
  {"current_steps": 12305, "total_steps": 12940, "loss": 0.3249, "lr": 3.670328005248663e-07, "epoch": 9.509273570324575, "percentage": 95.09, "elapsed_time": "0:41:25", "remaining_time": "0:02:08", "throughput": 1669.9, "total_tokens": 4151328}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2478
  {"current_steps": 12295, "total_steps": 12940, "loss": 0.2858, "lr": 3.7863599923442516e-07, "epoch": 9.501545595054095, "percentage": 95.02, "elapsed_time": "0:41:24", "remaining_time": "0:02:10", "throughput": 1669.79, "total_tokens": 4148032}
2479
  {"current_steps": 12300, "total_steps": 12940, "loss": 0.3949, "lr": 3.728119986970147e-07, "epoch": 9.505409582689335, "percentage": 95.05, "elapsed_time": "0:41:25", "remaining_time": "0:02:09", "throughput": 1669.85, "total_tokens": 4149664}
2480
  {"current_steps": 12305, "total_steps": 12940, "loss": 0.3249, "lr": 3.670328005248663e-07, "epoch": 9.509273570324575, "percentage": 95.09, "elapsed_time": "0:41:25", "remaining_time": "0:02:08", "throughput": 1669.9, "total_tokens": 4151328}
2481
+ {"current_steps": 12310, "total_steps": 12940, "loss": 0.3484, "lr": 3.612984152316368e-07, "epoch": 9.513137557959814, "percentage": 95.13, "elapsed_time": "0:41:26", "remaining_time": "0:02:07", "throughput": 1669.95, "total_tokens": 4152928}
2482
+ {"current_steps": 12315, "total_steps": 12940, "loss": 0.5367, "lr": 3.5560885324945614e-07, "epoch": 9.517001545595054, "percentage": 95.17, "elapsed_time": "0:41:27", "remaining_time": "0:02:06", "throughput": 1670.02, "total_tokens": 4154656}
2483
+ {"current_steps": 12320, "total_steps": 12940, "loss": 0.3529, "lr": 3.499641249289087e-07, "epoch": 9.520865533230294, "percentage": 95.21, "elapsed_time": "0:41:28", "remaining_time": "0:02:05", "throughput": 1670.11, "total_tokens": 4156384}
2484
+ {"current_steps": 12325, "total_steps": 12940, "loss": 0.3302, "lr": 3.443642405390191e-07, "epoch": 9.524729520865533, "percentage": 95.25, "elapsed_time": "0:41:29", "remaining_time": "0:02:04", "throughput": 1670.19, "total_tokens": 4158112}
2485
+ {"current_steps": 12330, "total_steps": 12940, "loss": 0.3465, "lr": 3.3880921026723243e-07, "epoch": 9.528593508500773, "percentage": 95.29, "elapsed_time": "0:41:30", "remaining_time": "0:02:03", "throughput": 1670.27, "total_tokens": 4159808}
2486
+ {"current_steps": 12335, "total_steps": 12940, "loss": 0.3361, "lr": 3.332990442193901e-07, "epoch": 9.532457496136013, "percentage": 95.32, "elapsed_time": "0:41:31", "remaining_time": "0:02:02", "throughput": 1670.36, "total_tokens": 4161568}
2487
+ {"current_steps": 12340, "total_steps": 12940, "loss": 0.3253, "lr": 3.2783375241971223e-07, "epoch": 9.536321483771252, "percentage": 95.36, "elapsed_time": "0:41:32", "remaining_time": "0:02:01", "throughput": 1670.41, "total_tokens": 4163200}
2488
+ {"current_steps": 12345, "total_steps": 12940, "loss": 0.4767, "lr": 3.2241334481079576e-07, "epoch": 9.540185471406492, "percentage": 95.4, "elapsed_time": "0:41:33", "remaining_time": "0:02:00", "throughput": 1670.39, "total_tokens": 4164544}
2489
+ {"current_steps": 12350, "total_steps": 12940, "loss": 0.3247, "lr": 3.170378312535693e-07, "epoch": 9.54404945904173, "percentage": 95.44, "elapsed_time": "0:41:34", "remaining_time": "0:01:59", "throughput": 1670.5, "total_tokens": 4166368}
2490
+ {"current_steps": 12355, "total_steps": 12940, "loss": 0.4064, "lr": 3.1170722152729925e-07, "epoch": 9.54791344667697, "percentage": 95.48, "elapsed_time": "0:41:35", "remaining_time": "0:01:58", "throughput": 1670.58, "total_tokens": 4168160}
2491
+ {"current_steps": 12360, "total_steps": 12940, "loss": 0.4454, "lr": 3.064215253295505e-07, "epoch": 9.55177743431221, "percentage": 95.52, "elapsed_time": "0:41:35", "remaining_time": "0:01:57", "throughput": 1670.63, "total_tokens": 4169760}
2492
+ {"current_steps": 12365, "total_steps": 12940, "loss": 0.3249, "lr": 3.011807522761922e-07, "epoch": 9.555641421947449, "percentage": 95.56, "elapsed_time": "0:41:36", "remaining_time": "0:01:56", "throughput": 1670.61, "total_tokens": 4171072}
2493
+ {"current_steps": 12370, "total_steps": 12940, "loss": 0.3822, "lr": 2.9598491190136467e-07, "epoch": 9.55950540958269, "percentage": 95.6, "elapsed_time": "0:41:37", "remaining_time": "0:01:55", "throughput": 1670.7, "total_tokens": 4172736}
2494
+ {"current_steps": 12375, "total_steps": 12940, "loss": 0.3543, "lr": 2.908340136574594e-07, "epoch": 9.56336939721793, "percentage": 95.63, "elapsed_time": "0:41:38", "remaining_time": "0:01:54", "throughput": 1670.73, "total_tokens": 4174240}
2495
+ {"current_steps": 12380, "total_steps": 12940, "loss": 0.4579, "lr": 2.857280669151141e-07, "epoch": 9.567233384853168, "percentage": 95.67, "elapsed_time": "0:41:39", "remaining_time": "0:01:53", "throughput": 1670.88, "total_tokens": 4176224}
2496
+ {"current_steps": 12385, "total_steps": 12940, "loss": 0.4261, "lr": 2.8066708096319005e-07, "epoch": 9.571097372488408, "percentage": 95.71, "elapsed_time": "0:41:40", "remaining_time": "0:01:52", "throughput": 1670.92, "total_tokens": 4177792}
2497
+ {"current_steps": 12390, "total_steps": 12940, "loss": 0.5223, "lr": 2.756510650087557e-07, "epoch": 9.574961360123648, "percentage": 95.75, "elapsed_time": "0:41:41", "remaining_time": "0:01:51", "throughput": 1671.04, "total_tokens": 4179744}
2498
+ {"current_steps": 12395, "total_steps": 12940, "loss": 0.4787, "lr": 2.706800281770588e-07, "epoch": 9.578825347758887, "percentage": 95.79, "elapsed_time": "0:41:42", "remaining_time": "0:01:50", "throughput": 1671.11, "total_tokens": 4181376}
2499
+ {"current_steps": 12400, "total_steps": 12940, "loss": 0.4241, "lr": 2.6575397951153213e-07, "epoch": 9.582689335394127, "percentage": 95.83, "elapsed_time": "0:41:43", "remaining_time": "0:01:49", "throughput": 1671.2, "total_tokens": 4183136}
2500
+ {"current_steps": 12405, "total_steps": 12940, "loss": 0.5363, "lr": 2.6087292797375983e-07, "epoch": 9.586553323029367, "percentage": 95.87, "elapsed_time": "0:41:43", "remaining_time": "0:01:47", "throughput": 1671.25, "total_tokens": 4184704}
2501
+ {"current_steps": 12410, "total_steps": 12940, "loss": 0.4596, "lr": 2.5603688244346947e-07, "epoch": 9.590417310664606, "percentage": 95.9, "elapsed_time": "0:41:44", "remaining_time": "0:01:46", "throughput": 1671.3, "total_tokens": 4186304}
2502
+ {"current_steps": 12415, "total_steps": 12940, "loss": 0.6847, "lr": 2.512458517185068e-07, "epoch": 9.594281298299846, "percentage": 95.94, "elapsed_time": "0:41:45", "remaining_time": "0:01:45", "throughput": 1671.33, "total_tokens": 4187840}
2503
+ {"current_steps": 12420, "total_steps": 12940, "loss": 0.3525, "lr": 2.4649984451482753e-07, "epoch": 9.598145285935084, "percentage": 95.98, "elapsed_time": "0:41:46", "remaining_time": "0:01:44", "throughput": 1671.36, "total_tokens": 4189376}
2504
+ {"current_steps": 12425, "total_steps": 12940, "loss": 0.2785, "lr": 2.417988694664836e-07, "epoch": 9.602009273570324, "percentage": 96.02, "elapsed_time": "0:41:47", "remaining_time": "0:01:43", "throughput": 1671.37, "total_tokens": 4190816}
2505
+ {"current_steps": 12430, "total_steps": 12940, "loss": 0.3433, "lr": 2.3714293512560048e-07, "epoch": 9.605873261205565, "percentage": 96.06, "elapsed_time": "0:41:48", "remaining_time": "0:01:42", "throughput": 1671.52, "total_tokens": 4192800}
2506
+ {"current_steps": 12435, "total_steps": 12940, "loss": 0.351, "lr": 2.3253204996236122e-07, "epoch": 9.609737248840803, "percentage": 96.1, "elapsed_time": "0:41:49", "remaining_time": "0:01:41", "throughput": 1671.55, "total_tokens": 4194304}
2507
+ {"current_steps": 12440, "total_steps": 12940, "loss": 0.6185, "lr": 2.279662223650031e-07, "epoch": 9.613601236476043, "percentage": 96.14, "elapsed_time": "0:41:50", "remaining_time": "0:01:40", "throughput": 1671.65, "total_tokens": 4196128}
2508
+ {"current_steps": 12445, "total_steps": 12940, "loss": 0.4445, "lr": 2.23445460639779e-07, "epoch": 9.617465224111283, "percentage": 96.17, "elapsed_time": "0:41:51", "remaining_time": "0:01:39", "throughput": 1671.71, "total_tokens": 4197760}
2509
+ {"current_steps": 12450, "total_steps": 12940, "loss": 0.4241, "lr": 2.1896977301097686e-07, "epoch": 9.621329211746522, "percentage": 96.21, "elapsed_time": "0:41:51", "remaining_time": "0:01:38", "throughput": 1671.8, "total_tokens": 4199520}
2510
+ {"current_steps": 12455, "total_steps": 12940, "loss": 0.3857, "lr": 2.145391676208669e-07, "epoch": 9.625193199381762, "percentage": 96.25, "elapsed_time": "0:41:52", "remaining_time": "0:01:37", "throughput": 1671.88, "total_tokens": 4201152}
2511
+ {"current_steps": 12460, "total_steps": 12940, "loss": 0.4717, "lr": 2.1015365252971265e-07, "epoch": 9.629057187017002, "percentage": 96.29, "elapsed_time": "0:41:53", "remaining_time": "0:01:36", "throughput": 1671.98, "total_tokens": 4202944}
2512
+ {"current_steps": 12465, "total_steps": 12940, "loss": 0.5431, "lr": 2.0581323571574885e-07, "epoch": 9.63292117465224, "percentage": 96.33, "elapsed_time": "0:41:54", "remaining_time": "0:01:35", "throughput": 1672.05, "total_tokens": 4204640}
2513
+ {"current_steps": 12470, "total_steps": 12940, "loss": 0.3901, "lr": 2.0151792507516754e-07, "epoch": 9.636785162287481, "percentage": 96.37, "elapsed_time": "0:41:55", "remaining_time": "0:01:34", "throughput": 1672.11, "total_tokens": 4206304}
2514
+ {"current_steps": 12475, "total_steps": 12940, "loss": 0.3175, "lr": 1.9726772842209307e-07, "epoch": 9.64064914992272, "percentage": 96.41, "elapsed_time": "0:41:56", "remaining_time": "0:01:33", "throughput": 1672.18, "total_tokens": 4207968}
2515
+ {"current_steps": 12480, "total_steps": 12940, "loss": 0.3083, "lr": 1.9306265348858766e-07, "epoch": 9.64451313755796, "percentage": 96.45, "elapsed_time": "0:41:57", "remaining_time": "0:01:32", "throughput": 1672.23, "total_tokens": 4209568}
2516
+ {"current_steps": 12485, "total_steps": 12940, "loss": 0.4579, "lr": 1.889027079246236e-07, "epoch": 9.6483771251932, "percentage": 96.48, "elapsed_time": "0:41:58", "remaining_time": "0:01:31", "throughput": 1672.29, "total_tokens": 4211200}
2517
+ {"current_steps": 12490, "total_steps": 12940, "loss": 0.4784, "lr": 1.8478789929807505e-07, "epoch": 9.652241112828438, "percentage": 96.52, "elapsed_time": "0:41:59", "remaining_time": "0:01:30", "throughput": 1672.34, "total_tokens": 4212800}
2518
+ {"current_steps": 12495, "total_steps": 12940, "loss": 0.3265, "lr": 1.8071823509469288e-07, "epoch": 9.656105100463678, "percentage": 96.56, "elapsed_time": "0:41:59", "remaining_time": "0:01:29", "throughput": 1672.35, "total_tokens": 4214240}
2519
+ {"current_steps": 12500, "total_steps": 12940, "loss": 0.2973, "lr": 1.7669372271811313e-07, "epoch": 9.659969088098919, "percentage": 96.6, "elapsed_time": "0:42:00", "remaining_time": "0:01:28", "throughput": 1672.39, "total_tokens": 4215808}
2520
+ {"current_steps": 12505, "total_steps": 12940, "loss": 0.2802, "lr": 1.7271436948982368e-07, "epoch": 9.663833075734157, "percentage": 96.64, "elapsed_time": "0:42:01", "remaining_time": "0:01:27", "throughput": 1672.41, "total_tokens": 4217280}
2521
+ {"current_steps": 12510, "total_steps": 12940, "loss": 0.4277, "lr": 1.6878018264915584e-07, "epoch": 9.667697063369397, "percentage": 96.68, "elapsed_time": "0:42:02", "remaining_time": "0:01:26", "throughput": 1672.46, "total_tokens": 4218944}
2522
+ {"current_steps": 12515, "total_steps": 12940, "loss": 0.3073, "lr": 1.6489116935327343e-07, "epoch": 9.671561051004637, "percentage": 96.72, "elapsed_time": "0:42:03", "remaining_time": "0:01:25", "throughput": 1672.53, "total_tokens": 4220640}
2523
+ {"current_steps": 12520, "total_steps": 12940, "loss": 0.4403, "lr": 1.6104733667716975e-07, "epoch": 9.675425038639876, "percentage": 96.75, "elapsed_time": "0:42:04", "remaining_time": "0:01:24", "throughput": 1672.64, "total_tokens": 4222464}
2524
+ {"current_steps": 12525, "total_steps": 12940, "loss": 0.3775, "lr": 1.57248691613629e-07, "epoch": 9.679289026275116, "percentage": 96.79, "elapsed_time": "0:42:05", "remaining_time": "0:01:23", "throughput": 1672.77, "total_tokens": 4224352}
2525
+ {"current_steps": 12530, "total_steps": 12940, "loss": 0.5308, "lr": 1.5349524107323988e-07, "epoch": 9.683153013910356, "percentage": 96.83, "elapsed_time": "0:42:06", "remaining_time": "0:01:22", "throughput": 1672.89, "total_tokens": 4226304}
2526
+ {"current_steps": 12535, "total_steps": 12940, "loss": 0.3465, "lr": 1.4978699188437083e-07, "epoch": 9.687017001545595, "percentage": 96.87, "elapsed_time": "0:42:07", "remaining_time": "0:01:21", "throughput": 1672.93, "total_tokens": 4227872}
2527
+ {"current_steps": 12540, "total_steps": 12940, "loss": 0.3727, "lr": 1.4612395079315334e-07, "epoch": 9.690880989180835, "percentage": 96.91, "elapsed_time": "0:42:08", "remaining_time": "0:01:20", "throughput": 1672.96, "total_tokens": 4229472}
2528
+ {"current_steps": 12545, "total_steps": 12940, "loss": 0.582, "lr": 1.4250612446347622e-07, "epoch": 9.694744976816073, "percentage": 96.95, "elapsed_time": "0:42:09", "remaining_time": "0:01:19", "throughput": 1673.09, "total_tokens": 4231360}
2529
+ {"current_steps": 12550, "total_steps": 12940, "loss": 0.2928, "lr": 1.389335194769803e-07, "epoch": 9.698608964451314, "percentage": 96.99, "elapsed_time": "0:42:09", "remaining_time": "0:01:18", "throughput": 1673.18, "total_tokens": 4233056}
2530
+ {"current_steps": 12555, "total_steps": 12940, "loss": 0.3887, "lr": 1.354061423330333e-07, "epoch": 9.702472952086554, "percentage": 97.02, "elapsed_time": "0:42:10", "remaining_time": "0:01:17", "throughput": 1673.24, "total_tokens": 4234784}
2531
+ {"current_steps": 12560, "total_steps": 12940, "loss": 0.4355, "lr": 1.3192399944872148e-07, "epoch": 9.706336939721792, "percentage": 97.06, "elapsed_time": "0:42:11", "remaining_time": "0:01:16", "throughput": 1673.28, "total_tokens": 4236416}
2532
+ {"current_steps": 12565, "total_steps": 12940, "loss": 0.4145, "lr": 1.2848709715884145e-07, "epoch": 9.710200927357032, "percentage": 97.1, "elapsed_time": "0:42:12", "remaining_time": "0:01:15", "throughput": 1673.27, "total_tokens": 4237856}
2533
+ {"current_steps": 12570, "total_steps": 12940, "loss": 0.4684, "lr": 1.2509544171588893e-07, "epoch": 9.714064914992273, "percentage": 97.14, "elapsed_time": "0:42:13", "remaining_time": "0:01:14", "throughput": 1673.34, "total_tokens": 4239616}
2534
+ {"current_steps": 12575, "total_steps": 12940, "loss": 0.3258, "lr": 1.217490392900422e-07, "epoch": 9.717928902627511, "percentage": 97.18, "elapsed_time": "0:42:14", "remaining_time": "0:01:13", "throughput": 1673.44, "total_tokens": 4241440}
2535
+ {"current_steps": 12580, "total_steps": 12940, "loss": 0.2769, "lr": 1.1844789596915651e-07, "epoch": 9.721792890262751, "percentage": 97.22, "elapsed_time": "0:42:15", "remaining_time": "0:01:12", "throughput": 1673.49, "total_tokens": 4243136}
2536
+ {"current_steps": 12585, "total_steps": 12940, "loss": 0.4726, "lr": 1.1519201775875288e-07, "epoch": 9.725656877897991, "percentage": 97.26, "elapsed_time": "0:42:16", "remaining_time": "0:01:11", "throughput": 1673.51, "total_tokens": 4244608}
2537
+ {"current_steps": 12590, "total_steps": 12940, "loss": 0.378, "lr": 1.1198141058199885e-07, "epoch": 9.72952086553323, "percentage": 97.3, "elapsed_time": "0:42:17", "remaining_time": "0:01:10", "throughput": 1673.49, "total_tokens": 4245984}
2538
+ {"current_steps": 12595, "total_steps": 12940, "loss": 0.4027, "lr": 1.0881608027971114e-07, "epoch": 9.73338485316847, "percentage": 97.33, "elapsed_time": "0:42:18", "remaining_time": "0:01:09", "throughput": 1673.59, "total_tokens": 4247776}
2539
+ {"current_steps": 12600, "total_steps": 12940, "loss": 0.4663, "lr": 1.056960326103279e-07, "epoch": 9.737248840803709, "percentage": 97.37, "elapsed_time": "0:42:18", "remaining_time": "0:01:08", "throughput": 1673.66, "total_tokens": 4249376}
2540
+ {"current_steps": 12605, "total_steps": 12940, "loss": 0.3626, "lr": 1.0262127324991988e-07, "epoch": 9.741112828438949, "percentage": 97.41, "elapsed_time": "0:42:19", "remaining_time": "0:01:07", "throughput": 1673.7, "total_tokens": 4250944}
2541
+ {"current_steps": 12610, "total_steps": 12940, "loss": 0.3471, "lr": 9.959180779215704e-08, "epoch": 9.744976816074189, "percentage": 97.45, "elapsed_time": "0:42:20", "remaining_time": "0:01:06", "throughput": 1673.85, "total_tokens": 4253056}
2542
+ {"current_steps": 12615, "total_steps": 12940, "loss": 0.3606, "lr": 9.660764174831971e-08, "epoch": 9.748840803709427, "percentage": 97.49, "elapsed_time": "0:42:21", "remaining_time": "0:01:05", "throughput": 1673.89, "total_tokens": 4254624}
2543
+ {"current_steps": 12620, "total_steps": 12940, "loss": 0.3397, "lr": 9.366878054727079e-08, "epoch": 9.752704791344668, "percentage": 97.53, "elapsed_time": "0:42:22", "remaining_time": "0:01:04", "throughput": 1673.96, "total_tokens": 4256352}
2544
+ {"current_steps": 12625, "total_steps": 12940, "loss": 0.3416, "lr": 9.077522953545859e-08, "epoch": 9.756568778979908, "percentage": 97.57, "elapsed_time": "0:42:23", "remaining_time": "0:01:03", "throughput": 1674.03, "total_tokens": 4258112}
2545
+ {"current_steps": 12630, "total_steps": 12940, "loss": 0.3426, "lr": 8.79269939768973e-08, "epoch": 9.760432766615146, "percentage": 97.6, "elapsed_time": "0:42:24", "remaining_time": "0:01:02", "throughput": 1674.1, "total_tokens": 4259776}
2546
+ {"current_steps": 12635, "total_steps": 12940, "loss": 0.2975, "lr": 8.512407905316432e-08, "epoch": 9.764296754250386, "percentage": 97.64, "elapsed_time": "0:42:25", "remaining_time": "0:01:01", "throughput": 1674.19, "total_tokens": 4261568}
2547
+ {"current_steps": 12640, "total_steps": 12940, "loss": 0.6986, "lr": 8.23664898633919e-08, "epoch": 9.768160741885627, "percentage": 97.68, "elapsed_time": "0:42:26", "remaining_time": "0:01:00", "throughput": 1674.23, "total_tokens": 4263200}
2548
+ {"current_steps": 12645, "total_steps": 12940, "loss": 0.5382, "lr": 7.965423142425044e-08, "epoch": 9.772024729520865, "percentage": 97.72, "elapsed_time": "0:42:27", "remaining_time": "0:00:59", "throughput": 1674.28, "total_tokens": 4264800}
2549
+ {"current_steps": 12650, "total_steps": 12940, "loss": 0.6548, "lr": 7.698730866994575e-08, "epoch": 9.775888717156105, "percentage": 97.76, "elapsed_time": "0:42:28", "remaining_time": "0:00:58", "throughput": 1674.35, "total_tokens": 4266464}
2550
+ {"current_steps": 12655, "total_steps": 12940, "loss": 0.582, "lr": 7.436572645220519e-08, "epoch": 9.779752704791346, "percentage": 97.8, "elapsed_time": "0:42:29", "remaining_time": "0:00:57", "throughput": 1674.38, "total_tokens": 4268000}
2551
+ {"current_steps": 12660, "total_steps": 12940, "loss": 0.4289, "lr": 7.178948954027209e-08, "epoch": 9.783616692426584, "percentage": 97.84, "elapsed_time": "0:42:29", "remaining_time": "0:00:56", "throughput": 1674.44, "total_tokens": 4269632}
2552
+ {"current_steps": 12665, "total_steps": 12940, "loss": 0.3258, "lr": 6.925860262090301e-08, "epoch": 9.787480680061824, "percentage": 97.87, "elapsed_time": "0:42:30", "remaining_time": "0:00:55", "throughput": 1674.58, "total_tokens": 4271616}
2553
+ {"current_steps": 12670, "total_steps": 12940, "loss": 0.3312, "lr": 6.677307029834267e-08, "epoch": 9.791344667697063, "percentage": 97.91, "elapsed_time": "0:42:31", "remaining_time": "0:00:54", "throughput": 1674.65, "total_tokens": 4273280}
2554
+ {"current_steps": 12675, "total_steps": 12940, "loss": 0.3729, "lr": 6.43328970943352e-08, "epoch": 9.795208655332303, "percentage": 97.95, "elapsed_time": "0:42:32", "remaining_time": "0:00:53", "throughput": 1674.78, "total_tokens": 4275136}
2555
+ {"current_steps": 12680, "total_steps": 12940, "loss": 0.3585, "lr": 6.193808744809626e-08, "epoch": 9.799072642967543, "percentage": 97.99, "elapsed_time": "0:42:33", "remaining_time": "0:00:52", "throughput": 1674.83, "total_tokens": 4276736}
2556
+ {"current_steps": 12685, "total_steps": 12940, "loss": 0.3921, "lr": 5.9588645716324164e-08, "epoch": 9.802936630602781, "percentage": 98.03, "elapsed_time": "0:42:34", "remaining_time": "0:00:51", "throughput": 1674.83, "total_tokens": 4278144}
2557
+ {"current_steps": 12690, "total_steps": 12940, "loss": 0.3523, "lr": 5.728457617317773e-08, "epoch": 9.806800618238022, "percentage": 98.07, "elapsed_time": "0:42:35", "remaining_time": "0:00:50", "throughput": 1674.91, "total_tokens": 4279904}
2558
+ {"current_steps": 12695, "total_steps": 12940, "loss": 0.3226, "lr": 5.502588301027345e-08, "epoch": 9.810664605873262, "percentage": 98.11, "elapsed_time": "0:42:36", "remaining_time": "0:00:49", "throughput": 1674.98, "total_tokens": 4281600}
2559
+ {"current_steps": 12700, "total_steps": 12940, "loss": 0.4694, "lr": 5.281257033668552e-08, "epoch": 9.8145285935085, "percentage": 98.15, "elapsed_time": "0:42:37", "remaining_time": "0:00:48", "throughput": 1675.01, "total_tokens": 4283168}
2560
+ {"current_steps": 12705, "total_steps": 12940, "loss": 0.5221, "lr": 5.064464217891807e-08, "epoch": 9.81839258114374, "percentage": 98.18, "elapsed_time": "0:42:37", "remaining_time": "0:00:47", "throughput": 1674.99, "total_tokens": 4284512}
2561
+ {"current_steps": 12710, "total_steps": 12940, "loss": 0.3996, "lr": 4.852210248091904e-08, "epoch": 9.82225656877898, "percentage": 98.22, "elapsed_time": "0:42:38", "remaining_time": "0:00:46", "throughput": 1675.12, "total_tokens": 4286400}
2562
+ {"current_steps": 12715, "total_steps": 12940, "loss": 0.4497, "lr": 4.644495510406632e-08, "epoch": 9.826120556414219, "percentage": 98.26, "elapsed_time": "0:42:39", "remaining_time": "0:00:45", "throughput": 1675.24, "total_tokens": 4288320}
2563
+ {"current_steps": 12720, "total_steps": 12940, "loss": 0.3119, "lr": 4.441320382715108e-08, "epoch": 9.82998454404946, "percentage": 98.3, "elapsed_time": "0:42:40", "remaining_time": "0:00:44", "throughput": 1675.33, "total_tokens": 4290080}
2564
+ {"current_steps": 12725, "total_steps": 12940, "loss": 0.3016, "lr": 4.242685234638888e-08, "epoch": 9.833848531684698, "percentage": 98.34, "elapsed_time": "0:42:41", "remaining_time": "0:00:43", "throughput": 1675.4, "total_tokens": 4291808}
2565
+ {"current_steps": 12730, "total_steps": 12940, "loss": 0.4407, "lr": 4.0485904275391897e-08, "epoch": 9.837712519319938, "percentage": 98.38, "elapsed_time": "0:42:42", "remaining_time": "0:00:42", "throughput": 1675.47, "total_tokens": 4293504}
2566
+ {"current_steps": 12735, "total_steps": 12940, "loss": 0.353, "lr": 3.859036314518283e-08, "epoch": 9.841576506955178, "percentage": 98.42, "elapsed_time": "0:42:43", "remaining_time": "0:00:41", "throughput": 1675.54, "total_tokens": 4295232}
2567
+ {"current_steps": 12740, "total_steps": 12940, "loss": 0.3825, "lr": 3.6740232404175454e-08, "epoch": 9.845440494590417, "percentage": 98.45, "elapsed_time": "0:42:44", "remaining_time": "0:00:40", "throughput": 1675.61, "total_tokens": 4296928}
2568
+ {"current_steps": 12745, "total_steps": 12940, "loss": 0.3618, "lr": 3.4935515418169085e-08, "epoch": 9.849304482225657, "percentage": 98.49, "elapsed_time": "0:42:45", "remaining_time": "0:00:39", "throughput": 1675.73, "total_tokens": 4298848}
2569
+ {"current_steps": 12750, "total_steps": 12940, "loss": 0.4566, "lr": 3.3176215470348546e-08, "epoch": 9.853168469860897, "percentage": 98.53, "elapsed_time": "0:42:46", "remaining_time": "0:00:38", "throughput": 1675.87, "total_tokens": 4300768}
2570
+ {"current_steps": 12755, "total_steps": 12940, "loss": 0.3259, "lr": 3.146233576127311e-08, "epoch": 9.857032457496135, "percentage": 98.57, "elapsed_time": "0:42:47", "remaining_time": "0:00:37", "throughput": 1675.91, "total_tokens": 4302304}
2571
+ {"current_steps": 12760, "total_steps": 12940, "loss": 0.3904, "lr": 2.979387940887646e-08, "epoch": 9.860896445131376, "percentage": 98.61, "elapsed_time": "0:42:48", "remaining_time": "0:00:36", "throughput": 1675.97, "total_tokens": 4303936}
2572
+ {"current_steps": 12765, "total_steps": 12940, "loss": 0.3409, "lr": 2.817084944845283e-08, "epoch": 9.864760432766616, "percentage": 98.65, "elapsed_time": "0:42:48", "remaining_time": "0:00:35", "throughput": 1676.02, "total_tokens": 4305536}
2573
+ {"current_steps": 12770, "total_steps": 12940, "loss": 0.388, "lr": 2.6593248832654237e-08, "epoch": 9.868624420401854, "percentage": 98.69, "elapsed_time": "0:42:49", "remaining_time": "0:00:34", "throughput": 1676.06, "total_tokens": 4307136}
2574
+ {"current_steps": 12775, "total_steps": 12940, "loss": 0.4761, "lr": 2.506108043149047e-08, "epoch": 9.872488408037094, "percentage": 98.72, "elapsed_time": "0:42:50", "remaining_time": "0:00:33", "throughput": 1676.2, "total_tokens": 4309088}
2575
+ {"current_steps": 12780, "total_steps": 12940, "loss": 0.3983, "lr": 2.3574347032320754e-08, "epoch": 9.876352395672335, "percentage": 98.76, "elapsed_time": "0:42:51", "remaining_time": "0:00:32", "throughput": 1676.21, "total_tokens": 4310592}
2576
+ {"current_steps": 12785, "total_steps": 12940, "loss": 0.347, "lr": 2.2133051339842668e-08, "epoch": 9.880216383307573, "percentage": 98.8, "elapsed_time": "0:42:52", "remaining_time": "0:00:31", "throughput": 1676.26, "total_tokens": 4312256}
2577
+ {"current_steps": 12790, "total_steps": 12940, "loss": 0.329, "lr": 2.0737195976100464e-08, "epoch": 9.884080370942813, "percentage": 98.84, "elapsed_time": "0:42:53", "remaining_time": "0:00:30", "throughput": 1676.3, "total_tokens": 4313888}
2578
+ {"current_steps": 12795, "total_steps": 12940, "loss": 0.4648, "lr": 1.938678348046008e-08, "epoch": 9.887944358578052, "percentage": 98.88, "elapsed_time": "0:42:54", "remaining_time": "0:00:29", "throughput": 1676.36, "total_tokens": 4315552}
2579
+ {"current_steps": 12800, "total_steps": 12940, "loss": 0.2715, "lr": 1.808181630963135e-08, "epoch": 9.891808346213292, "percentage": 98.92, "elapsed_time": "0:42:55", "remaining_time": "0:00:28", "throughput": 1676.45, "total_tokens": 4317344}
2580
+ {"current_steps": 12805, "total_steps": 12940, "loss": 0.4197, "lr": 1.6822296837637474e-08, "epoch": 9.895672333848532, "percentage": 98.96, "elapsed_time": "0:42:56", "remaining_time": "0:00:27", "throughput": 1676.52, "total_tokens": 4319008}
2581
+ {"current_steps": 12810, "total_steps": 12940, "loss": 0.3993, "lr": 1.5608227355826123e-08, "epoch": 9.89953632148377, "percentage": 99.0, "elapsed_time": "0:42:57", "remaining_time": "0:00:26", "throughput": 1676.58, "total_tokens": 4320640}
2582
+ {"current_steps": 12815, "total_steps": 12940, "loss": 0.3689, "lr": 1.4439610072863874e-08, "epoch": 9.90340030911901, "percentage": 99.03, "elapsed_time": "0:42:58", "remaining_time": "0:00:25", "throughput": 1676.74, "total_tokens": 4322752}
2583
+ {"current_steps": 12820, "total_steps": 12940, "loss": 0.2754, "lr": 1.3316447114725129e-08, "epoch": 9.907264296754251, "percentage": 99.07, "elapsed_time": "0:42:58", "remaining_time": "0:00:24", "throughput": 1676.76, "total_tokens": 4324288}
2584
+ {"current_steps": 12825, "total_steps": 12940, "loss": 0.2842, "lr": 1.2238740524692094e-08, "epoch": 9.91112828438949, "percentage": 99.11, "elapsed_time": "0:42:59", "remaining_time": "0:00:23", "throughput": 1676.76, "total_tokens": 4325728}
2585
+ {"current_steps": 12830, "total_steps": 12940, "loss": 0.4791, "lr": 1.1206492263360347e-08, "epoch": 9.91499227202473, "percentage": 99.15, "elapsed_time": "0:43:00", "remaining_time": "0:00:22", "throughput": 1676.82, "total_tokens": 4327424}
2586
+ {"current_steps": 12835, "total_steps": 12940, "loss": 0.3733, "lr": 1.0219704208616621e-08, "epoch": 9.91885625965997, "percentage": 99.19, "elapsed_time": "0:43:01", "remaining_time": "0:00:21", "throughput": 1676.83, "total_tokens": 4328928}
2587
+ {"current_steps": 12840, "total_steps": 12940, "loss": 0.4155, "lr": 9.278378155647138e-09, "epoch": 9.922720247295208, "percentage": 99.23, "elapsed_time": "0:43:02", "remaining_time": "0:00:20", "throughput": 1676.98, "total_tokens": 4330944}
2588
+ {"current_steps": 12845, "total_steps": 12940, "loss": 0.2891, "lr": 8.382515816937609e-09, "epoch": 9.926584234930449, "percentage": 99.27, "elapsed_time": "0:43:03", "remaining_time": "0:00:19", "throughput": 1677.11, "total_tokens": 4332864}
2589
+ {"current_steps": 12850, "total_steps": 12940, "loss": 0.3042, "lr": 7.532118822262124e-09, "epoch": 9.930448222565687, "percentage": 99.3, "elapsed_time": "0:43:04", "remaining_time": "0:00:18", "throughput": 1677.14, "total_tokens": 4334432}
2590
+ {"current_steps": 12855, "total_steps": 12940, "loss": 0.3138, "lr": 6.727188718683164e-09, "epoch": 9.934312210200927, "percentage": 99.34, "elapsed_time": "0:43:05", "remaining_time": "0:00:17", "throughput": 1677.21, "total_tokens": 4336096}
2591
+ {"current_steps": 12860, "total_steps": 12940, "loss": 0.3776, "lr": 5.967726970548815e-09, "epoch": 9.938176197836167, "percentage": 99.38, "elapsed_time": "0:43:06", "remaining_time": "0:00:16", "throughput": 1677.25, "total_tokens": 4337728}
2592
+ {"current_steps": 12865, "total_steps": 12940, "loss": 0.3127, "lr": 5.2537349594872224e-09, "epoch": 9.942040185471406, "percentage": 99.42, "elapsed_time": "0:43:07", "remaining_time": "0:00:15", "throughput": 1677.3, "total_tokens": 4339360}
2593
+ {"current_steps": 12870, "total_steps": 12940, "loss": 0.3483, "lr": 4.58521398441214e-09, "epoch": 9.945904173106646, "percentage": 99.46, "elapsed_time": "0:43:08", "remaining_time": "0:00:14", "throughput": 1677.41, "total_tokens": 4341184}
2594
+ {"current_steps": 12875, "total_steps": 12940, "loss": 0.5156, "lr": 3.9621652615118295e-09, "epoch": 9.949768160741886, "percentage": 99.5, "elapsed_time": "0:43:08", "remaining_time": "0:00:13", "throughput": 1677.46, "total_tokens": 4342848}
2595
+ {"current_steps": 12880, "total_steps": 12940, "loss": 0.4649, "lr": 3.3845899242546108e-09, "epoch": 9.953632148377125, "percentage": 99.54, "elapsed_time": "0:43:09", "remaining_time": "0:00:12", "throughput": 1677.56, "total_tokens": 4344672}
2596
+ {"current_steps": 12885, "total_steps": 12940, "loss": 0.3251, "lr": 2.8524890233722066e-09, "epoch": 9.957496136012365, "percentage": 99.57, "elapsed_time": "0:43:10", "remaining_time": "0:00:11", "throughput": 1677.59, "total_tokens": 4346272}
2597
+ {"current_steps": 12890, "total_steps": 12940, "loss": 0.5271, "lr": 2.3658635268819507e-09, "epoch": 9.961360123647605, "percentage": 99.61, "elapsed_time": "0:43:11", "remaining_time": "0:00:10", "throughput": 1677.71, "total_tokens": 4348224}
2598
+ {"current_steps": 12895, "total_steps": 12940, "loss": 0.4358, "lr": 1.9247143200618043e-09, "epoch": 9.965224111282843, "percentage": 99.65, "elapsed_time": "0:43:12", "remaining_time": "0:00:09", "throughput": 1677.79, "total_tokens": 4349984}
2599
+ {"current_steps": 12900, "total_steps": 12940, "loss": 0.3581, "lr": 1.529042205458686e-09, "epoch": 9.969088098918084, "percentage": 99.69, "elapsed_time": "0:43:13", "remaining_time": "0:00:08", "throughput": 1677.81, "total_tokens": 4351488}
2600
+ {"current_steps": 12905, "total_steps": 12940, "loss": 0.3865, "lr": 1.1788479028940203e-09, "epoch": 9.972952086553324, "percentage": 99.73, "elapsed_time": "0:43:14", "remaining_time": "0:00:07", "throughput": 1677.85, "total_tokens": 4353024}
2601
+ {"current_steps": 12910, "total_steps": 12940, "loss": 0.5942, "lr": 8.741320494443095e-10, "epoch": 9.976816074188562, "percentage": 99.77, "elapsed_time": "0:43:15", "remaining_time": "0:00:06", "throughput": 1677.87, "total_tokens": 4354560}
2602
+ {"current_steps": 12915, "total_steps": 12940, "loss": 0.3823, "lr": 6.148951994577878e-10, "epoch": 9.980680061823803, "percentage": 99.81, "elapsed_time": "0:43:16", "remaining_time": "0:00:05", "throughput": 1677.89, "total_tokens": 4356128}
2603
+ {"current_steps": 12920, "total_steps": 12940, "loss": 0.4517, "lr": 4.0113782454609395e-10, "epoch": 9.984544049459041, "percentage": 99.85, "elapsed_time": "0:43:17", "remaining_time": "0:00:04", "throughput": 1677.99, "total_tokens": 4357888}
2604
+ {"current_steps": 12925, "total_steps": 12940, "loss": 0.3333, "lr": 2.3286031357871994e-10, "epoch": 9.988408037094281, "percentage": 99.88, "elapsed_time": "0:43:17", "remaining_time": "0:00:03", "throughput": 1678.02, "total_tokens": 4359456}
2605
+ {"current_steps": 12930, "total_steps": 12940, "loss": 0.4075, "lr": 1.1006297269411381e-10, "epoch": 9.992272024729521, "percentage": 99.92, "elapsed_time": "0:43:18", "remaining_time": "0:00:02", "throughput": 1678.08, "total_tokens": 4361152}
2606
+ {"current_steps": 12935, "total_steps": 12940, "loss": 0.734, "lr": 3.274602528302584e-11, "epoch": 9.99613601236476, "percentage": 99.96, "elapsed_time": "0:43:19", "remaining_time": "0:00:01", "throughput": 1678.05, "total_tokens": 4362496}
2607
+ {"current_steps": 12940, "total_steps": 12940, "loss": 0.4486, "lr": 9.096120051621526e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:43:20", "remaining_time": "0:00:00", "throughput": 1678.1, "total_tokens": 4364240}
2608
+ {"current_steps": 12940, "total_steps": 12940, "eval_loss": 0.5031372904777527, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:43:32", "remaining_time": "0:00:00", "throughput": 1670.67, "total_tokens": 4364240}
2609
+ {"current_steps": 12940, "total_steps": 12940, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:43:33", "remaining_time": "0:00:00", "throughput": 1669.72, "total_tokens": 4364240}