rbelanec committed on
Commit
ddd2f16
·
verified ·
1 Parent(s): 9d02d10

Training in progress, step 1600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +29 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6647c29501aa5271c2400fef9e1df270eecec3326775c4ada92430b8e2b3dc57
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf58ed33c0af14d1a87a2e6b18383d3eaf2c93297f233349cb078c5d46a690b1
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -300,3 +300,32 @@
300
  {"current_steps": 1455, "total_steps": 1600, "loss": 0.0002, "lr": 2.5150436899061494e-07, "epoch": 18.1875, "percentage": 90.94, "elapsed_time": "0:02:31", "remaining_time": "0:00:15", "throughput": 3000.23, "total_tokens": 455936}
301
  {"current_steps": 1460, "total_steps": 1600, "loss": 0.0002, "lr": 2.3470678346851517e-07, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3001.92, "total_tokens": 457536}
302
  {"current_steps": 1465, "total_steps": 1600, "loss": 0.0001, "lr": 2.1847622018482283e-07, "epoch": 18.3125, "percentage": 91.56, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3003.26, "total_tokens": 459072}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  {"current_steps": 1455, "total_steps": 1600, "loss": 0.0002, "lr": 2.5150436899061494e-07, "epoch": 18.1875, "percentage": 90.94, "elapsed_time": "0:02:31", "remaining_time": "0:00:15", "throughput": 3000.23, "total_tokens": 455936}
301
  {"current_steps": 1460, "total_steps": 1600, "loss": 0.0002, "lr": 2.3470678346851517e-07, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3001.92, "total_tokens": 457536}
302
  {"current_steps": 1465, "total_steps": 1600, "loss": 0.0001, "lr": 2.1847622018482283e-07, "epoch": 18.3125, "percentage": 91.56, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3003.26, "total_tokens": 459072}
303
+ {"current_steps": 1470, "total_steps": 1600, "loss": 0.0001, "lr": 2.028146104142581e-07, "epoch": 18.375, "percentage": 91.88, "elapsed_time": "0:02:33", "remaining_time": "0:00:13", "throughput": 3004.72, "total_tokens": 460640}
304
+ {"current_steps": 1475, "total_steps": 1600, "loss": 0.0002, "lr": 1.8772381773176417e-07, "epoch": 18.4375, "percentage": 92.19, "elapsed_time": "0:02:33", "remaining_time": "0:00:13", "throughput": 3006.21, "total_tokens": 462208}
305
+ {"current_steps": 1480, "total_steps": 1600, "loss": 0.0001, "lr": 1.7320563779075595e-07, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:02:34", "remaining_time": "0:00:12", "throughput": 3007.54, "total_tokens": 463744}
306
+ {"current_steps": 1485, "total_steps": 1600, "loss": 0.0002, "lr": 1.5926179810946185e-07, "epoch": 18.5625, "percentage": 92.81, "elapsed_time": "0:02:34", "remaining_time": "0:00:11", "throughput": 3009.21, "total_tokens": 465344}
307
+ {"current_steps": 1490, "total_steps": 1600, "loss": 0.0001, "lr": 1.4589395786535954e-07, "epoch": 18.625, "percentage": 93.12, "elapsed_time": "0:02:35", "remaining_time": "0:00:11", "throughput": 3010.51, "total_tokens": 466880}
308
+ {"current_steps": 1495, "total_steps": 1600, "loss": 0.0002, "lr": 1.331037076977576e-07, "epoch": 18.6875, "percentage": 93.44, "elapsed_time": "0:02:35", "remaining_time": "0:00:10", "throughput": 3011.99, "total_tokens": 468448}
309
+ {"current_steps": 1500, "total_steps": 1600, "loss": 0.0002, "lr": 1.2089256951851923e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "0:02:35", "remaining_time": "0:00:10", "throughput": 3013.63, "total_tokens": 470048}
310
+ {"current_steps": 1505, "total_steps": 1600, "loss": 0.0001, "lr": 1.0926199633097156e-07, "epoch": 18.8125, "percentage": 94.06, "elapsed_time": "0:02:36", "remaining_time": "0:00:09", "throughput": 3014.93, "total_tokens": 471584}
311
+ {"current_steps": 1510, "total_steps": 1600, "loss": 0.0002, "lr": 9.821337205701664e-08, "epoch": 18.875, "percentage": 94.38, "elapsed_time": "0:02:36", "remaining_time": "0:00:09", "throughput": 3016.37, "total_tokens": 473152}
312
+ {"current_steps": 1515, "total_steps": 1600, "loss": 0.0004, "lr": 8.77480113724516e-08, "epoch": 18.9375, "percentage": 94.69, "elapsed_time": "0:02:37", "remaining_time": "0:00:08", "throughput": 3017.8, "total_tokens": 474720}
313
+ {"current_steps": 1520, "total_steps": 1600, "loss": 0.0001, "lr": 7.786715955054202e-08, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:02:37", "remaining_time": "0:00:08", "throughput": 3018.76, "total_tokens": 476320}
314
+ {"current_steps": 1525, "total_steps": 1600, "loss": 0.0002, "lr": 6.857199231384282e-08, "epoch": 19.0625, "percentage": 95.31, "elapsed_time": "0:02:38", "remaining_time": "0:00:07", "throughput": 3018.56, "total_tokens": 477824}
315
+ {"current_steps": 1530, "total_steps": 1600, "loss": 0.0002, "lr": 5.986361569430166e-08, "epoch": 19.125, "percentage": 95.62, "elapsed_time": "0:02:38", "remaining_time": "0:00:07", "throughput": 3019.8, "total_tokens": 479360}
316
+ {"current_steps": 1535, "total_steps": 1600, "loss": 0.0003, "lr": 5.174306590164879e-08, "epoch": 19.1875, "percentage": 95.94, "elapsed_time": "0:02:39", "remaining_time": "0:00:06", "throughput": 3021.04, "total_tokens": 480896}
317
+ {"current_steps": 1540, "total_steps": 1600, "loss": 0.0002, "lr": 4.42113092001023e-08, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:02:39", "remaining_time": "0:00:06", "throughput": 3022.45, "total_tokens": 482464}
318
+ {"current_steps": 1545, "total_steps": 1600, "loss": 0.0001, "lr": 3.726924179339009e-08, "epoch": 19.3125, "percentage": 96.56, "elapsed_time": "0:02:40", "remaining_time": "0:00:05", "throughput": 3023.84, "total_tokens": 484032}
319
+ {"current_steps": 1550, "total_steps": 1600, "loss": 0.0001, "lr": 3.09176897181096e-08, "epoch": 19.375, "percentage": 96.88, "elapsed_time": "0:02:40", "remaining_time": "0:00:05", "throughput": 3025.41, "total_tokens": 485632}
320
+ {"current_steps": 1555, "total_steps": 1600, "loss": 0.0002, "lr": 2.515740874544148e-08, "epoch": 19.4375, "percentage": 97.19, "elapsed_time": "0:02:40", "remaining_time": "0:00:04", "throughput": 3026.97, "total_tokens": 487232}
321
+ {"current_steps": 1560, "total_steps": 1600, "loss": 0.0002, "lr": 1.9989084291216487e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:02:41", "remaining_time": "0:00:04", "throughput": 3028.5, "total_tokens": 488832}
322
+ {"current_steps": 1565, "total_steps": 1600, "loss": 0.0001, "lr": 1.541333133436018e-08, "epoch": 19.5625, "percentage": 97.81, "elapsed_time": "0:02:41", "remaining_time": "0:00:03", "throughput": 3029.86, "total_tokens": 490400}
323
+ {"current_steps": 1570, "total_steps": 1600, "loss": 0.0001, "lr": 1.1430694343715354e-08, "epoch": 19.625, "percentage": 98.12, "elapsed_time": "0:02:42", "remaining_time": "0:00:03", "throughput": 3031.05, "total_tokens": 491936}
324
+ {"current_steps": 1575, "total_steps": 1600, "loss": 0.0002, "lr": 8.041647213256066e-09, "epoch": 19.6875, "percentage": 98.44, "elapsed_time": "0:02:42", "remaining_time": "0:00:02", "throughput": 3032.4, "total_tokens": 493504}
325
+ {"current_steps": 1580, "total_steps": 1600, "loss": 0.0001, "lr": 5.246593205699424e-09, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:02:43", "remaining_time": "0:00:02", "throughput": 3033.92, "total_tokens": 495104}
326
+ {"current_steps": 1585, "total_steps": 1600, "loss": 0.0002, "lr": 3.0458649045211897e-09, "epoch": 19.8125, "percentage": 99.06, "elapsed_time": "0:02:43", "remaining_time": "0:00:01", "throughput": 3035.24, "total_tokens": 496672}
327
+ {"current_steps": 1590, "total_steps": 1600, "loss": 0.0001, "lr": 1.4397241743813185e-09, "epoch": 19.875, "percentage": 99.38, "elapsed_time": "0:02:44", "remaining_time": "0:00:01", "throughput": 3036.57, "total_tokens": 498240}
328
+ {"current_steps": 1595, "total_steps": 1600, "loss": 0.0002, "lr": 4.283621299649987e-10, "epoch": 19.9375, "percentage": 99.69, "elapsed_time": "0:02:44", "remaining_time": "0:00:00", "throughput": 3037.9, "total_tokens": 499808}
329
+ {"current_steps": 1600, "total_steps": 1600, "loss": 0.0003, "lr": 1.189911324084303e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:45", "remaining_time": "0:00:00", "throughput": 3038.86, "total_tokens": 501440}
330
+ {"current_steps": 1600, "total_steps": 1600, "eval_loss": 1.093362808227539, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:45", "remaining_time": "0:00:00", "throughput": 3021.98, "total_tokens": 501440}
331
+ {"current_steps": 1600, "total_steps": 1600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:47", "remaining_time": "0:00:00", "throughput": 3000.57, "total_tokens": 501440}