rbelanec commited on
Commit
0d0c030
·
verified ·
1 Parent(s): 5d178f1

Training in progress, step 3600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +35 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f92cad823930b17e20eb1c7cacfa58edaf27c865b8ec830d9729092d72927b38
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db66563b53b0afa209cc23a5ca3ed6e7c82164320258c240127f2120aec4a349
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -704,3 +704,38 @@
704
  {"current_steps": 3425, "total_steps": 3600, "loss": 0.0006, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:59", "remaining_time": "0:00:18", "throughput": 1449.64, "total_tokens": 520832}
705
  {"current_steps": 3430, "total_steps": 3600, "loss": 0.0003, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:59", "remaining_time": "0:00:17", "throughput": 1449.93, "total_tokens": 521584}
706
  {"current_steps": 3435, "total_steps": 3600, "loss": 0.0004, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:06:00", "remaining_time": "0:00:17", "throughput": 1450.32, "total_tokens": 522352}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
  {"current_steps": 3425, "total_steps": 3600, "loss": 0.0006, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:59", "remaining_time": "0:00:18", "throughput": 1449.64, "total_tokens": 520832}
705
  {"current_steps": 3430, "total_steps": 3600, "loss": 0.0003, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:59", "remaining_time": "0:00:17", "throughput": 1449.93, "total_tokens": 521584}
706
  {"current_steps": 3435, "total_steps": 3600, "loss": 0.0004, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:06:00", "remaining_time": "0:00:17", "throughput": 1450.32, "total_tokens": 522352}
707
+ {"current_steps": 3440, "total_steps": 3600, "loss": 0.0002, "lr": 3.040110147984221e-07, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:06:00", "remaining_time": "0:00:16", "throughput": 1450.67, "total_tokens": 523088}
708
+ {"current_steps": 3445, "total_steps": 3600, "loss": 0.0009, "lr": 2.8545701257221e-07, "epoch": 19.13888888888889, "percentage": 95.69, "elapsed_time": "0:06:00", "remaining_time": "0:00:16", "throughput": 1451.1, "total_tokens": 523840}
709
+ {"current_steps": 3450, "total_steps": 3600, "loss": 0.0001, "lr": 2.674839104671367e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:06:01", "remaining_time": "0:00:15", "throughput": 1451.52, "total_tokens": 524592}
710
+ {"current_steps": 3455, "total_steps": 3600, "loss": 0.0005, "lr": 2.5009213092991034e-07, "epoch": 19.194444444444443, "percentage": 95.97, "elapsed_time": "0:06:01", "remaining_time": "0:00:15", "throughput": 1451.92, "total_tokens": 525344}
711
+ {"current_steps": 3460, "total_steps": 3600, "loss": 0.0003, "lr": 2.3328208274359942e-07, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:06:02", "remaining_time": "0:00:14", "throughput": 1452.34, "total_tokens": 526096}
712
+ {"current_steps": 3465, "total_steps": 3600, "loss": 0.0004, "lr": 2.170541610180432e-07, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:06:02", "remaining_time": "0:00:14", "throughput": 1452.72, "total_tokens": 526832}
713
+ {"current_steps": 3470, "total_steps": 3600, "loss": 0.0002, "lr": 2.014087471805509e-07, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:06:03", "remaining_time": "0:00:13", "throughput": 1453.13, "total_tokens": 527584}
714
+ {"current_steps": 3475, "total_steps": 3600, "loss": 0.0002, "lr": 1.8634620896695043e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "0:06:03", "remaining_time": "0:00:13", "throughput": 1453.59, "total_tokens": 528352}
715
+ {"current_steps": 3480, "total_steps": 3600, "loss": 0.0012, "lr": 1.7186690041292586e-07, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:06:03", "remaining_time": "0:00:12", "throughput": 1453.93, "total_tokens": 529088}
716
+ {"current_steps": 3485, "total_steps": 3600, "loss": 0.0004, "lr": 1.5797116184571304e-07, "epoch": 19.36111111111111, "percentage": 96.81, "elapsed_time": "0:06:04", "remaining_time": "0:00:12", "throughput": 1454.35, "total_tokens": 529856}
717
+ {"current_steps": 3490, "total_steps": 3600, "loss": 0.0004, "lr": 1.4465931987609482e-07, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:06:04", "remaining_time": "0:00:11", "throughput": 1454.79, "total_tokens": 530624}
718
+ {"current_steps": 3495, "total_steps": 3600, "loss": 0.0004, "lr": 1.319316873907267e-07, "epoch": 19.416666666666668, "percentage": 97.08, "elapsed_time": "0:06:05", "remaining_time": "0:00:10", "throughput": 1455.2, "total_tokens": 531376}
719
+ {"current_steps": 3500, "total_steps": 3600, "loss": 0.0003, "lr": 1.1978856354477595e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:06:05", "remaining_time": "0:00:10", "throughput": 1455.66, "total_tokens": 532144}
720
+ {"current_steps": 3505, "total_steps": 3600, "loss": 0.0007, "lr": 1.0823023375489127e-07, "epoch": 19.47222222222222, "percentage": 97.36, "elapsed_time": "0:06:05", "remaining_time": "0:00:09", "throughput": 1456.16, "total_tokens": 532928}
721
+ {"current_steps": 3510, "total_steps": 3600, "loss": 0.0009, "lr": 9.725696969249965e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:06:06", "remaining_time": "0:00:09", "throughput": 1456.58, "total_tokens": 533680}
722
+ {"current_steps": 3515, "total_steps": 3600, "loss": 0.0004, "lr": 8.686902927741991e-08, "epoch": 19.52777777777778, "percentage": 97.64, "elapsed_time": "0:06:06", "remaining_time": "0:00:08", "throughput": 1457.04, "total_tokens": 534448}
723
+ {"current_steps": 3520, "total_steps": 3600, "loss": 0.0003, "lr": 7.706665667180091e-08, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:06:07", "remaining_time": "0:00:08", "throughput": 1457.54, "total_tokens": 535232}
724
+ {"current_steps": 3525, "total_steps": 3600, "loss": 0.0002, "lr": 6.785008227437329e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "0:06:07", "remaining_time": "0:00:07", "throughput": 1457.99, "total_tokens": 536000}
725
+ {"current_steps": 3530, "total_steps": 3600, "loss": 0.0003, "lr": 5.921952271504827e-08, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:06:08", "remaining_time": "0:00:07", "throughput": 1458.41, "total_tokens": 536752}
726
+ {"current_steps": 3535, "total_steps": 3600, "loss": 0.0197, "lr": 5.117518084981621e-08, "epoch": 19.63888888888889, "percentage": 98.19, "elapsed_time": "0:06:08", "remaining_time": "0:00:06", "throughput": 1458.86, "total_tokens": 537520}
727
+ {"current_steps": 3540, "total_steps": 3600, "loss": 0.0006, "lr": 4.371724575597535e-08, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:06:08", "remaining_time": "0:00:06", "throughput": 1459.28, "total_tokens": 538272}
728
+ {"current_steps": 3545, "total_steps": 3600, "loss": 0.0004, "lr": 3.684589272771044e-08, "epoch": 19.694444444444443, "percentage": 98.47, "elapsed_time": "0:06:09", "remaining_time": "0:00:05", "throughput": 1459.69, "total_tokens": 539024}
729
+ {"current_steps": 3550, "total_steps": 3600, "loss": 0.0003, "lr": 3.056128327193486e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:06:09", "remaining_time": "0:00:05", "throughput": 1460.14, "total_tokens": 539792}
730
+ {"current_steps": 3555, "total_steps": 3600, "loss": 0.0003, "lr": 2.486356510453258e-08, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:06:10", "remaining_time": "0:00:04", "throughput": 1460.72, "total_tokens": 540608}
731
+ {"current_steps": 3560, "total_steps": 3600, "loss": 0.0003, "lr": 1.975287214685817e-08, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:06:10", "remaining_time": "0:00:04", "throughput": 1461.11, "total_tokens": 541360}
732
+ {"current_steps": 3565, "total_steps": 3600, "loss": 0.0003, "lr": 1.522932452260595e-08, "epoch": 19.805555555555557, "percentage": 99.03, "elapsed_time": "0:06:10", "remaining_time": "0:00:03", "throughput": 1461.51, "total_tokens": 542112}
733
+ {"current_steps": 3570, "total_steps": 3600, "loss": 0.0003, "lr": 1.1293028554978935e-08, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:06:11", "remaining_time": "0:00:03", "throughput": 1462.0, "total_tokens": 542896}
734
+ {"current_steps": 3575, "total_steps": 3600, "loss": 0.0007, "lr": 7.944076764190845e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "0:06:11", "remaining_time": "0:00:02", "throughput": 1462.44, "total_tokens": 543664}
735
+ {"current_steps": 3580, "total_steps": 3600, "loss": 0.0026, "lr": 5.182547865290044e-09, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:06:12", "remaining_time": "0:00:02", "throughput": 1462.83, "total_tokens": 544416}
736
+ {"current_steps": 3585, "total_steps": 3600, "loss": 0.0005, "lr": 3.008506766313812e-09, "epoch": 19.916666666666668, "percentage": 99.58, "elapsed_time": "0:06:12", "remaining_time": "0:00:01", "throughput": 1463.27, "total_tokens": 545184}
737
+ {"current_steps": 3590, "total_steps": 3600, "loss": 0.0004, "lr": 1.4220045667645566e-09, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:06:12", "remaining_time": "0:00:01", "throughput": 1463.62, "total_tokens": 545920}
738
+ {"current_steps": 3595, "total_steps": 3600, "loss": 0.0002, "lr": 4.2307855639411865e-10, "epoch": 19.97222222222222, "percentage": 99.86, "elapsed_time": "0:06:13", "remaining_time": "0:00:00", "throughput": 1464.03, "total_tokens": 546672}
739
+ {"current_steps": 3600, "total_steps": 3600, "loss": 0.0008, "lr": 1.1752214348903501e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:06:13", "remaining_time": "0:00:00", "throughput": 1464.15, "total_tokens": 547440}
740
+ {"current_steps": 3600, "total_steps": 3600, "eval_loss": 0.9462894201278687, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:06:14", "remaining_time": "0:00:00", "throughput": 1460.73, "total_tokens": 547440}
741
+ {"current_steps": 3600, "total_steps": 3600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:06:16", "remaining_time": "0:00:00", "throughput": 1455.42, "total_tokens": 547440}