rbelanec commited on
Commit
e060eeb
·
verified ·
1 Parent(s): cc04f8d

Training in progress, step 39800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccceb97718050bdd524d6e0a92c61866053be12108241a56eeee8696b86bb128
3
  size 58745928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:143b6dbcd54fce66c0b560a0d301ab1d5391f1350aa5ec2c5d67ec9da0eebd3e
3
  size 58745928
trainer_log.jsonl CHANGED
@@ -8116,3 +8116,44 @@
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 6.459254425320173, "percentage": 98.99, "elapsed_time": "15:46:24", "remaining_time": "0:09:40", "throughput": 1321.07, "total_tokens": 75015984}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "15:46:28", "remaining_time": "0:09:33", "throughput": 1321.16, "total_tokens": 75025808}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.5072615146636963, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "15:48:52", "remaining_time": "0:09:35", "throughput": 1317.81, "total_tokens": 75025808}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 6.459254425320173, "percentage": 98.99, "elapsed_time": "15:46:24", "remaining_time": "0:09:40", "throughput": 1321.07, "total_tokens": 75015984}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "15:46:28", "remaining_time": "0:09:33", "throughput": 1321.16, "total_tokens": 75025808}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.5072615146636963, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "15:48:52", "remaining_time": "0:09:35", "throughput": 1317.81, "total_tokens": 75025808}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 0.0, "lr": 1.209052442764369e-08, "epoch": 6.460885879761808, "percentage": 99.01, "elapsed_time": "15:48:57", "remaining_time": "0:09:27", "throughput": 1317.87, "total_tokens": 75036560}
8120
+ {"current_steps": 39610, "total_steps": 40000, "loss": 0.0, "lr": 1.17871594988328e-08, "epoch": 6.461701606982625, "percentage": 99.02, "elapsed_time": "15:49:01", "remaining_time": "0:09:20", "throughput": 1317.95, "total_tokens": 75045664}
8121
+ {"current_steps": 39615, "total_steps": 40000, "loss": 0.0, "lr": 1.1487648066466072e-08, "epoch": 6.462517334203443, "percentage": 99.04, "elapsed_time": "15:49:04", "remaining_time": "0:09:13", "throughput": 1318.05, "total_tokens": 75056032}
8122
+ {"current_steps": 39620, "total_steps": 40000, "loss": 0.0, "lr": 1.1191990176728784e-08, "epoch": 6.46333306142426, "percentage": 99.05, "elapsed_time": "15:49:08", "remaining_time": "0:09:06", "throughput": 1318.13, "total_tokens": 75065760}
8123
+ {"current_steps": 39625, "total_steps": 40000, "loss": 0.0001, "lr": 1.0900185875215018e-08, "epoch": 6.464148788645077, "percentage": 99.06, "elapsed_time": "15:49:12", "remaining_time": "0:08:58", "throughput": 1318.23, "total_tokens": 75076128}
8124
+ {"current_steps": 39630, "total_steps": 40000, "loss": 0.0, "lr": 1.0612235206924891e-08, "epoch": 6.464964515865894, "percentage": 99.08, "elapsed_time": "15:49:15", "remaining_time": "0:08:51", "throughput": 1318.3, "total_tokens": 75084416}
8125
+ {"current_steps": 39635, "total_steps": 40000, "loss": 0.0, "lr": 1.0328138216264549e-08, "epoch": 6.465780243086712, "percentage": 99.09, "elapsed_time": "15:49:18", "remaining_time": "0:08:44", "throughput": 1318.38, "total_tokens": 75093552}
8126
+ {"current_steps": 39640, "total_steps": 40000, "loss": 0.0001, "lr": 1.004789494704339e-08, "epoch": 6.466595970307529, "percentage": 99.1, "elapsed_time": "15:49:22", "remaining_time": "0:08:37", "throughput": 1318.46, "total_tokens": 75102032}
8127
+ {"current_steps": 39645, "total_steps": 40000, "loss": 0.0032, "lr": 9.771505442482397e-09, "epoch": 6.467411697528346, "percentage": 99.11, "elapsed_time": "15:49:25", "remaining_time": "0:08:30", "throughput": 1318.54, "total_tokens": 75111584}
8128
+ {"current_steps": 39650, "total_steps": 40000, "loss": 0.0, "lr": 9.498969745200259e-09, "epoch": 6.468227424749164, "percentage": 99.12, "elapsed_time": "15:49:29", "remaining_time": "0:08:22", "throughput": 1318.63, "total_tokens": 75121104}
8129
+ {"current_steps": 39655, "total_steps": 40000, "loss": 0.0, "lr": 9.230287897230017e-09, "epoch": 6.469043151969982, "percentage": 99.14, "elapsed_time": "15:49:32", "remaining_time": "0:08:15", "throughput": 1318.71, "total_tokens": 75130048}
8130
+ {"current_steps": 39660, "total_steps": 40000, "loss": 0.0001, "lr": 8.965459940002419e-09, "epoch": 6.469858879190799, "percentage": 99.15, "elapsed_time": "15:49:35", "remaining_time": "0:08:08", "throughput": 1318.79, "total_tokens": 75139312}
8131
+ {"current_steps": 39665, "total_steps": 40000, "loss": 0.0, "lr": 8.704485914357019e-09, "epoch": 6.470674606411616, "percentage": 99.16, "elapsed_time": "15:49:39", "remaining_time": "0:08:01", "throughput": 1318.87, "total_tokens": 75148080}
8132
+ {"current_steps": 39670, "total_steps": 40000, "loss": 0.0, "lr": 8.447365860539402e-09, "epoch": 6.471490333632433, "percentage": 99.17, "elapsed_time": "15:49:42", "remaining_time": "0:07:54", "throughput": 1318.94, "total_tokens": 75156992}
8133
+ {"current_steps": 39675, "total_steps": 40000, "loss": 0.0, "lr": 8.194099818201184e-09, "epoch": 6.472306060853251, "percentage": 99.19, "elapsed_time": "15:49:46", "remaining_time": "0:07:46", "throughput": 1319.03, "total_tokens": 75166768}
8134
+ {"current_steps": 39680, "total_steps": 40000, "loss": 0.0, "lr": 7.944687826400011e-09, "epoch": 6.473121788074068, "percentage": 99.2, "elapsed_time": "15:49:50", "remaining_time": "0:07:39", "throughput": 1319.12, "total_tokens": 75176944}
8135
+ {"current_steps": 39685, "total_steps": 40000, "loss": 0.0, "lr": 7.699129923599557e-09, "epoch": 6.473937515294885, "percentage": 99.21, "elapsed_time": "15:49:53", "remaining_time": "0:07:32", "throughput": 1319.2, "total_tokens": 75185520}
8136
+ {"current_steps": 39690, "total_steps": 40000, "loss": 0.0, "lr": 7.457426147663982e-09, "epoch": 6.474753242515702, "percentage": 99.22, "elapsed_time": "15:49:56", "remaining_time": "0:07:25", "throughput": 1319.29, "total_tokens": 75195232}
8137
+ {"current_steps": 39695, "total_steps": 40000, "loss": 0.0376, "lr": 7.219576535871797e-09, "epoch": 6.47556896973652, "percentage": 99.24, "elapsed_time": "15:50:00", "remaining_time": "0:07:17", "throughput": 1319.36, "total_tokens": 75203904}
8138
+ {"current_steps": 39700, "total_steps": 40000, "loss": 0.0, "lr": 6.985581124896445e-09, "epoch": 6.4763846969573375, "percentage": 99.25, "elapsed_time": "15:50:03", "remaining_time": "0:07:10", "throughput": 1319.45, "total_tokens": 75213472}
8139
+ {"current_steps": 39705, "total_steps": 40000, "loss": 0.0, "lr": 6.755439950828501e-09, "epoch": 6.477200424178155, "percentage": 99.26, "elapsed_time": "15:50:07", "remaining_time": "0:07:03", "throughput": 1319.53, "total_tokens": 75222928}
8140
+ {"current_steps": 39710, "total_steps": 40000, "loss": 0.0, "lr": 6.5291530491562444e-09, "epoch": 6.478016151398972, "percentage": 99.28, "elapsed_time": "15:50:10", "remaining_time": "0:06:56", "throughput": 1319.62, "total_tokens": 75232512}
8141
+ {"current_steps": 39715, "total_steps": 40000, "loss": 0.0011, "lr": 6.3067204547739845e-09, "epoch": 6.47883187861979, "percentage": 99.29, "elapsed_time": "15:50:14", "remaining_time": "0:06:49", "throughput": 1319.69, "total_tokens": 75240800}
8142
+ {"current_steps": 39720, "total_steps": 40000, "loss": 0.0, "lr": 6.088142201987612e-09, "epoch": 6.479647605840607, "percentage": 99.3, "elapsed_time": "15:50:17", "remaining_time": "0:06:41", "throughput": 1319.78, "total_tokens": 75251024}
8143
+ {"current_steps": 39725, "total_steps": 40000, "loss": 0.0, "lr": 5.873418324503499e-09, "epoch": 6.480463333061424, "percentage": 99.31, "elapsed_time": "15:50:21", "remaining_time": "0:06:34", "throughput": 1319.87, "total_tokens": 75261456}
8144
+ {"current_steps": 39730, "total_steps": 40000, "loss": 0.0, "lr": 5.6625488554340465e-09, "epoch": 6.481279060282241, "percentage": 99.33, "elapsed_time": "15:50:25", "remaining_time": "0:06:27", "throughput": 1319.96, "total_tokens": 75271072}
8145
+ {"current_steps": 39735, "total_steps": 40000, "loss": 0.0, "lr": 5.455533827297688e-09, "epoch": 6.482094787503059, "percentage": 99.34, "elapsed_time": "15:50:28", "remaining_time": "0:06:20", "throughput": 1320.05, "total_tokens": 75281104}
8146
+ {"current_steps": 39740, "total_steps": 40000, "loss": 0.0, "lr": 5.252373272018885e-09, "epoch": 6.482910514723876, "percentage": 99.35, "elapsed_time": "15:50:32", "remaining_time": "0:06:13", "throughput": 1320.13, "total_tokens": 75290608}
8147
+ {"current_steps": 39745, "total_steps": 40000, "loss": 0.0, "lr": 5.053067220925356e-09, "epoch": 6.4837262419446935, "percentage": 99.36, "elapsed_time": "15:50:36", "remaining_time": "0:06:05", "throughput": 1320.23, "total_tokens": 75301520}
8148
+ {"current_steps": 39750, "total_steps": 40000, "loss": 0.0, "lr": 4.857615704759177e-09, "epoch": 6.484541969165511, "percentage": 99.38, "elapsed_time": "15:50:39", "remaining_time": "0:05:58", "throughput": 1320.3, "total_tokens": 75309568}
8149
+ {"current_steps": 39755, "total_steps": 40000, "loss": 0.0, "lr": 4.666018753654577e-09, "epoch": 6.485357696386329, "percentage": 99.39, "elapsed_time": "15:50:43", "remaining_time": "0:05:51", "throughput": 1320.39, "total_tokens": 75319136}
8150
+ {"current_steps": 39760, "total_steps": 40000, "loss": 0.0, "lr": 4.478276397162917e-09, "epoch": 6.486173423607146, "percentage": 99.4, "elapsed_time": "15:50:46", "remaining_time": "0:05:44", "throughput": 1320.47, "total_tokens": 75328000}
8151
+ {"current_steps": 39765, "total_steps": 40000, "loss": 0.0762, "lr": 4.294388664233262e-09, "epoch": 6.486989150827963, "percentage": 99.41, "elapsed_time": "15:50:49", "remaining_time": "0:05:37", "throughput": 1320.55, "total_tokens": 75336752}
8152
+ {"current_steps": 39770, "total_steps": 40000, "loss": 0.0, "lr": 4.114355583223484e-09, "epoch": 6.487804878048781, "percentage": 99.42, "elapsed_time": "15:50:52", "remaining_time": "0:05:29", "throughput": 1320.62, "total_tokens": 75345488}
8153
+ {"current_steps": 39775, "total_steps": 40000, "loss": 0.0, "lr": 3.9381771818974845e-09, "epoch": 6.488620605269598, "percentage": 99.44, "elapsed_time": "15:50:56", "remaining_time": "0:05:22", "throughput": 1320.72, "total_tokens": 75355680}
8154
+ {"current_steps": 39780, "total_steps": 40000, "loss": 0.0, "lr": 3.765853487427973e-09, "epoch": 6.489436332490415, "percentage": 99.45, "elapsed_time": "15:51:00", "remaining_time": "0:05:15", "throughput": 1320.81, "total_tokens": 75365712}
8155
+ {"current_steps": 39785, "total_steps": 40000, "loss": 0.0, "lr": 3.5973845263825857e-09, "epoch": 6.490252059711232, "percentage": 99.46, "elapsed_time": "15:51:03", "remaining_time": "0:05:08", "throughput": 1320.88, "total_tokens": 75374320}
8156
+ {"current_steps": 39790, "total_steps": 40000, "loss": 0.0, "lr": 3.4327703247488684e-09, "epoch": 6.49106778693205, "percentage": 99.48, "elapsed_time": "15:51:06", "remaining_time": "0:05:01", "throughput": 1320.96, "total_tokens": 75382832}
8157
+ {"current_steps": 39795, "total_steps": 40000, "loss": 0.0, "lr": 3.2720109079037443e-09, "epoch": 6.4918835141528675, "percentage": 99.49, "elapsed_time": "15:51:10", "remaining_time": "0:04:53", "throughput": 1321.04, "total_tokens": 75392208}
8158
+ {"current_steps": 39800, "total_steps": 40000, "loss": 0.0, "lr": 3.1151063006468193e-09, "epoch": 6.492699241373685, "percentage": 99.5, "elapsed_time": "15:51:14", "remaining_time": "0:04:46", "throughput": 1321.13, "total_tokens": 75402576}
8159
+ {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.5073046684265137, "epoch": 6.492699241373685, "percentage": 99.5, "elapsed_time": "15:53:38", "remaining_time": "0:04:47", "throughput": 1317.8, "total_tokens": 75402576}