rbelanec commited on
Commit
76dd8d6
·
verified ·
1 Parent(s): 42ccc1f

Training in progress, step 39800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9f432bf6913eb7d08cd317b2c784fd246604af3202293a58de2ee59ff90a9ac
3
  size 18124968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6009a2fb01bdbdd45368ea5fb68b8870cfaec509eacfc6564203706a6a183fa
3
  size 18124968
trainer_log.jsonl CHANGED
@@ -8116,3 +8116,44 @@
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.024, "lr": 1.2708814586862016e-08, "epoch": 6.459254425320173, "percentage": 98.99, "elapsed_time": "9:04:56", "remaining_time": "0:05:34", "throughput": 2330.15, "total_tokens": 76188512}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0001, "lr": 1.2397742806111168e-08, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "9:04:58", "remaining_time": "0:05:30", "throughput": 2330.31, "total_tokens": 76198368}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.43227317929267883, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "9:06:19", "remaining_time": "0:05:31", "throughput": 2324.55, "total_tokens": 76198368}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.024, "lr": 1.2708814586862016e-08, "epoch": 6.459254425320173, "percentage": 98.99, "elapsed_time": "9:04:56", "remaining_time": "0:05:34", "throughput": 2330.15, "total_tokens": 76188512}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0001, "lr": 1.2397742806111168e-08, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "9:04:58", "remaining_time": "0:05:30", "throughput": 2330.31, "total_tokens": 76198368}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.43227317929267883, "epoch": 6.4600701525409905, "percentage": 99.0, "elapsed_time": "9:06:19", "remaining_time": "0:05:31", "throughput": 2324.55, "total_tokens": 76198368}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 0.0001, "lr": 1.209052442764369e-08, "epoch": 6.460885879761808, "percentage": 99.01, "elapsed_time": "9:06:23", "remaining_time": "0:05:26", "throughput": 2324.61, "total_tokens": 76209312}
8120
+ {"current_steps": 39610, "total_steps": 40000, "loss": 0.0162, "lr": 1.17871594988328e-08, "epoch": 6.461701606982625, "percentage": 99.02, "elapsed_time": "9:06:25", "remaining_time": "0:05:22", "throughput": 2324.74, "total_tokens": 76218528}
8121
+ {"current_steps": 39615, "total_steps": 40000, "loss": 0.0028, "lr": 1.1487648066466072e-08, "epoch": 6.462517334203443, "percentage": 99.04, "elapsed_time": "9:06:27", "remaining_time": "0:05:18", "throughput": 2324.92, "total_tokens": 76229056}
8122
+ {"current_steps": 39620, "total_steps": 40000, "loss": 0.0001, "lr": 1.1191990176728784e-08, "epoch": 6.46333306142426, "percentage": 99.05, "elapsed_time": "9:06:29", "remaining_time": "0:05:14", "throughput": 2325.07, "total_tokens": 76238912}
8123
+ {"current_steps": 39625, "total_steps": 40000, "loss": 0.0002, "lr": 1.0900185875215018e-08, "epoch": 6.464148788645077, "percentage": 99.06, "elapsed_time": "9:06:32", "remaining_time": "0:05:10", "throughput": 2325.25, "total_tokens": 76249632}
8124
+ {"current_steps": 39630, "total_steps": 40000, "loss": 0.0002, "lr": 1.0612235206924891e-08, "epoch": 6.464964515865894, "percentage": 99.08, "elapsed_time": "9:06:34", "remaining_time": "0:05:06", "throughput": 2325.36, "total_tokens": 76258144}
8125
+ {"current_steps": 39635, "total_steps": 40000, "loss": 0.0001, "lr": 1.0328138216264549e-08, "epoch": 6.465780243086712, "percentage": 99.09, "elapsed_time": "9:06:36", "remaining_time": "0:05:02", "throughput": 2325.5, "total_tokens": 76267360}
8126
+ {"current_steps": 39640, "total_steps": 40000, "loss": 0.1334, "lr": 1.004789494704339e-08, "epoch": 6.466595970307529, "percentage": 99.1, "elapsed_time": "9:06:38", "remaining_time": "0:04:57", "throughput": 2325.61, "total_tokens": 76276000}
8127
+ {"current_steps": 39645, "total_steps": 40000, "loss": 0.055, "lr": 9.771505442482397e-09, "epoch": 6.467411697528346, "percentage": 99.11, "elapsed_time": "9:06:40", "remaining_time": "0:04:53", "throughput": 2325.76, "total_tokens": 76285744}
8128
+ {"current_steps": 39650, "total_steps": 40000, "loss": 0.0001, "lr": 9.498969745200259e-09, "epoch": 6.468227424749164, "percentage": 99.12, "elapsed_time": "9:06:42", "remaining_time": "0:04:49", "throughput": 2325.91, "total_tokens": 76295280}
8129
+ {"current_steps": 39655, "total_steps": 40000, "loss": 0.0006, "lr": 9.230287897230017e-09, "epoch": 6.469043151969982, "percentage": 99.14, "elapsed_time": "9:06:44", "remaining_time": "0:04:45", "throughput": 2326.03, "total_tokens": 76304304}
8130
+ {"current_steps": 39660, "total_steps": 40000, "loss": 0.0002, "lr": 8.965459940002419e-09, "epoch": 6.469858879190799, "percentage": 99.15, "elapsed_time": "9:06:46", "remaining_time": "0:04:41", "throughput": 2326.17, "total_tokens": 76313600}
8131
+ {"current_steps": 39665, "total_steps": 40000, "loss": 0.0001, "lr": 8.704485914357019e-09, "epoch": 6.470674606411616, "percentage": 99.16, "elapsed_time": "9:06:48", "remaining_time": "0:04:37", "throughput": 2326.29, "total_tokens": 76322496}
8132
+ {"current_steps": 39670, "total_steps": 40000, "loss": 0.0001, "lr": 8.447365860539402e-09, "epoch": 6.471490333632433, "percentage": 99.17, "elapsed_time": "9:06:50", "remaining_time": "0:04:32", "throughput": 2326.42, "total_tokens": 76331520}
8133
+ {"current_steps": 39675, "total_steps": 40000, "loss": 0.0001, "lr": 8.194099818201184e-09, "epoch": 6.472306060853251, "percentage": 99.19, "elapsed_time": "9:06:52", "remaining_time": "0:04:28", "throughput": 2326.57, "total_tokens": 76341344}
8134
+ {"current_steps": 39680, "total_steps": 40000, "loss": 0.0793, "lr": 7.944687826400011e-09, "epoch": 6.473121788074068, "percentage": 99.2, "elapsed_time": "9:06:54", "remaining_time": "0:04:24", "throughput": 2326.74, "total_tokens": 76351664}
8135
+ {"current_steps": 39685, "total_steps": 40000, "loss": 0.0002, "lr": 7.699129923599557e-09, "epoch": 6.473937515294885, "percentage": 99.21, "elapsed_time": "9:06:56", "remaining_time": "0:04:20", "throughput": 2326.86, "total_tokens": 76360352}
8136
+ {"current_steps": 39690, "total_steps": 40000, "loss": 0.0, "lr": 7.457426147663982e-09, "epoch": 6.474753242515702, "percentage": 99.22, "elapsed_time": "9:06:58", "remaining_time": "0:04:16", "throughput": 2327.01, "total_tokens": 76370240}
8137
+ {"current_steps": 39695, "total_steps": 40000, "loss": 0.0002, "lr": 7.219576535871797e-09, "epoch": 6.47556896973652, "percentage": 99.24, "elapsed_time": "9:07:01", "remaining_time": "0:04:12", "throughput": 2327.13, "total_tokens": 76379024}
8138
+ {"current_steps": 39700, "total_steps": 40000, "loss": 0.0004, "lr": 6.985581124896445e-09, "epoch": 6.4763846969573375, "percentage": 99.25, "elapsed_time": "9:07:03", "remaining_time": "0:04:08", "throughput": 2327.29, "total_tokens": 76388832}
8139
+ {"current_steps": 39705, "total_steps": 40000, "loss": 0.0002, "lr": 6.755439950828501e-09, "epoch": 6.477200424178155, "percentage": 99.26, "elapsed_time": "9:07:05", "remaining_time": "0:04:03", "throughput": 2327.43, "total_tokens": 76398400}
8140
+ {"current_steps": 39710, "total_steps": 40000, "loss": 0.0, "lr": 6.5291530491562444e-09, "epoch": 6.478016151398972, "percentage": 99.28, "elapsed_time": "9:07:07", "remaining_time": "0:03:59", "throughput": 2327.58, "total_tokens": 76408144}
8141
+ {"current_steps": 39715, "total_steps": 40000, "loss": 0.0002, "lr": 6.3067204547739845e-09, "epoch": 6.47883187861979, "percentage": 99.29, "elapsed_time": "9:07:09", "remaining_time": "0:03:55", "throughput": 2327.69, "total_tokens": 76416624}
8142
+ {"current_steps": 39720, "total_steps": 40000, "loss": 0.0001, "lr": 6.088142201987612e-09, "epoch": 6.479647605840607, "percentage": 99.3, "elapsed_time": "9:07:11", "remaining_time": "0:03:51", "throughput": 2327.86, "total_tokens": 76426912}
8143
+ {"current_steps": 39725, "total_steps": 40000, "loss": 0.0002, "lr": 5.873418324503499e-09, "epoch": 6.480463333061424, "percentage": 99.31, "elapsed_time": "9:07:13", "remaining_time": "0:03:47", "throughput": 2328.04, "total_tokens": 76437568}
8144
+ {"current_steps": 39730, "total_steps": 40000, "loss": 0.0, "lr": 5.6625488554340465e-09, "epoch": 6.481279060282241, "percentage": 99.33, "elapsed_time": "9:07:15", "remaining_time": "0:03:43", "throughput": 2328.19, "total_tokens": 76447328}
8145
+ {"current_steps": 39735, "total_steps": 40000, "loss": 0.0, "lr": 5.455533827297688e-09, "epoch": 6.482094787503059, "percentage": 99.34, "elapsed_time": "9:07:17", "remaining_time": "0:03:39", "throughput": 2328.35, "total_tokens": 76457584}
8146
+ {"current_steps": 39740, "total_steps": 40000, "loss": 0.0053, "lr": 5.252373272018885e-09, "epoch": 6.482910514723876, "percentage": 99.35, "elapsed_time": "9:07:19", "remaining_time": "0:03:34", "throughput": 2328.5, "total_tokens": 76467152}
8147
+ {"current_steps": 39745, "total_steps": 40000, "loss": 0.0072, "lr": 5.053067220925356e-09, "epoch": 6.4837262419446935, "percentage": 99.36, "elapsed_time": "9:07:21", "remaining_time": "0:03:30", "throughput": 2328.69, "total_tokens": 76478304}
8148
+ {"current_steps": 39750, "total_steps": 40000, "loss": 0.0, "lr": 4.857615704759177e-09, "epoch": 6.484541969165511, "percentage": 99.38, "elapsed_time": "9:07:23", "remaining_time": "0:03:26", "throughput": 2328.79, "total_tokens": 76486464}
8149
+ {"current_steps": 39755, "total_steps": 40000, "loss": 0.0001, "lr": 4.666018753654577e-09, "epoch": 6.485357696386329, "percentage": 99.39, "elapsed_time": "9:07:25", "remaining_time": "0:03:22", "throughput": 2328.94, "total_tokens": 76496176}
8150
+ {"current_steps": 39760, "total_steps": 40000, "loss": 0.0, "lr": 4.478276397162917e-09, "epoch": 6.486173423607146, "percentage": 99.4, "elapsed_time": "9:07:28", "remaining_time": "0:03:18", "throughput": 2329.06, "total_tokens": 76505120}
8151
+ {"current_steps": 39765, "total_steps": 40000, "loss": 0.0005, "lr": 4.294388664233262e-09, "epoch": 6.486989150827963, "percentage": 99.41, "elapsed_time": "9:07:30", "remaining_time": "0:03:14", "throughput": 2329.19, "total_tokens": 76513968}
8152
+ {"current_steps": 39770, "total_steps": 40000, "loss": 0.0001, "lr": 4.114355583223484e-09, "epoch": 6.487804878048781, "percentage": 99.42, "elapsed_time": "9:07:32", "remaining_time": "0:03:09", "throughput": 2329.31, "total_tokens": 76522912}
8153
+ {"current_steps": 39775, "total_steps": 40000, "loss": 0.0035, "lr": 3.9381771818974845e-09, "epoch": 6.488620605269598, "percentage": 99.44, "elapsed_time": "9:07:34", "remaining_time": "0:03:05", "throughput": 2329.48, "total_tokens": 76533248}
8154
+ {"current_steps": 39780, "total_steps": 40000, "loss": 0.0, "lr": 3.765853487427973e-09, "epoch": 6.489436332490415, "percentage": 99.45, "elapsed_time": "9:07:36", "remaining_time": "0:03:01", "throughput": 2329.64, "total_tokens": 76543376}
8155
+ {"current_steps": 39785, "total_steps": 40000, "loss": 0.0, "lr": 3.5973845263825857e-09, "epoch": 6.490252059711232, "percentage": 99.46, "elapsed_time": "9:07:38", "remaining_time": "0:02:57", "throughput": 2329.76, "total_tokens": 76552096}
8156
+ {"current_steps": 39790, "total_steps": 40000, "loss": 0.0001, "lr": 3.4327703247488684e-09, "epoch": 6.49106778693205, "percentage": 99.48, "elapsed_time": "9:07:40", "remaining_time": "0:02:53", "throughput": 2329.88, "total_tokens": 76560832}
8157
+ {"current_steps": 39795, "total_steps": 40000, "loss": 0.0, "lr": 3.2720109079037443e-09, "epoch": 6.4918835141528675, "percentage": 99.49, "elapsed_time": "9:07:42", "remaining_time": "0:02:49", "throughput": 2330.02, "total_tokens": 76570480}
8158
+ {"current_steps": 39800, "total_steps": 40000, "loss": 0.0001, "lr": 3.1151063006468193e-09, "epoch": 6.492699241373685, "percentage": 99.5, "elapsed_time": "9:07:44", "remaining_time": "0:02:45", "throughput": 2330.2, "total_tokens": 76581104}
8159
+ {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.4304603636264801, "epoch": 6.492699241373685, "percentage": 99.5, "elapsed_time": "9:09:05", "remaining_time": "0:02:45", "throughput": 2324.47, "total_tokens": 76581104}