"auto-commit"
Browse files- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68444}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/rng_state.pth +1 -1
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68444}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68444}/trainer_state.json +317 -5
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/training_args.bin +0 -0
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68818}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/rng_state.pth +2 -2
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68818}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-67447 β checkpoint-68818}/trainer_state.json +1749 -6
- model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/training_args.bin +0 -0
- model-bin/finetune/base/log/1629840697.4590368/events.out.tfevents.1629840697.c435e1c5ee04.920.231 +3 -0
- model-bin/finetune/base/log/1629841338.3804567/events.out.tfevents.1629841338.c435e1c5ee04.920.233 +3 -0
- model-bin/finetune/base/log/1629841998.772923/events.out.tfevents.1629841998.c435e1c5ee04.920.235 +3 -0
- model-bin/finetune/base/log/1629842644.4759989/events.out.tfevents.1629842644.c435e1c5ee04.920.237 +3 -0
- model-bin/finetune/base/log/1629843285.8477933/events.out.tfevents.1629843285.c435e1c5ee04.920.239 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629840697.c435e1c5ee04.920.230 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629841338.c435e1c5ee04.920.232 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629841998.c435e1c5ee04.920.234 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629842644.c435e1c5ee04.920.236 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629843285.c435e1c5ee04.920.238 +3 -0
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51466514ff46e014697533d374223efd8d0f28ef3d479f3f85da7a120794091a
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68444}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9eca26b4d912d8163ec7356526a3cb6e0d499f65711c3a03987b8a7a7ab2f22b
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4efbd31df0b38278619f9e8e69d966325f5fcd10f44279c23b159dec6316e226
|
| 3 |
size 14503
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68444}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b01834bbb5ad7da18b208abc1899397cef1e36be4c74025882aee2138c95ff7
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:873ebfa7b0a1fec5638b4c9861dd2cc6b33ab6a77483e2566fffb863052317c3
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68444}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -186132,11 +186132,323 @@
|
|
| 186132 |
"eval_steps_per_second": 0.69,
|
| 186133 |
"eval_wer": 0.1986700537955768,
|
| 186134 |
"step": 68195
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186135 |
}
|
| 186136 |
],
|
| 186137 |
"max_steps": 620000,
|
| 186138 |
"num_train_epochs": 5000,
|
| 186139 |
-
"total_flos": 1.
|
| 186140 |
"trial_name": null,
|
| 186141 |
"trial_params": null
|
| 186142 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.18525332578545145,
|
| 3 |
+
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-68444",
|
| 4 |
+
"epoch": 550.995983935743,
|
| 5 |
+
"global_step": 68444,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 186132 |
"eval_steps_per_second": 0.69,
|
| 186133 |
"eval_wer": 0.1986700537955768,
|
| 186134 |
"step": 68195
|
| 186135 |
+
},
|
| 186136 |
+
{
|
| 186137 |
+
"epoch": 545.04,
|
| 186138 |
+
"learning_rate": 8.92349358974359e-06,
|
| 186139 |
+
"loss": 0.3853,
|
| 186140 |
+
"step": 68200
|
| 186141 |
+
},
|
| 186142 |
+
{
|
| 186143 |
+
"epoch": 545.08,
|
| 186144 |
+
"learning_rate": 8.923413461538464e-06,
|
| 186145 |
+
"loss": 0.2899,
|
| 186146 |
+
"step": 68205
|
| 186147 |
+
},
|
| 186148 |
+
{
|
| 186149 |
+
"epoch": 545.12,
|
| 186150 |
+
"learning_rate": 8.923333333333333e-06,
|
| 186151 |
+
"loss": 0.3688,
|
| 186152 |
+
"step": 68210
|
| 186153 |
+
},
|
| 186154 |
+
{
|
| 186155 |
+
"epoch": 545.16,
|
| 186156 |
+
"learning_rate": 8.923253205128206e-06,
|
| 186157 |
+
"loss": 0.6488,
|
| 186158 |
+
"step": 68215
|
| 186159 |
+
},
|
| 186160 |
+
{
|
| 186161 |
+
"epoch": 545.2,
|
| 186162 |
+
"learning_rate": 8.923173076923078e-06,
|
| 186163 |
+
"loss": 1.2436,
|
| 186164 |
+
"step": 68220
|
| 186165 |
+
},
|
| 186166 |
+
{
|
| 186167 |
+
"epoch": 545.24,
|
| 186168 |
+
"learning_rate": 8.923092948717949e-06,
|
| 186169 |
+
"loss": 0.3309,
|
| 186170 |
+
"step": 68225
|
| 186171 |
+
},
|
| 186172 |
+
{
|
| 186173 |
+
"epoch": 545.28,
|
| 186174 |
+
"learning_rate": 8.92301282051282e-06,
|
| 186175 |
+
"loss": 0.3145,
|
| 186176 |
+
"step": 68230
|
| 186177 |
+
},
|
| 186178 |
+
{
|
| 186179 |
+
"epoch": 545.32,
|
| 186180 |
+
"learning_rate": 8.922932692307693e-06,
|
| 186181 |
+
"loss": 0.3409,
|
| 186182 |
+
"step": 68235
|
| 186183 |
+
},
|
| 186184 |
+
{
|
| 186185 |
+
"epoch": 545.36,
|
| 186186 |
+
"learning_rate": 8.922852564102565e-06,
|
| 186187 |
+
"loss": 0.446,
|
| 186188 |
+
"step": 68240
|
| 186189 |
+
},
|
| 186190 |
+
{
|
| 186191 |
+
"epoch": 545.4,
|
| 186192 |
+
"learning_rate": 8.922772435897436e-06,
|
| 186193 |
+
"loss": 1.1688,
|
| 186194 |
+
"step": 68245
|
| 186195 |
+
},
|
| 186196 |
+
{
|
| 186197 |
+
"epoch": 545.44,
|
| 186198 |
+
"learning_rate": 8.922692307692309e-06,
|
| 186199 |
+
"loss": 0.3211,
|
| 186200 |
+
"step": 68250
|
| 186201 |
+
},
|
| 186202 |
+
{
|
| 186203 |
+
"epoch": 545.48,
|
| 186204 |
+
"learning_rate": 8.92261217948718e-06,
|
| 186205 |
+
"loss": 0.366,
|
| 186206 |
+
"step": 68255
|
| 186207 |
+
},
|
| 186208 |
+
{
|
| 186209 |
+
"epoch": 545.52,
|
| 186210 |
+
"learning_rate": 8.922532051282052e-06,
|
| 186211 |
+
"loss": 0.3367,
|
| 186212 |
+
"step": 68260
|
| 186213 |
+
},
|
| 186214 |
+
{
|
| 186215 |
+
"epoch": 545.56,
|
| 186216 |
+
"learning_rate": 8.922451923076923e-06,
|
| 186217 |
+
"loss": 0.4458,
|
| 186218 |
+
"step": 68265
|
| 186219 |
+
},
|
| 186220 |
+
{
|
| 186221 |
+
"epoch": 545.6,
|
| 186222 |
+
"learning_rate": 8.922371794871796e-06,
|
| 186223 |
+
"loss": 1.3543,
|
| 186224 |
+
"step": 68270
|
| 186225 |
+
},
|
| 186226 |
+
{
|
| 186227 |
+
"epoch": 545.64,
|
| 186228 |
+
"learning_rate": 8.922291666666668e-06,
|
| 186229 |
+
"loss": 0.4659,
|
| 186230 |
+
"step": 68275
|
| 186231 |
+
},
|
| 186232 |
+
{
|
| 186233 |
+
"epoch": 545.68,
|
| 186234 |
+
"learning_rate": 8.922211538461539e-06,
|
| 186235 |
+
"loss": 0.294,
|
| 186236 |
+
"step": 68280
|
| 186237 |
+
},
|
| 186238 |
+
{
|
| 186239 |
+
"epoch": 545.72,
|
| 186240 |
+
"learning_rate": 8.92213141025641e-06,
|
| 186241 |
+
"loss": 0.3322,
|
| 186242 |
+
"step": 68285
|
| 186243 |
+
},
|
| 186244 |
+
{
|
| 186245 |
+
"epoch": 545.76,
|
| 186246 |
+
"learning_rate": 8.922051282051283e-06,
|
| 186247 |
+
"loss": 0.7161,
|
| 186248 |
+
"step": 68290
|
| 186249 |
+
},
|
| 186250 |
+
{
|
| 186251 |
+
"epoch": 545.8,
|
| 186252 |
+
"learning_rate": 8.921971153846155e-06,
|
| 186253 |
+
"loss": 1.387,
|
| 186254 |
+
"step": 68295
|
| 186255 |
+
},
|
| 186256 |
+
{
|
| 186257 |
+
"epoch": 545.84,
|
| 186258 |
+
"learning_rate": 8.921891025641026e-06,
|
| 186259 |
+
"loss": 0.3497,
|
| 186260 |
+
"step": 68300
|
| 186261 |
+
},
|
| 186262 |
+
{
|
| 186263 |
+
"epoch": 545.88,
|
| 186264 |
+
"learning_rate": 8.921810897435899e-06,
|
| 186265 |
+
"loss": 0.6459,
|
| 186266 |
+
"step": 68305
|
| 186267 |
+
},
|
| 186268 |
+
{
|
| 186269 |
+
"epoch": 545.92,
|
| 186270 |
+
"learning_rate": 8.92173076923077e-06,
|
| 186271 |
+
"loss": 0.3734,
|
| 186272 |
+
"step": 68310
|
| 186273 |
+
},
|
| 186274 |
+
{
|
| 186275 |
+
"epoch": 545.96,
|
| 186276 |
+
"learning_rate": 8.921650641025642e-06,
|
| 186277 |
+
"loss": 0.5237,
|
| 186278 |
+
"step": 68315
|
| 186279 |
+
},
|
| 186280 |
+
{
|
| 186281 |
+
"epoch": 546.0,
|
| 186282 |
+
"learning_rate": 8.921570512820513e-06,
|
| 186283 |
+
"loss": 1.6727,
|
| 186284 |
+
"step": 68320
|
| 186285 |
+
},
|
| 186286 |
+
{
|
| 186287 |
+
"epoch": 546.0,
|
| 186288 |
+
"eval_loss": 0.47506964206695557,
|
| 186289 |
+
"eval_runtime": 41.5276,
|
| 186290 |
+
"eval_samples_per_second": 20.203,
|
| 186291 |
+
"eval_steps_per_second": 0.65,
|
| 186292 |
+
"eval_wer": 0.1952493381984689,
|
| 186293 |
+
"step": 68320
|
| 186294 |
+
},
|
| 186295 |
+
{
|
| 186296 |
+
"epoch": 550.04,
|
| 186297 |
+
"learning_rate": 8.921490384615386e-06,
|
| 186298 |
+
"loss": 0.4511,
|
| 186299 |
+
"step": 68325
|
| 186300 |
+
},
|
| 186301 |
+
{
|
| 186302 |
+
"epoch": 550.08,
|
| 186303 |
+
"learning_rate": 8.921410256410256e-06,
|
| 186304 |
+
"loss": 0.2694,
|
| 186305 |
+
"step": 68330
|
| 186306 |
+
},
|
| 186307 |
+
{
|
| 186308 |
+
"epoch": 550.12,
|
| 186309 |
+
"learning_rate": 8.921330128205129e-06,
|
| 186310 |
+
"loss": 0.3538,
|
| 186311 |
+
"step": 68335
|
| 186312 |
+
},
|
| 186313 |
+
{
|
| 186314 |
+
"epoch": 550.16,
|
| 186315 |
+
"learning_rate": 8.92125e-06,
|
| 186316 |
+
"loss": 0.449,
|
| 186317 |
+
"step": 68340
|
| 186318 |
+
},
|
| 186319 |
+
{
|
| 186320 |
+
"epoch": 550.2,
|
| 186321 |
+
"learning_rate": 8.921169871794872e-06,
|
| 186322 |
+
"loss": 1.2809,
|
| 186323 |
+
"step": 68345
|
| 186324 |
+
},
|
| 186325 |
+
{
|
| 186326 |
+
"epoch": 550.24,
|
| 186327 |
+
"learning_rate": 8.921089743589745e-06,
|
| 186328 |
+
"loss": 0.3631,
|
| 186329 |
+
"step": 68350
|
| 186330 |
+
},
|
| 186331 |
+
{
|
| 186332 |
+
"epoch": 550.28,
|
| 186333 |
+
"learning_rate": 8.921009615384616e-06,
|
| 186334 |
+
"loss": 0.3321,
|
| 186335 |
+
"step": 68355
|
| 186336 |
+
},
|
| 186337 |
+
{
|
| 186338 |
+
"epoch": 550.32,
|
| 186339 |
+
"learning_rate": 8.920929487179487e-06,
|
| 186340 |
+
"loss": 0.3193,
|
| 186341 |
+
"step": 68360
|
| 186342 |
+
},
|
| 186343 |
+
{
|
| 186344 |
+
"epoch": 550.36,
|
| 186345 |
+
"learning_rate": 8.920849358974359e-06,
|
| 186346 |
+
"loss": 0.4592,
|
| 186347 |
+
"step": 68365
|
| 186348 |
+
},
|
| 186349 |
+
{
|
| 186350 |
+
"epoch": 550.4,
|
| 186351 |
+
"learning_rate": 8.920769230769232e-06,
|
| 186352 |
+
"loss": 1.1757,
|
| 186353 |
+
"step": 68370
|
| 186354 |
+
},
|
| 186355 |
+
{
|
| 186356 |
+
"epoch": 550.44,
|
| 186357 |
+
"learning_rate": 8.920689102564103e-06,
|
| 186358 |
+
"loss": 0.3315,
|
| 186359 |
+
"step": 68375
|
| 186360 |
+
},
|
| 186361 |
+
{
|
| 186362 |
+
"epoch": 550.48,
|
| 186363 |
+
"learning_rate": 8.920608974358975e-06,
|
| 186364 |
+
"loss": 0.316,
|
| 186365 |
+
"step": 68380
|
| 186366 |
+
},
|
| 186367 |
+
{
|
| 186368 |
+
"epoch": 550.52,
|
| 186369 |
+
"learning_rate": 8.920528846153846e-06,
|
| 186370 |
+
"loss": 0.3592,
|
| 186371 |
+
"step": 68385
|
| 186372 |
+
},
|
| 186373 |
+
{
|
| 186374 |
+
"epoch": 550.56,
|
| 186375 |
+
"learning_rate": 8.920448717948719e-06,
|
| 186376 |
+
"loss": 0.5418,
|
| 186377 |
+
"step": 68390
|
| 186378 |
+
},
|
| 186379 |
+
{
|
| 186380 |
+
"epoch": 550.6,
|
| 186381 |
+
"learning_rate": 8.92036858974359e-06,
|
| 186382 |
+
"loss": 1.1373,
|
| 186383 |
+
"step": 68395
|
| 186384 |
+
},
|
| 186385 |
+
{
|
| 186386 |
+
"epoch": 550.64,
|
| 186387 |
+
"learning_rate": 8.920288461538462e-06,
|
| 186388 |
+
"loss": 0.4661,
|
| 186389 |
+
"step": 68400
|
| 186390 |
+
},
|
| 186391 |
+
{
|
| 186392 |
+
"epoch": 550.68,
|
| 186393 |
+
"learning_rate": 8.920208333333335e-06,
|
| 186394 |
+
"loss": 0.3396,
|
| 186395 |
+
"step": 68405
|
| 186396 |
+
},
|
| 186397 |
+
{
|
| 186398 |
+
"epoch": 550.72,
|
| 186399 |
+
"learning_rate": 8.920128205128206e-06,
|
| 186400 |
+
"loss": 0.3348,
|
| 186401 |
+
"step": 68410
|
| 186402 |
+
},
|
| 186403 |
+
{
|
| 186404 |
+
"epoch": 550.76,
|
| 186405 |
+
"learning_rate": 8.920048076923078e-06,
|
| 186406 |
+
"loss": 0.4885,
|
| 186407 |
+
"step": 68415
|
| 186408 |
+
},
|
| 186409 |
+
{
|
| 186410 |
+
"epoch": 550.8,
|
| 186411 |
+
"learning_rate": 8.919967948717949e-06,
|
| 186412 |
+
"loss": 1.2031,
|
| 186413 |
+
"step": 68420
|
| 186414 |
+
},
|
| 186415 |
+
{
|
| 186416 |
+
"epoch": 550.84,
|
| 186417 |
+
"learning_rate": 8.919887820512822e-06,
|
| 186418 |
+
"loss": 0.353,
|
| 186419 |
+
"step": 68425
|
| 186420 |
+
},
|
| 186421 |
+
{
|
| 186422 |
+
"epoch": 550.88,
|
| 186423 |
+
"learning_rate": 8.919807692307693e-06,
|
| 186424 |
+
"loss": 0.3036,
|
| 186425 |
+
"step": 68430
|
| 186426 |
+
},
|
| 186427 |
+
{
|
| 186428 |
+
"epoch": 550.92,
|
| 186429 |
+
"learning_rate": 8.919727564102565e-06,
|
| 186430 |
+
"loss": 0.3383,
|
| 186431 |
+
"step": 68435
|
| 186432 |
+
},
|
| 186433 |
+
{
|
| 186434 |
+
"epoch": 550.96,
|
| 186435 |
+
"learning_rate": 8.919647435897436e-06,
|
| 186436 |
+
"loss": 0.4424,
|
| 186437 |
+
"step": 68440
|
| 186438 |
+
},
|
| 186439 |
+
{
|
| 186440 |
+
"epoch": 551.0,
|
| 186441 |
+
"eval_loss": 0.3531026244163513,
|
| 186442 |
+
"eval_runtime": 41.0545,
|
| 186443 |
+
"eval_samples_per_second": 20.436,
|
| 186444 |
+
"eval_steps_per_second": 0.658,
|
| 186445 |
+
"eval_wer": 0.18525332578545145,
|
| 186446 |
+
"step": 68444
|
| 186447 |
}
|
| 186448 |
],
|
| 186449 |
"max_steps": 620000,
|
| 186450 |
"num_train_epochs": 5000,
|
| 186451 |
+
"total_flos": 1.9260183589460607e+20,
|
| 186452 |
"trial_name": null,
|
| 186453 |
"trial_params": null
|
| 186454 |
}
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68444}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d36abf3a2c5bf4d3050eeafe51b7abe8283b26d662aa895d1c4b2a29d7a740e5
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68818}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c93ff5fcd4e89802ee95abc960093c807c19054952e8b77db72016e9b0895371
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3184289183c73c4a5f506139f7799f696e37e4b880a814a87374ea84bfb05744
|
| 3 |
+
size 14503
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68818}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b48d35ca13554d6b818ee40b007b767588033ef3615fae55bbe7980615d8fa2
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eab293b67371458c5eedb98ca7c7368a43de31a3f787717c48d0bf8927ac86f1
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-67447 β checkpoint-68818}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -185178,11 +185178,1754 @@
|
|
| 185178 |
"eval_steps_per_second": 0.664,
|
| 185179 |
"eval_wer": 0.1855363713557883,
|
| 185180 |
"step": 67447
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185181 |
}
|
| 185182 |
],
|
| 185183 |
-
"max_steps":
|
| 185184 |
"num_train_epochs": 5000,
|
| 185185 |
-
"total_flos": 1.
|
| 185186 |
"trial_name": null,
|
| 185187 |
"trial_params": null
|
| 185188 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.18525332578545145,
|
| 3 |
+
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-68444",
|
| 4 |
+
"epoch": 549.9960159362549,
|
| 5 |
+
"global_step": 68818,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 185178 |
"eval_steps_per_second": 0.664,
|
| 185179 |
"eval_wer": 0.1855363713557883,
|
| 185180 |
"step": 67447
|
| 185181 |
+
},
|
| 185182 |
+
{
|
| 185183 |
+
"epoch": 539.02,
|
| 185184 |
+
"learning_rate": 8.935496794871795e-06,
|
| 185185 |
+
"loss": 0.3408,
|
| 185186 |
+
"step": 67450
|
| 185187 |
+
},
|
| 185188 |
+
{
|
| 185189 |
+
"epoch": 539.06,
|
| 185190 |
+
"learning_rate": 8.935416666666668e-06,
|
| 185191 |
+
"loss": 0.3909,
|
| 185192 |
+
"step": 67455
|
| 185193 |
+
},
|
| 185194 |
+
{
|
| 185195 |
+
"epoch": 539.1,
|
| 185196 |
+
"learning_rate": 8.935336538461539e-06,
|
| 185197 |
+
"loss": 0.3071,
|
| 185198 |
+
"step": 67460
|
| 185199 |
+
},
|
| 185200 |
+
{
|
| 185201 |
+
"epoch": 539.14,
|
| 185202 |
+
"learning_rate": 8.93525641025641e-06,
|
| 185203 |
+
"loss": 0.446,
|
| 185204 |
+
"step": 67465
|
| 185205 |
+
},
|
| 185206 |
+
{
|
| 185207 |
+
"epoch": 539.18,
|
| 185208 |
+
"learning_rate": 8.935176282051283e-06,
|
| 185209 |
+
"loss": 0.9182,
|
| 185210 |
+
"step": 67470
|
| 185211 |
+
},
|
| 185212 |
+
{
|
| 185213 |
+
"epoch": 539.22,
|
| 185214 |
+
"learning_rate": 8.935096153846155e-06,
|
| 185215 |
+
"loss": 0.8721,
|
| 185216 |
+
"step": 67475
|
| 185217 |
+
},
|
| 185218 |
+
{
|
| 185219 |
+
"epoch": 539.26,
|
| 185220 |
+
"learning_rate": 8.935016025641026e-06,
|
| 185221 |
+
"loss": 0.2998,
|
| 185222 |
+
"step": 67480
|
| 185223 |
+
},
|
| 185224 |
+
{
|
| 185225 |
+
"epoch": 539.3,
|
| 185226 |
+
"learning_rate": 8.934935897435897e-06,
|
| 185227 |
+
"loss": 0.3355,
|
| 185228 |
+
"step": 67485
|
| 185229 |
+
},
|
| 185230 |
+
{
|
| 185231 |
+
"epoch": 539.34,
|
| 185232 |
+
"learning_rate": 8.93485576923077e-06,
|
| 185233 |
+
"loss": 0.3517,
|
| 185234 |
+
"step": 67490
|
| 185235 |
+
},
|
| 185236 |
+
{
|
| 185237 |
+
"epoch": 539.38,
|
| 185238 |
+
"learning_rate": 8.934775641025642e-06,
|
| 185239 |
+
"loss": 0.7553,
|
| 185240 |
+
"step": 67495
|
| 185241 |
+
},
|
| 185242 |
+
{
|
| 185243 |
+
"epoch": 539.42,
|
| 185244 |
+
"learning_rate": 8.934695512820513e-06,
|
| 185245 |
+
"loss": 0.9665,
|
| 185246 |
+
"step": 67500
|
| 185247 |
+
},
|
| 185248 |
+
{
|
| 185249 |
+
"epoch": 539.46,
|
| 185250 |
+
"learning_rate": 8.934615384615385e-06,
|
| 185251 |
+
"loss": 0.3252,
|
| 185252 |
+
"step": 67505
|
| 185253 |
+
},
|
| 185254 |
+
{
|
| 185255 |
+
"epoch": 539.5,
|
| 185256 |
+
"learning_rate": 8.934535256410258e-06,
|
| 185257 |
+
"loss": 0.4006,
|
| 185258 |
+
"step": 67510
|
| 185259 |
+
},
|
| 185260 |
+
{
|
| 185261 |
+
"epoch": 539.54,
|
| 185262 |
+
"learning_rate": 8.934455128205129e-06,
|
| 185263 |
+
"loss": 0.414,
|
| 185264 |
+
"step": 67515
|
| 185265 |
+
},
|
| 185266 |
+
{
|
| 185267 |
+
"epoch": 539.58,
|
| 185268 |
+
"learning_rate": 8.934375e-06,
|
| 185269 |
+
"loss": 0.7396,
|
| 185270 |
+
"step": 67520
|
| 185271 |
+
},
|
| 185272 |
+
{
|
| 185273 |
+
"epoch": 539.62,
|
| 185274 |
+
"learning_rate": 8.934294871794873e-06,
|
| 185275 |
+
"loss": 1.0143,
|
| 185276 |
+
"step": 67525
|
| 185277 |
+
},
|
| 185278 |
+
{
|
| 185279 |
+
"epoch": 539.66,
|
| 185280 |
+
"learning_rate": 8.934214743589745e-06,
|
| 185281 |
+
"loss": 0.2893,
|
| 185282 |
+
"step": 67530
|
| 185283 |
+
},
|
| 185284 |
+
{
|
| 185285 |
+
"epoch": 539.7,
|
| 185286 |
+
"learning_rate": 8.934134615384616e-06,
|
| 185287 |
+
"loss": 0.2996,
|
| 185288 |
+
"step": 67535
|
| 185289 |
+
},
|
| 185290 |
+
{
|
| 185291 |
+
"epoch": 539.74,
|
| 185292 |
+
"learning_rate": 8.934054487179487e-06,
|
| 185293 |
+
"loss": 0.4244,
|
| 185294 |
+
"step": 67540
|
| 185295 |
+
},
|
| 185296 |
+
{
|
| 185297 |
+
"epoch": 539.78,
|
| 185298 |
+
"learning_rate": 8.93397435897436e-06,
|
| 185299 |
+
"loss": 0.83,
|
| 185300 |
+
"step": 67545
|
| 185301 |
+
},
|
| 185302 |
+
{
|
| 185303 |
+
"epoch": 539.82,
|
| 185304 |
+
"learning_rate": 8.933894230769232e-06,
|
| 185305 |
+
"loss": 1.0249,
|
| 185306 |
+
"step": 67550
|
| 185307 |
+
},
|
| 185308 |
+
{
|
| 185309 |
+
"epoch": 539.86,
|
| 185310 |
+
"learning_rate": 8.933814102564103e-06,
|
| 185311 |
+
"loss": 0.3636,
|
| 185312 |
+
"step": 67555
|
| 185313 |
+
},
|
| 185314 |
+
{
|
| 185315 |
+
"epoch": 539.9,
|
| 185316 |
+
"learning_rate": 8.933733974358975e-06,
|
| 185317 |
+
"loss": 0.3307,
|
| 185318 |
+
"step": 67560
|
| 185319 |
+
},
|
| 185320 |
+
{
|
| 185321 |
+
"epoch": 539.94,
|
| 185322 |
+
"learning_rate": 8.933653846153848e-06,
|
| 185323 |
+
"loss": 0.3467,
|
| 185324 |
+
"step": 67565
|
| 185325 |
+
},
|
| 185326 |
+
{
|
| 185327 |
+
"epoch": 539.98,
|
| 185328 |
+
"learning_rate": 8.933573717948719e-06,
|
| 185329 |
+
"loss": 0.8957,
|
| 185330 |
+
"step": 67570
|
| 185331 |
+
},
|
| 185332 |
+
{
|
| 185333 |
+
"epoch": 540.0,
|
| 185334 |
+
"eval_loss": 0.49278613924980164,
|
| 185335 |
+
"eval_runtime": 38.3149,
|
| 185336 |
+
"eval_samples_per_second": 21.898,
|
| 185337 |
+
"eval_steps_per_second": 0.705,
|
| 185338 |
+
"eval_wer": 0.192304851931172,
|
| 185339 |
+
"step": 67572
|
| 185340 |
+
},
|
| 185341 |
+
{
|
| 185342 |
+
"epoch": 540.02,
|
| 185343 |
+
"learning_rate": 8.93349358974359e-06,
|
| 185344 |
+
"loss": 0.3757,
|
| 185345 |
+
"step": 67575
|
| 185346 |
+
},
|
| 185347 |
+
{
|
| 185348 |
+
"epoch": 540.06,
|
| 185349 |
+
"learning_rate": 8.933413461538463e-06,
|
| 185350 |
+
"loss": 0.3056,
|
| 185351 |
+
"step": 67580
|
| 185352 |
+
},
|
| 185353 |
+
{
|
| 185354 |
+
"epoch": 540.1,
|
| 185355 |
+
"learning_rate": 8.933333333333333e-06,
|
| 185356 |
+
"loss": 0.3568,
|
| 185357 |
+
"step": 67585
|
| 185358 |
+
},
|
| 185359 |
+
{
|
| 185360 |
+
"epoch": 540.14,
|
| 185361 |
+
"learning_rate": 8.933253205128206e-06,
|
| 185362 |
+
"loss": 0.4253,
|
| 185363 |
+
"step": 67590
|
| 185364 |
+
},
|
| 185365 |
+
{
|
| 185366 |
+
"epoch": 540.18,
|
| 185367 |
+
"learning_rate": 8.933173076923077e-06,
|
| 185368 |
+
"loss": 0.9211,
|
| 185369 |
+
"step": 67595
|
| 185370 |
+
},
|
| 185371 |
+
{
|
| 185372 |
+
"epoch": 540.22,
|
| 185373 |
+
"learning_rate": 8.933092948717949e-06,
|
| 185374 |
+
"loss": 1.0014,
|
| 185375 |
+
"step": 67600
|
| 185376 |
+
},
|
| 185377 |
+
{
|
| 185378 |
+
"epoch": 540.26,
|
| 185379 |
+
"learning_rate": 8.93301282051282e-06,
|
| 185380 |
+
"loss": 0.5676,
|
| 185381 |
+
"step": 67605
|
| 185382 |
+
},
|
| 185383 |
+
{
|
| 185384 |
+
"epoch": 540.3,
|
| 185385 |
+
"learning_rate": 8.932932692307693e-06,
|
| 185386 |
+
"loss": 0.3648,
|
| 185387 |
+
"step": 67610
|
| 185388 |
+
},
|
| 185389 |
+
{
|
| 185390 |
+
"epoch": 540.34,
|
| 185391 |
+
"learning_rate": 8.932852564102565e-06,
|
| 185392 |
+
"loss": 0.4003,
|
| 185393 |
+
"step": 67615
|
| 185394 |
+
},
|
| 185395 |
+
{
|
| 185396 |
+
"epoch": 540.38,
|
| 185397 |
+
"learning_rate": 8.932772435897436e-06,
|
| 185398 |
+
"loss": 0.8113,
|
| 185399 |
+
"step": 67620
|
| 185400 |
+
},
|
| 185401 |
+
{
|
| 185402 |
+
"epoch": 540.42,
|
| 185403 |
+
"learning_rate": 8.932692307692309e-06,
|
| 185404 |
+
"loss": 1.0352,
|
| 185405 |
+
"step": 67625
|
| 185406 |
+
},
|
| 185407 |
+
{
|
| 185408 |
+
"epoch": 540.46,
|
| 185409 |
+
"learning_rate": 8.93261217948718e-06,
|
| 185410 |
+
"loss": 0.3178,
|
| 185411 |
+
"step": 67630
|
| 185412 |
+
},
|
| 185413 |
+
{
|
| 185414 |
+
"epoch": 540.5,
|
| 185415 |
+
"learning_rate": 8.932532051282052e-06,
|
| 185416 |
+
"loss": 0.5683,
|
| 185417 |
+
"step": 67635
|
| 185418 |
+
},
|
| 185419 |
+
{
|
| 185420 |
+
"epoch": 540.54,
|
| 185421 |
+
"learning_rate": 8.932451923076923e-06,
|
| 185422 |
+
"loss": 0.3428,
|
| 185423 |
+
"step": 67640
|
| 185424 |
+
},
|
| 185425 |
+
{
|
| 185426 |
+
"epoch": 540.58,
|
| 185427 |
+
"learning_rate": 8.932371794871796e-06,
|
| 185428 |
+
"loss": 0.7413,
|
| 185429 |
+
"step": 67645
|
| 185430 |
+
},
|
| 185431 |
+
{
|
| 185432 |
+
"epoch": 540.62,
|
| 185433 |
+
"learning_rate": 8.932291666666668e-06,
|
| 185434 |
+
"loss": 0.8567,
|
| 185435 |
+
"step": 67650
|
| 185436 |
+
},
|
| 185437 |
+
{
|
| 185438 |
+
"epoch": 540.66,
|
| 185439 |
+
"learning_rate": 8.932211538461539e-06,
|
| 185440 |
+
"loss": 0.314,
|
| 185441 |
+
"step": 67655
|
| 185442 |
+
},
|
| 185443 |
+
{
|
| 185444 |
+
"epoch": 540.7,
|
| 185445 |
+
"learning_rate": 8.93213141025641e-06,
|
| 185446 |
+
"loss": 0.2992,
|
| 185447 |
+
"step": 67660
|
| 185448 |
+
},
|
| 185449 |
+
{
|
| 185450 |
+
"epoch": 540.74,
|
| 185451 |
+
"learning_rate": 8.932051282051283e-06,
|
| 185452 |
+
"loss": 0.3365,
|
| 185453 |
+
"step": 67665
|
| 185454 |
+
},
|
| 185455 |
+
{
|
| 185456 |
+
"epoch": 540.78,
|
| 185457 |
+
"learning_rate": 8.931971153846155e-06,
|
| 185458 |
+
"loss": 0.7477,
|
| 185459 |
+
"step": 67670
|
| 185460 |
+
},
|
| 185461 |
+
{
|
| 185462 |
+
"epoch": 540.82,
|
| 185463 |
+
"learning_rate": 8.931891025641026e-06,
|
| 185464 |
+
"loss": 0.9346,
|
| 185465 |
+
"step": 67675
|
| 185466 |
+
},
|
| 185467 |
+
{
|
| 185468 |
+
"epoch": 540.86,
|
| 185469 |
+
"learning_rate": 8.931810897435899e-06,
|
| 185470 |
+
"loss": 0.3394,
|
| 185471 |
+
"step": 67680
|
| 185472 |
+
},
|
| 185473 |
+
{
|
| 185474 |
+
"epoch": 540.9,
|
| 185475 |
+
"learning_rate": 8.93173076923077e-06,
|
| 185476 |
+
"loss": 0.2948,
|
| 185477 |
+
"step": 67685
|
| 185478 |
+
},
|
| 185479 |
+
{
|
| 185480 |
+
"epoch": 540.94,
|
| 185481 |
+
"learning_rate": 8.931650641025642e-06,
|
| 185482 |
+
"loss": 0.4479,
|
| 185483 |
+
"step": 67690
|
| 185484 |
+
},
|
| 185485 |
+
{
|
| 185486 |
+
"epoch": 540.98,
|
| 185487 |
+
"learning_rate": 8.931570512820513e-06,
|
| 185488 |
+
"loss": 0.7604,
|
| 185489 |
+
"step": 67695
|
| 185490 |
+
},
|
| 185491 |
+
{
|
| 185492 |
+
"epoch": 541.0,
|
| 185493 |
+
"eval_loss": 0.44643455743789673,
|
| 185494 |
+
"eval_runtime": 38.3882,
|
| 185495 |
+
"eval_samples_per_second": 21.856,
|
| 185496 |
+
"eval_steps_per_second": 0.703,
|
| 185497 |
+
"eval_wer": 0.1991250455705432,
|
| 185498 |
+
"step": 67697
|
| 185499 |
+
},
|
| 185500 |
+
{
|
| 185501 |
+
"epoch": 541.02,
|
| 185502 |
+
"learning_rate": 8.931490384615386e-06,
|
| 185503 |
+
"loss": 0.3752,
|
| 185504 |
+
"step": 67700
|
| 185505 |
+
},
|
| 185506 |
+
{
|
| 185507 |
+
"epoch": 541.06,
|
| 185508 |
+
"learning_rate": 8.931410256410258e-06,
|
| 185509 |
+
"loss": 0.2947,
|
| 185510 |
+
"step": 67705
|
| 185511 |
+
},
|
| 185512 |
+
{
|
| 185513 |
+
"epoch": 541.1,
|
| 185514 |
+
"learning_rate": 8.931330128205129e-06,
|
| 185515 |
+
"loss": 0.3437,
|
| 185516 |
+
"step": 67710
|
| 185517 |
+
},
|
| 185518 |
+
{
|
| 185519 |
+
"epoch": 541.14,
|
| 185520 |
+
"learning_rate": 8.93125e-06,
|
| 185521 |
+
"loss": 0.3961,
|
| 185522 |
+
"step": 67715
|
| 185523 |
+
},
|
| 185524 |
+
{
|
| 185525 |
+
"epoch": 541.18,
|
| 185526 |
+
"learning_rate": 8.931169871794873e-06,
|
| 185527 |
+
"loss": 0.7735,
|
| 185528 |
+
"step": 67720
|
| 185529 |
+
},
|
| 185530 |
+
{
|
| 185531 |
+
"epoch": 541.22,
|
| 185532 |
+
"learning_rate": 8.931089743589745e-06,
|
| 185533 |
+
"loss": 1.0214,
|
| 185534 |
+
"step": 67725
|
| 185535 |
+
},
|
| 185536 |
+
{
|
| 185537 |
+
"epoch": 541.26,
|
| 185538 |
+
"learning_rate": 8.931009615384616e-06,
|
| 185539 |
+
"loss": 0.2891,
|
| 185540 |
+
"step": 67730
|
| 185541 |
+
},
|
| 185542 |
+
{
|
| 185543 |
+
"epoch": 541.3,
|
| 185544 |
+
"learning_rate": 8.930929487179489e-06,
|
| 185545 |
+
"loss": 0.2835,
|
| 185546 |
+
"step": 67735
|
| 185547 |
+
},
|
| 185548 |
+
{
|
| 185549 |
+
"epoch": 541.34,
|
| 185550 |
+
"learning_rate": 8.930849358974359e-06,
|
| 185551 |
+
"loss": 0.3882,
|
| 185552 |
+
"step": 67740
|
| 185553 |
+
},
|
| 185554 |
+
{
|
| 185555 |
+
"epoch": 541.38,
|
| 185556 |
+
"learning_rate": 8.930769230769232e-06,
|
| 185557 |
+
"loss": 0.769,
|
| 185558 |
+
"step": 67745
|
| 185559 |
+
},
|
| 185560 |
+
{
|
| 185561 |
+
"epoch": 541.42,
|
| 185562 |
+
"learning_rate": 8.930689102564103e-06,
|
| 185563 |
+
"loss": 0.7533,
|
| 185564 |
+
"step": 67750
|
| 185565 |
+
},
|
| 185566 |
+
{
|
| 185567 |
+
"epoch": 541.46,
|
| 185568 |
+
"learning_rate": 8.930608974358975e-06,
|
| 185569 |
+
"loss": 0.2833,
|
| 185570 |
+
"step": 67755
|
| 185571 |
+
},
|
| 185572 |
+
{
|
| 185573 |
+
"epoch": 541.5,
|
| 185574 |
+
"learning_rate": 8.930528846153846e-06,
|
| 185575 |
+
"loss": 0.2899,
|
| 185576 |
+
"step": 67760
|
| 185577 |
+
},
|
| 185578 |
+
{
|
| 185579 |
+
"epoch": 541.54,
|
| 185580 |
+
"learning_rate": 8.930448717948719e-06,
|
| 185581 |
+
"loss": 0.4753,
|
| 185582 |
+
"step": 67765
|
| 185583 |
+
},
|
| 185584 |
+
{
|
| 185585 |
+
"epoch": 541.58,
|
| 185586 |
+
"learning_rate": 8.93036858974359e-06,
|
| 185587 |
+
"loss": 0.7755,
|
| 185588 |
+
"step": 67770
|
| 185589 |
+
},
|
| 185590 |
+
{
|
| 185591 |
+
"epoch": 541.62,
|
| 185592 |
+
"learning_rate": 8.930288461538462e-06,
|
| 185593 |
+
"loss": 0.8648,
|
| 185594 |
+
"step": 67775
|
| 185595 |
+
},
|
| 185596 |
+
{
|
| 185597 |
+
"epoch": 541.66,
|
| 185598 |
+
"learning_rate": 8.930208333333335e-06,
|
| 185599 |
+
"loss": 0.2927,
|
| 185600 |
+
"step": 67780
|
| 185601 |
+
},
|
| 185602 |
+
{
|
| 185603 |
+
"epoch": 541.7,
|
| 185604 |
+
"learning_rate": 8.930128205128206e-06,
|
| 185605 |
+
"loss": 0.3099,
|
| 185606 |
+
"step": 67785
|
| 185607 |
+
},
|
| 185608 |
+
{
|
| 185609 |
+
"epoch": 541.74,
|
| 185610 |
+
"learning_rate": 8.930048076923077e-06,
|
| 185611 |
+
"loss": 0.3615,
|
| 185612 |
+
"step": 67790
|
| 185613 |
+
},
|
| 185614 |
+
{
|
| 185615 |
+
"epoch": 541.78,
|
| 185616 |
+
"learning_rate": 8.929967948717949e-06,
|
| 185617 |
+
"loss": 0.8285,
|
| 185618 |
+
"step": 67795
|
| 185619 |
+
},
|
| 185620 |
+
{
|
| 185621 |
+
"epoch": 541.82,
|
| 185622 |
+
"learning_rate": 8.929887820512822e-06,
|
| 185623 |
+
"loss": 0.8921,
|
| 185624 |
+
"step": 67800
|
| 185625 |
+
},
|
| 185626 |
+
{
|
| 185627 |
+
"epoch": 541.86,
|
| 185628 |
+
"learning_rate": 8.929807692307693e-06,
|
| 185629 |
+
"loss": 0.3251,
|
| 185630 |
+
"step": 67805
|
| 185631 |
+
},
|
| 185632 |
+
{
|
| 185633 |
+
"epoch": 541.9,
|
| 185634 |
+
"learning_rate": 8.929727564102565e-06,
|
| 185635 |
+
"loss": 0.338,
|
| 185636 |
+
"step": 67810
|
| 185637 |
+
},
|
| 185638 |
+
{
|
| 185639 |
+
"epoch": 541.94,
|
| 185640 |
+
"learning_rate": 8.929647435897436e-06,
|
| 185641 |
+
"loss": 0.408,
|
| 185642 |
+
"step": 67815
|
| 185643 |
+
},
|
| 185644 |
+
{
|
| 185645 |
+
"epoch": 541.98,
|
| 185646 |
+
"learning_rate": 8.929567307692309e-06,
|
| 185647 |
+
"loss": 0.7369,
|
| 185648 |
+
"step": 67820
|
| 185649 |
+
},
|
| 185650 |
+
{
|
| 185651 |
+
"epoch": 542.0,
|
| 185652 |
+
"eval_loss": 0.3617897629737854,
|
| 185653 |
+
"eval_runtime": 39.0831,
|
| 185654 |
+
"eval_samples_per_second": 21.441,
|
| 185655 |
+
"eval_steps_per_second": 0.691,
|
| 185656 |
+
"eval_wer": 0.19498813760379596,
|
| 185657 |
+
"step": 67822
|
| 185658 |
+
},
|
| 185659 |
+
{
|
| 185660 |
+
"epoch": 542.02,
|
| 185661 |
+
"learning_rate": 8.92948717948718e-06,
|
| 185662 |
+
"loss": 0.482,
|
| 185663 |
+
"step": 67825
|
| 185664 |
+
},
|
| 185665 |
+
{
|
| 185666 |
+
"epoch": 542.06,
|
| 185667 |
+
"learning_rate": 8.929407051282052e-06,
|
| 185668 |
+
"loss": 0.3239,
|
| 185669 |
+
"step": 67830
|
| 185670 |
+
},
|
| 185671 |
+
{
|
| 185672 |
+
"epoch": 542.1,
|
| 185673 |
+
"learning_rate": 8.929326923076925e-06,
|
| 185674 |
+
"loss": 0.2679,
|
| 185675 |
+
"step": 67835
|
| 185676 |
+
},
|
| 185677 |
+
{
|
| 185678 |
+
"epoch": 542.14,
|
| 185679 |
+
"learning_rate": 8.929246794871796e-06,
|
| 185680 |
+
"loss": 0.3645,
|
| 185681 |
+
"step": 67840
|
| 185682 |
+
},
|
| 185683 |
+
{
|
| 185684 |
+
"epoch": 542.18,
|
| 185685 |
+
"learning_rate": 8.929166666666667e-06,
|
| 185686 |
+
"loss": 0.8296,
|
| 185687 |
+
"step": 67845
|
| 185688 |
+
},
|
| 185689 |
+
{
|
| 185690 |
+
"epoch": 542.22,
|
| 185691 |
+
"learning_rate": 8.929086538461539e-06,
|
| 185692 |
+
"loss": 0.8821,
|
| 185693 |
+
"step": 67850
|
| 185694 |
+
},
|
| 185695 |
+
{
|
| 185696 |
+
"epoch": 542.26,
|
| 185697 |
+
"learning_rate": 8.929006410256412e-06,
|
| 185698 |
+
"loss": 0.3484,
|
| 185699 |
+
"step": 67855
|
| 185700 |
+
},
|
| 185701 |
+
{
|
| 185702 |
+
"epoch": 542.3,
|
| 185703 |
+
"learning_rate": 8.928926282051282e-06,
|
| 185704 |
+
"loss": 0.4008,
|
| 185705 |
+
"step": 67860
|
| 185706 |
+
},
|
| 185707 |
+
{
|
| 185708 |
+
"epoch": 542.34,
|
| 185709 |
+
"learning_rate": 8.928846153846155e-06,
|
| 185710 |
+
"loss": 0.3902,
|
| 185711 |
+
"step": 67865
|
| 185712 |
+
},
|
| 185713 |
+
{
|
| 185714 |
+
"epoch": 542.38,
|
| 185715 |
+
"learning_rate": 8.928766025641028e-06,
|
| 185716 |
+
"loss": 0.7359,
|
| 185717 |
+
"step": 67870
|
| 185718 |
+
},
|
| 185719 |
+
{
|
| 185720 |
+
"epoch": 542.42,
|
| 185721 |
+
"learning_rate": 8.928685897435897e-06,
|
| 185722 |
+
"loss": 1.0072,
|
| 185723 |
+
"step": 67875
|
| 185724 |
+
},
|
| 185725 |
+
{
|
| 185726 |
+
"epoch": 542.46,
|
| 185727 |
+
"learning_rate": 8.92860576923077e-06,
|
| 185728 |
+
"loss": 0.2912,
|
| 185729 |
+
"step": 67880
|
| 185730 |
+
},
|
| 185731 |
+
{
|
| 185732 |
+
"epoch": 542.5,
|
| 185733 |
+
"learning_rate": 8.928525641025642e-06,
|
| 185734 |
+
"loss": 0.3905,
|
| 185735 |
+
"step": 67885
|
| 185736 |
+
},
|
| 185737 |
+
{
|
| 185738 |
+
"epoch": 542.54,
|
| 185739 |
+
"learning_rate": 8.928445512820513e-06,
|
| 185740 |
+
"loss": 0.381,
|
| 185741 |
+
"step": 67890
|
| 185742 |
+
},
|
| 185743 |
+
{
|
| 185744 |
+
"epoch": 542.58,
|
| 185745 |
+
"learning_rate": 8.928365384615384e-06,
|
| 185746 |
+
"loss": 0.7648,
|
| 185747 |
+
"step": 67895
|
| 185748 |
+
},
|
| 185749 |
+
{
|
| 185750 |
+
"epoch": 542.62,
|
| 185751 |
+
"learning_rate": 8.928285256410257e-06,
|
| 185752 |
+
"loss": 0.8711,
|
| 185753 |
+
"step": 67900
|
| 185754 |
+
},
|
| 185755 |
+
{
|
| 185756 |
+
"epoch": 542.66,
|
| 185757 |
+
"learning_rate": 8.928205128205129e-06,
|
| 185758 |
+
"loss": 0.2766,
|
| 185759 |
+
"step": 67905
|
| 185760 |
+
},
|
| 185761 |
+
{
|
| 185762 |
+
"epoch": 542.7,
|
| 185763 |
+
"learning_rate": 8.928125e-06,
|
| 185764 |
+
"loss": 0.3258,
|
| 185765 |
+
"step": 67910
|
| 185766 |
+
},
|
| 185767 |
+
{
|
| 185768 |
+
"epoch": 542.74,
|
| 185769 |
+
"learning_rate": 8.928044871794872e-06,
|
| 185770 |
+
"loss": 0.3602,
|
| 185771 |
+
"step": 67915
|
| 185772 |
+
},
|
| 185773 |
+
{
|
| 185774 |
+
"epoch": 542.78,
|
| 185775 |
+
"learning_rate": 8.927964743589745e-06,
|
| 185776 |
+
"loss": 0.7964,
|
| 185777 |
+
"step": 67920
|
| 185778 |
+
},
|
| 185779 |
+
{
|
| 185780 |
+
"epoch": 542.82,
|
| 185781 |
+
"learning_rate": 8.927884615384616e-06,
|
| 185782 |
+
"loss": 1.025,
|
| 185783 |
+
"step": 67925
|
| 185784 |
+
},
|
| 185785 |
+
{
|
| 185786 |
+
"epoch": 542.86,
|
| 185787 |
+
"learning_rate": 8.927804487179487e-06,
|
| 185788 |
+
"loss": 0.2855,
|
| 185789 |
+
"step": 67930
|
| 185790 |
+
},
|
| 185791 |
+
{
|
| 185792 |
+
"epoch": 542.9,
|
| 185793 |
+
"learning_rate": 8.92772435897436e-06,
|
| 185794 |
+
"loss": 0.3043,
|
| 185795 |
+
"step": 67935
|
| 185796 |
+
},
|
| 185797 |
+
{
|
| 185798 |
+
"epoch": 542.94,
|
| 185799 |
+
"learning_rate": 8.927644230769232e-06,
|
| 185800 |
+
"loss": 0.3756,
|
| 185801 |
+
"step": 67940
|
| 185802 |
+
},
|
| 185803 |
+
{
|
| 185804 |
+
"epoch": 542.98,
|
| 185805 |
+
"learning_rate": 8.927564102564103e-06,
|
| 185806 |
+
"loss": 0.8851,
|
| 185807 |
+
"step": 67945
|
| 185808 |
+
},
|
| 185809 |
+
{
|
| 185810 |
+
"epoch": 543.0,
|
| 185811 |
+
"eval_loss": 0.3958238661289215,
|
| 185812 |
+
"eval_runtime": 39.4545,
|
| 185813 |
+
"eval_samples_per_second": 21.24,
|
| 185814 |
+
"eval_steps_per_second": 0.684,
|
| 185815 |
+
"eval_wer": 0.19044464075382803,
|
| 185816 |
+
"step": 67947
|
| 185817 |
+
},
|
| 185818 |
+
{
|
| 185819 |
+
"epoch": 547.02,
|
| 185820 |
+
"learning_rate": 8.927483974358974e-06,
|
| 185821 |
+
"loss": 0.3812,
|
| 185822 |
+
"step": 67950
|
| 185823 |
+
},
|
| 185824 |
+
{
|
| 185825 |
+
"epoch": 547.06,
|
| 185826 |
+
"learning_rate": 8.927403846153847e-06,
|
| 185827 |
+
"loss": 0.3034,
|
| 185828 |
+
"step": 67955
|
| 185829 |
+
},
|
| 185830 |
+
{
|
| 185831 |
+
"epoch": 547.1,
|
| 185832 |
+
"learning_rate": 8.927323717948719e-06,
|
| 185833 |
+
"loss": 0.3513,
|
| 185834 |
+
"step": 67960
|
| 185835 |
+
},
|
| 185836 |
+
{
|
| 185837 |
+
"epoch": 547.14,
|
| 185838 |
+
"learning_rate": 8.92724358974359e-06,
|
| 185839 |
+
"loss": 0.4248,
|
| 185840 |
+
"step": 67965
|
| 185841 |
+
},
|
| 185842 |
+
{
|
| 185843 |
+
"epoch": 547.18,
|
| 185844 |
+
"learning_rate": 8.927163461538463e-06,
|
| 185845 |
+
"loss": 0.579,
|
| 185846 |
+
"step": 67970
|
| 185847 |
+
},
|
| 185848 |
+
{
|
| 185849 |
+
"epoch": 547.22,
|
| 185850 |
+
"learning_rate": 8.927083333333335e-06,
|
| 185851 |
+
"loss": 0.9471,
|
| 185852 |
+
"step": 67975
|
| 185853 |
+
},
|
| 185854 |
+
{
|
| 185855 |
+
"epoch": 547.27,
|
| 185856 |
+
"learning_rate": 8.927003205128206e-06,
|
| 185857 |
+
"loss": 0.3662,
|
| 185858 |
+
"step": 67980
|
| 185859 |
+
},
|
| 185860 |
+
{
|
| 185861 |
+
"epoch": 547.31,
|
| 185862 |
+
"learning_rate": 8.926923076923077e-06,
|
| 185863 |
+
"loss": 0.3769,
|
| 185864 |
+
"step": 67985
|
| 185865 |
+
},
|
| 185866 |
+
{
|
| 185867 |
+
"epoch": 547.35,
|
| 185868 |
+
"learning_rate": 8.92684294871795e-06,
|
| 185869 |
+
"loss": 0.4178,
|
| 185870 |
+
"step": 67990
|
| 185871 |
+
},
|
| 185872 |
+
{
|
| 185873 |
+
"epoch": 547.39,
|
| 185874 |
+
"learning_rate": 8.926762820512822e-06,
|
| 185875 |
+
"loss": 0.703,
|
| 185876 |
+
"step": 67995
|
| 185877 |
+
},
|
| 185878 |
+
{
|
| 185879 |
+
"epoch": 547.43,
|
| 185880 |
+
"learning_rate": 8.926682692307693e-06,
|
| 185881 |
+
"loss": 1.0705,
|
| 185882 |
+
"step": 68000
|
| 185883 |
+
},
|
| 185884 |
+
{
|
| 185885 |
+
"epoch": 547.47,
|
| 185886 |
+
"learning_rate": 8.926602564102564e-06,
|
| 185887 |
+
"loss": 0.3506,
|
| 185888 |
+
"step": 68005
|
| 185889 |
+
},
|
| 185890 |
+
{
|
| 185891 |
+
"epoch": 547.51,
|
| 185892 |
+
"learning_rate": 8.926522435897438e-06,
|
| 185893 |
+
"loss": 0.3535,
|
| 185894 |
+
"step": 68010
|
| 185895 |
+
},
|
| 185896 |
+
{
|
| 185897 |
+
"epoch": 547.55,
|
| 185898 |
+
"learning_rate": 8.926442307692307e-06,
|
| 185899 |
+
"loss": 0.3949,
|
| 185900 |
+
"step": 68015
|
| 185901 |
+
},
|
| 185902 |
+
{
|
| 185903 |
+
"epoch": 547.59,
|
| 185904 |
+
"learning_rate": 8.92636217948718e-06,
|
| 185905 |
+
"loss": 0.7904,
|
| 185906 |
+
"step": 68020
|
| 185907 |
+
},
|
| 185908 |
+
{
|
| 185909 |
+
"epoch": 547.63,
|
| 185910 |
+
"learning_rate": 8.926282051282053e-06,
|
| 185911 |
+
"loss": 0.9079,
|
| 185912 |
+
"step": 68025
|
| 185913 |
+
},
|
| 185914 |
+
{
|
| 185915 |
+
"epoch": 547.67,
|
| 185916 |
+
"learning_rate": 8.926201923076923e-06,
|
| 185917 |
+
"loss": 0.4741,
|
| 185918 |
+
"step": 68030
|
| 185919 |
+
},
|
| 185920 |
+
{
|
| 185921 |
+
"epoch": 547.71,
|
| 185922 |
+
"learning_rate": 8.926121794871796e-06,
|
| 185923 |
+
"loss": 0.3193,
|
| 185924 |
+
"step": 68035
|
| 185925 |
+
},
|
| 185926 |
+
{
|
| 185927 |
+
"epoch": 547.75,
|
| 185928 |
+
"learning_rate": 8.926041666666667e-06,
|
| 185929 |
+
"loss": 0.3734,
|
| 185930 |
+
"step": 68040
|
| 185931 |
+
},
|
| 185932 |
+
{
|
| 185933 |
+
"epoch": 547.79,
|
| 185934 |
+
"learning_rate": 8.925961538461539e-06,
|
| 185935 |
+
"loss": 0.7489,
|
| 185936 |
+
"step": 68045
|
| 185937 |
+
},
|
| 185938 |
+
{
|
| 185939 |
+
"epoch": 547.83,
|
| 185940 |
+
"learning_rate": 8.92588141025641e-06,
|
| 185941 |
+
"loss": 1.1998,
|
| 185942 |
+
"step": 68050
|
| 185943 |
+
},
|
| 185944 |
+
{
|
| 185945 |
+
"epoch": 547.87,
|
| 185946 |
+
"learning_rate": 8.925801282051283e-06,
|
| 185947 |
+
"loss": 0.321,
|
| 185948 |
+
"step": 68055
|
| 185949 |
+
},
|
| 185950 |
+
{
|
| 185951 |
+
"epoch": 547.91,
|
| 185952 |
+
"learning_rate": 8.925721153846154e-06,
|
| 185953 |
+
"loss": 0.3205,
|
| 185954 |
+
"step": 68060
|
| 185955 |
+
},
|
| 185956 |
+
{
|
| 185957 |
+
"epoch": 547.95,
|
| 185958 |
+
"learning_rate": 8.925641025641026e-06,
|
| 185959 |
+
"loss": 0.4711,
|
| 185960 |
+
"step": 68065
|
| 185961 |
+
},
|
| 185962 |
+
{
|
| 185963 |
+
"epoch": 547.99,
|
| 185964 |
+
"learning_rate": 8.925560897435899e-06,
|
| 185965 |
+
"loss": 0.897,
|
| 185966 |
+
"step": 68070
|
| 185967 |
+
},
|
| 185968 |
+
{
|
| 185969 |
+
"epoch": 548.0,
|
| 185970 |
+
"eval_loss": 0.3655170798301697,
|
| 185971 |
+
"eval_runtime": 38.9264,
|
| 185972 |
+
"eval_samples_per_second": 21.553,
|
| 185973 |
+
"eval_steps_per_second": 0.694,
|
| 185974 |
+
"eval_wer": 0.19745080500894455,
|
| 185975 |
+
"step": 68071
|
| 185976 |
+
},
|
| 185977 |
+
{
|
| 185978 |
+
"epoch": 548.03,
|
| 185979 |
+
"learning_rate": 8.92548076923077e-06,
|
| 185980 |
+
"loss": 0.3636,
|
| 185981 |
+
"step": 68075
|
| 185982 |
+
},
|
| 185983 |
+
{
|
| 185984 |
+
"epoch": 548.07,
|
| 185985 |
+
"learning_rate": 8.925400641025642e-06,
|
| 185986 |
+
"loss": 0.3824,
|
| 185987 |
+
"step": 68080
|
| 185988 |
+
},
|
| 185989 |
+
{
|
| 185990 |
+
"epoch": 548.11,
|
| 185991 |
+
"learning_rate": 8.925320512820513e-06,
|
| 185992 |
+
"loss": 0.3019,
|
| 185993 |
+
"step": 68085
|
| 185994 |
+
},
|
| 185995 |
+
{
|
| 185996 |
+
"epoch": 548.15,
|
| 185997 |
+
"learning_rate": 8.925240384615386e-06,
|
| 185998 |
+
"loss": 0.3797,
|
| 185999 |
+
"step": 68090
|
| 186000 |
+
},
|
| 186001 |
+
{
|
| 186002 |
+
"epoch": 548.19,
|
| 186003 |
+
"learning_rate": 8.925160256410257e-06,
|
| 186004 |
+
"loss": 0.9859,
|
| 186005 |
+
"step": 68095
|
| 186006 |
+
},
|
| 186007 |
+
{
|
| 186008 |
+
"epoch": 548.23,
|
| 186009 |
+
"learning_rate": 8.925080128205129e-06,
|
| 186010 |
+
"loss": 0.7557,
|
| 186011 |
+
"step": 68100
|
| 186012 |
+
},
|
| 186013 |
+
{
|
| 186014 |
+
"epoch": 548.27,
|
| 186015 |
+
"learning_rate": 8.925e-06,
|
| 186016 |
+
"loss": 0.359,
|
| 186017 |
+
"step": 68105
|
| 186018 |
+
},
|
| 186019 |
+
{
|
| 186020 |
+
"epoch": 548.31,
|
| 186021 |
+
"learning_rate": 8.924919871794873e-06,
|
| 186022 |
+
"loss": 0.273,
|
| 186023 |
+
"step": 68110
|
| 186024 |
+
},
|
| 186025 |
+
{
|
| 186026 |
+
"epoch": 548.35,
|
| 186027 |
+
"learning_rate": 8.924839743589745e-06,
|
| 186028 |
+
"loss": 0.4286,
|
| 186029 |
+
"step": 68115
|
| 186030 |
+
},
|
| 186031 |
+
{
|
| 186032 |
+
"epoch": 548.39,
|
| 186033 |
+
"learning_rate": 8.924759615384616e-06,
|
| 186034 |
+
"loss": 0.8112,
|
| 186035 |
+
"step": 68120
|
| 186036 |
+
},
|
| 186037 |
+
{
|
| 186038 |
+
"epoch": 548.43,
|
| 186039 |
+
"learning_rate": 8.924679487179489e-06,
|
| 186040 |
+
"loss": 0.6704,
|
| 186041 |
+
"step": 68125
|
| 186042 |
+
},
|
| 186043 |
+
{
|
| 186044 |
+
"epoch": 548.47,
|
| 186045 |
+
"learning_rate": 8.92459935897436e-06,
|
| 186046 |
+
"loss": 0.3017,
|
| 186047 |
+
"step": 68130
|
| 186048 |
+
},
|
| 186049 |
+
{
|
| 186050 |
+
"epoch": 548.51,
|
| 186051 |
+
"learning_rate": 8.924519230769232e-06,
|
| 186052 |
+
"loss": 0.3403,
|
| 186053 |
+
"step": 68135
|
| 186054 |
+
},
|
| 186055 |
+
{
|
| 186056 |
+
"epoch": 548.55,
|
| 186057 |
+
"learning_rate": 8.924439102564103e-06,
|
| 186058 |
+
"loss": 0.4607,
|
| 186059 |
+
"step": 68140
|
| 186060 |
+
},
|
| 186061 |
+
{
|
| 186062 |
+
"epoch": 548.59,
|
| 186063 |
+
"learning_rate": 8.924358974358976e-06,
|
| 186064 |
+
"loss": 1.0333,
|
| 186065 |
+
"step": 68145
|
| 186066 |
+
},
|
| 186067 |
+
{
|
| 186068 |
+
"epoch": 548.63,
|
| 186069 |
+
"learning_rate": 8.924278846153846e-06,
|
| 186070 |
+
"loss": 0.8082,
|
| 186071 |
+
"step": 68150
|
| 186072 |
+
},
|
| 186073 |
+
{
|
| 186074 |
+
"epoch": 548.67,
|
| 186075 |
+
"learning_rate": 8.924198717948719e-06,
|
| 186076 |
+
"loss": 0.2737,
|
| 186077 |
+
"step": 68155
|
| 186078 |
+
},
|
| 186079 |
+
{
|
| 186080 |
+
"epoch": 548.71,
|
| 186081 |
+
"learning_rate": 8.92411858974359e-06,
|
| 186082 |
+
"loss": 0.283,
|
| 186083 |
+
"step": 68160
|
| 186084 |
+
},
|
| 186085 |
+
{
|
| 186086 |
+
"epoch": 548.76,
|
| 186087 |
+
"learning_rate": 8.924038461538461e-06,
|
| 186088 |
+
"loss": 0.5159,
|
| 186089 |
+
"step": 68165
|
| 186090 |
+
},
|
| 186091 |
+
{
|
| 186092 |
+
"epoch": 548.8,
|
| 186093 |
+
"learning_rate": 8.923958333333335e-06,
|
| 186094 |
+
"loss": 1.1066,
|
| 186095 |
+
"step": 68170
|
| 186096 |
+
},
|
| 186097 |
+
{
|
| 186098 |
+
"epoch": 548.84,
|
| 186099 |
+
"learning_rate": 8.923878205128206e-06,
|
| 186100 |
+
"loss": 0.7521,
|
| 186101 |
+
"step": 68175
|
| 186102 |
+
},
|
| 186103 |
+
{
|
| 186104 |
+
"epoch": 548.88,
|
| 186105 |
+
"learning_rate": 8.923798076923077e-06,
|
| 186106 |
+
"loss": 0.3451,
|
| 186107 |
+
"step": 68180
|
| 186108 |
+
},
|
| 186109 |
+
{
|
| 186110 |
+
"epoch": 548.92,
|
| 186111 |
+
"learning_rate": 8.923717948717949e-06,
|
| 186112 |
+
"loss": 0.3402,
|
| 186113 |
+
"step": 68185
|
| 186114 |
+
},
|
| 186115 |
+
{
|
| 186116 |
+
"epoch": 548.96,
|
| 186117 |
+
"learning_rate": 8.923637820512822e-06,
|
| 186118 |
+
"loss": 0.4594,
|
| 186119 |
+
"step": 68190
|
| 186120 |
+
},
|
| 186121 |
+
{
|
| 186122 |
+
"epoch": 549.0,
|
| 186123 |
+
"learning_rate": 8.923573717948719e-06,
|
| 186124 |
+
"loss": 1.3483,
|
| 186125 |
+
"step": 68195
|
| 186126 |
+
},
|
| 186127 |
+
{
|
| 186128 |
+
"epoch": 549.0,
|
| 186129 |
+
"eval_loss": 0.38058263063430786,
|
| 186130 |
+
"eval_runtime": 39.1105,
|
| 186131 |
+
"eval_samples_per_second": 21.452,
|
| 186132 |
+
"eval_steps_per_second": 0.69,
|
| 186133 |
+
"eval_wer": 0.1986700537955768,
|
| 186134 |
+
"step": 68195
|
| 186135 |
+
},
|
| 186136 |
+
{
|
| 186137 |
+
"epoch": 545.04,
|
| 186138 |
+
"learning_rate": 8.92349358974359e-06,
|
| 186139 |
+
"loss": 0.3853,
|
| 186140 |
+
"step": 68200
|
| 186141 |
+
},
|
| 186142 |
+
{
|
| 186143 |
+
"epoch": 545.08,
|
| 186144 |
+
"learning_rate": 8.923413461538464e-06,
|
| 186145 |
+
"loss": 0.2899,
|
| 186146 |
+
"step": 68205
|
| 186147 |
+
},
|
| 186148 |
+
{
|
| 186149 |
+
"epoch": 545.12,
|
| 186150 |
+
"learning_rate": 8.923333333333333e-06,
|
| 186151 |
+
"loss": 0.3688,
|
| 186152 |
+
"step": 68210
|
| 186153 |
+
},
|
| 186154 |
+
{
|
| 186155 |
+
"epoch": 545.16,
|
| 186156 |
+
"learning_rate": 8.923253205128206e-06,
|
| 186157 |
+
"loss": 0.6488,
|
| 186158 |
+
"step": 68215
|
| 186159 |
+
},
|
| 186160 |
+
{
|
| 186161 |
+
"epoch": 545.2,
|
| 186162 |
+
"learning_rate": 8.923173076923078e-06,
|
| 186163 |
+
"loss": 1.2436,
|
| 186164 |
+
"step": 68220
|
| 186165 |
+
},
|
| 186166 |
+
{
|
| 186167 |
+
"epoch": 545.24,
|
| 186168 |
+
"learning_rate": 8.923092948717949e-06,
|
| 186169 |
+
"loss": 0.3309,
|
| 186170 |
+
"step": 68225
|
| 186171 |
+
},
|
| 186172 |
+
{
|
| 186173 |
+
"epoch": 545.28,
|
| 186174 |
+
"learning_rate": 8.92301282051282e-06,
|
| 186175 |
+
"loss": 0.3145,
|
| 186176 |
+
"step": 68230
|
| 186177 |
+
},
|
| 186178 |
+
{
|
| 186179 |
+
"epoch": 545.32,
|
| 186180 |
+
"learning_rate": 8.922932692307693e-06,
|
| 186181 |
+
"loss": 0.3409,
|
| 186182 |
+
"step": 68235
|
| 186183 |
+
},
|
| 186184 |
+
{
|
| 186185 |
+
"epoch": 545.36,
|
| 186186 |
+
"learning_rate": 8.922852564102565e-06,
|
| 186187 |
+
"loss": 0.446,
|
| 186188 |
+
"step": 68240
|
| 186189 |
+
},
|
| 186190 |
+
{
|
| 186191 |
+
"epoch": 545.4,
|
| 186192 |
+
"learning_rate": 8.922772435897436e-06,
|
| 186193 |
+
"loss": 1.1688,
|
| 186194 |
+
"step": 68245
|
| 186195 |
+
},
|
| 186196 |
+
{
|
| 186197 |
+
"epoch": 545.44,
|
| 186198 |
+
"learning_rate": 8.922692307692309e-06,
|
| 186199 |
+
"loss": 0.3211,
|
| 186200 |
+
"step": 68250
|
| 186201 |
+
},
|
| 186202 |
+
{
|
| 186203 |
+
"epoch": 545.48,
|
| 186204 |
+
"learning_rate": 8.92261217948718e-06,
|
| 186205 |
+
"loss": 0.366,
|
| 186206 |
+
"step": 68255
|
| 186207 |
+
},
|
| 186208 |
+
{
|
| 186209 |
+
"epoch": 545.52,
|
| 186210 |
+
"learning_rate": 8.922532051282052e-06,
|
| 186211 |
+
"loss": 0.3367,
|
| 186212 |
+
"step": 68260
|
| 186213 |
+
},
|
| 186214 |
+
{
|
| 186215 |
+
"epoch": 545.56,
|
| 186216 |
+
"learning_rate": 8.922451923076923e-06,
|
| 186217 |
+
"loss": 0.4458,
|
| 186218 |
+
"step": 68265
|
| 186219 |
+
},
|
| 186220 |
+
{
|
| 186221 |
+
"epoch": 545.6,
|
| 186222 |
+
"learning_rate": 8.922371794871796e-06,
|
| 186223 |
+
"loss": 1.3543,
|
| 186224 |
+
"step": 68270
|
| 186225 |
+
},
|
| 186226 |
+
{
|
| 186227 |
+
"epoch": 545.64,
|
| 186228 |
+
"learning_rate": 8.922291666666668e-06,
|
| 186229 |
+
"loss": 0.4659,
|
| 186230 |
+
"step": 68275
|
| 186231 |
+
},
|
| 186232 |
+
{
|
| 186233 |
+
"epoch": 545.68,
|
| 186234 |
+
"learning_rate": 8.922211538461539e-06,
|
| 186235 |
+
"loss": 0.294,
|
| 186236 |
+
"step": 68280
|
| 186237 |
+
},
|
| 186238 |
+
{
|
| 186239 |
+
"epoch": 545.72,
|
| 186240 |
+
"learning_rate": 8.92213141025641e-06,
|
| 186241 |
+
"loss": 0.3322,
|
| 186242 |
+
"step": 68285
|
| 186243 |
+
},
|
| 186244 |
+
{
|
| 186245 |
+
"epoch": 545.76,
|
| 186246 |
+
"learning_rate": 8.922051282051283e-06,
|
| 186247 |
+
"loss": 0.7161,
|
| 186248 |
+
"step": 68290
|
| 186249 |
+
},
|
| 186250 |
+
{
|
| 186251 |
+
"epoch": 545.8,
|
| 186252 |
+
"learning_rate": 8.921971153846155e-06,
|
| 186253 |
+
"loss": 1.387,
|
| 186254 |
+
"step": 68295
|
| 186255 |
+
},
|
| 186256 |
+
{
|
| 186257 |
+
"epoch": 545.84,
|
| 186258 |
+
"learning_rate": 8.921891025641026e-06,
|
| 186259 |
+
"loss": 0.3497,
|
| 186260 |
+
"step": 68300
|
| 186261 |
+
},
|
| 186262 |
+
{
|
| 186263 |
+
"epoch": 545.88,
|
| 186264 |
+
"learning_rate": 8.921810897435899e-06,
|
| 186265 |
+
"loss": 0.6459,
|
| 186266 |
+
"step": 68305
|
| 186267 |
+
},
|
| 186268 |
+
{
|
| 186269 |
+
"epoch": 545.92,
|
| 186270 |
+
"learning_rate": 8.92173076923077e-06,
|
| 186271 |
+
"loss": 0.3734,
|
| 186272 |
+
"step": 68310
|
| 186273 |
+
},
|
| 186274 |
+
{
|
| 186275 |
+
"epoch": 545.96,
|
| 186276 |
+
"learning_rate": 8.921650641025642e-06,
|
| 186277 |
+
"loss": 0.5237,
|
| 186278 |
+
"step": 68315
|
| 186279 |
+
},
|
| 186280 |
+
{
|
| 186281 |
+
"epoch": 546.0,
|
| 186282 |
+
"learning_rate": 8.921570512820513e-06,
|
| 186283 |
+
"loss": 1.6727,
|
| 186284 |
+
"step": 68320
|
| 186285 |
+
},
|
| 186286 |
+
{
|
| 186287 |
+
"epoch": 546.0,
|
| 186288 |
+
"eval_loss": 0.47506964206695557,
|
| 186289 |
+
"eval_runtime": 41.5276,
|
| 186290 |
+
"eval_samples_per_second": 20.203,
|
| 186291 |
+
"eval_steps_per_second": 0.65,
|
| 186292 |
+
"eval_wer": 0.1952493381984689,
|
| 186293 |
+
"step": 68320
|
| 186294 |
+
},
|
| 186295 |
+
{
|
| 186296 |
+
"epoch": 550.04,
|
| 186297 |
+
"learning_rate": 8.921490384615386e-06,
|
| 186298 |
+
"loss": 0.4511,
|
| 186299 |
+
"step": 68325
|
| 186300 |
+
},
|
| 186301 |
+
{
|
| 186302 |
+
"epoch": 550.08,
|
| 186303 |
+
"learning_rate": 8.921410256410256e-06,
|
| 186304 |
+
"loss": 0.2694,
|
| 186305 |
+
"step": 68330
|
| 186306 |
+
},
|
| 186307 |
+
{
|
| 186308 |
+
"epoch": 550.12,
|
| 186309 |
+
"learning_rate": 8.921330128205129e-06,
|
| 186310 |
+
"loss": 0.3538,
|
| 186311 |
+
"step": 68335
|
| 186312 |
+
},
|
| 186313 |
+
{
|
| 186314 |
+
"epoch": 550.16,
|
| 186315 |
+
"learning_rate": 8.92125e-06,
|
| 186316 |
+
"loss": 0.449,
|
| 186317 |
+
"step": 68340
|
| 186318 |
+
},
|
| 186319 |
+
{
|
| 186320 |
+
"epoch": 550.2,
|
| 186321 |
+
"learning_rate": 8.921169871794872e-06,
|
| 186322 |
+
"loss": 1.2809,
|
| 186323 |
+
"step": 68345
|
| 186324 |
+
},
|
| 186325 |
+
{
|
| 186326 |
+
"epoch": 550.24,
|
| 186327 |
+
"learning_rate": 8.921089743589745e-06,
|
| 186328 |
+
"loss": 0.3631,
|
| 186329 |
+
"step": 68350
|
| 186330 |
+
},
|
| 186331 |
+
{
|
| 186332 |
+
"epoch": 550.28,
|
| 186333 |
+
"learning_rate": 8.921009615384616e-06,
|
| 186334 |
+
"loss": 0.3321,
|
| 186335 |
+
"step": 68355
|
| 186336 |
+
},
|
| 186337 |
+
{
|
| 186338 |
+
"epoch": 550.32,
|
| 186339 |
+
"learning_rate": 8.920929487179487e-06,
|
| 186340 |
+
"loss": 0.3193,
|
| 186341 |
+
"step": 68360
|
| 186342 |
+
},
|
| 186343 |
+
{
|
| 186344 |
+
"epoch": 550.36,
|
| 186345 |
+
"learning_rate": 8.920849358974359e-06,
|
| 186346 |
+
"loss": 0.4592,
|
| 186347 |
+
"step": 68365
|
| 186348 |
+
},
|
| 186349 |
+
{
|
| 186350 |
+
"epoch": 550.4,
|
| 186351 |
+
"learning_rate": 8.920769230769232e-06,
|
| 186352 |
+
"loss": 1.1757,
|
| 186353 |
+
"step": 68370
|
| 186354 |
+
},
|
| 186355 |
+
{
|
| 186356 |
+
"epoch": 550.44,
|
| 186357 |
+
"learning_rate": 8.920689102564103e-06,
|
| 186358 |
+
"loss": 0.3315,
|
| 186359 |
+
"step": 68375
|
| 186360 |
+
},
|
| 186361 |
+
{
|
| 186362 |
+
"epoch": 550.48,
|
| 186363 |
+
"learning_rate": 8.920608974358975e-06,
|
| 186364 |
+
"loss": 0.316,
|
| 186365 |
+
"step": 68380
|
| 186366 |
+
},
|
| 186367 |
+
{
|
| 186368 |
+
"epoch": 550.52,
|
| 186369 |
+
"learning_rate": 8.920528846153846e-06,
|
| 186370 |
+
"loss": 0.3592,
|
| 186371 |
+
"step": 68385
|
| 186372 |
+
},
|
| 186373 |
+
{
|
| 186374 |
+
"epoch": 550.56,
|
| 186375 |
+
"learning_rate": 8.920448717948719e-06,
|
| 186376 |
+
"loss": 0.5418,
|
| 186377 |
+
"step": 68390
|
| 186378 |
+
},
|
| 186379 |
+
{
|
| 186380 |
+
"epoch": 550.6,
|
| 186381 |
+
"learning_rate": 8.92036858974359e-06,
|
| 186382 |
+
"loss": 1.1373,
|
| 186383 |
+
"step": 68395
|
| 186384 |
+
},
|
| 186385 |
+
{
|
| 186386 |
+
"epoch": 550.64,
|
| 186387 |
+
"learning_rate": 8.920288461538462e-06,
|
| 186388 |
+
"loss": 0.4661,
|
| 186389 |
+
"step": 68400
|
| 186390 |
+
},
|
| 186391 |
+
{
|
| 186392 |
+
"epoch": 550.68,
|
| 186393 |
+
"learning_rate": 8.920208333333335e-06,
|
| 186394 |
+
"loss": 0.3396,
|
| 186395 |
+
"step": 68405
|
| 186396 |
+
},
|
| 186397 |
+
{
|
| 186398 |
+
"epoch": 550.72,
|
| 186399 |
+
"learning_rate": 8.920128205128206e-06,
|
| 186400 |
+
"loss": 0.3348,
|
| 186401 |
+
"step": 68410
|
| 186402 |
+
},
|
| 186403 |
+
{
|
| 186404 |
+
"epoch": 550.76,
|
| 186405 |
+
"learning_rate": 8.920048076923078e-06,
|
| 186406 |
+
"loss": 0.4885,
|
| 186407 |
+
"step": 68415
|
| 186408 |
+
},
|
| 186409 |
+
{
|
| 186410 |
+
"epoch": 550.8,
|
| 186411 |
+
"learning_rate": 8.919967948717949e-06,
|
| 186412 |
+
"loss": 1.2031,
|
| 186413 |
+
"step": 68420
|
| 186414 |
+
},
|
| 186415 |
+
{
|
| 186416 |
+
"epoch": 550.84,
|
| 186417 |
+
"learning_rate": 8.919887820512822e-06,
|
| 186418 |
+
"loss": 0.353,
|
| 186419 |
+
"step": 68425
|
| 186420 |
+
},
|
| 186421 |
+
{
|
| 186422 |
+
"epoch": 550.88,
|
| 186423 |
+
"learning_rate": 8.919807692307693e-06,
|
| 186424 |
+
"loss": 0.3036,
|
| 186425 |
+
"step": 68430
|
| 186426 |
+
},
|
| 186427 |
+
{
|
| 186428 |
+
"epoch": 550.92,
|
| 186429 |
+
"learning_rate": 8.919727564102565e-06,
|
| 186430 |
+
"loss": 0.3383,
|
| 186431 |
+
"step": 68435
|
| 186432 |
+
},
|
| 186433 |
+
{
|
| 186434 |
+
"epoch": 550.96,
|
| 186435 |
+
"learning_rate": 8.919647435897436e-06,
|
| 186436 |
+
"loss": 0.4424,
|
| 186437 |
+
"step": 68440
|
| 186438 |
+
},
|
| 186439 |
+
{
|
| 186440 |
+
"epoch": 551.0,
|
| 186441 |
+
"eval_loss": 0.3531026244163513,
|
| 186442 |
+
"eval_runtime": 41.0545,
|
| 186443 |
+
"eval_samples_per_second": 20.436,
|
| 186444 |
+
"eval_steps_per_second": 0.658,
|
| 186445 |
+
"eval_wer": 0.18525332578545145,
|
| 186446 |
+
"step": 68444
|
| 186447 |
+
},
|
| 186448 |
+
{
|
| 186449 |
+
"epoch": 551.01,
|
| 186450 |
+
"learning_rate": 8.919567307692309e-06,
|
| 186451 |
+
"loss": 0.3373,
|
| 186452 |
+
"step": 68445
|
| 186453 |
+
},
|
| 186454 |
+
{
|
| 186455 |
+
"epoch": 551.05,
|
| 186456 |
+
"learning_rate": 8.91948717948718e-06,
|
| 186457 |
+
"loss": 0.2903,
|
| 186458 |
+
"step": 68450
|
| 186459 |
+
},
|
| 186460 |
+
{
|
| 186461 |
+
"epoch": 551.09,
|
| 186462 |
+
"learning_rate": 8.919407051282052e-06,
|
| 186463 |
+
"loss": 0.2579,
|
| 186464 |
+
"step": 68455
|
| 186465 |
+
},
|
| 186466 |
+
{
|
| 186467 |
+
"epoch": 551.13,
|
| 186468 |
+
"learning_rate": 8.919326923076925e-06,
|
| 186469 |
+
"loss": 0.3771,
|
| 186470 |
+
"step": 68460
|
| 186471 |
+
},
|
| 186472 |
+
{
|
| 186473 |
+
"epoch": 551.17,
|
| 186474 |
+
"learning_rate": 8.919246794871796e-06,
|
| 186475 |
+
"loss": 0.5803,
|
| 186476 |
+
"step": 68465
|
| 186477 |
+
},
|
| 186478 |
+
{
|
| 186479 |
+
"epoch": 551.21,
|
| 186480 |
+
"learning_rate": 8.919166666666668e-06,
|
| 186481 |
+
"loss": 1.1431,
|
| 186482 |
+
"step": 68470
|
| 186483 |
+
},
|
| 186484 |
+
{
|
| 186485 |
+
"epoch": 551.25,
|
| 186486 |
+
"learning_rate": 8.919086538461539e-06,
|
| 186487 |
+
"loss": 0.3321,
|
| 186488 |
+
"step": 68475
|
| 186489 |
+
},
|
| 186490 |
+
{
|
| 186491 |
+
"epoch": 551.29,
|
| 186492 |
+
"learning_rate": 8.919006410256412e-06,
|
| 186493 |
+
"loss": 0.3079,
|
| 186494 |
+
"step": 68480
|
| 186495 |
+
},
|
| 186496 |
+
{
|
| 186497 |
+
"epoch": 551.33,
|
| 186498 |
+
"learning_rate": 8.918926282051282e-06,
|
| 186499 |
+
"loss": 0.3175,
|
| 186500 |
+
"step": 68485
|
| 186501 |
+
},
|
| 186502 |
+
{
|
| 186503 |
+
"epoch": 551.37,
|
| 186504 |
+
"learning_rate": 8.918846153846155e-06,
|
| 186505 |
+
"loss": 0.6332,
|
| 186506 |
+
"step": 68490
|
| 186507 |
+
},
|
| 186508 |
+
{
|
| 186509 |
+
"epoch": 551.41,
|
| 186510 |
+
"learning_rate": 8.918766025641028e-06,
|
| 186511 |
+
"loss": 1.0984,
|
| 186512 |
+
"step": 68495
|
| 186513 |
+
},
|
| 186514 |
+
{
|
| 186515 |
+
"epoch": 551.45,
|
| 186516 |
+
"learning_rate": 8.918685897435897e-06,
|
| 186517 |
+
"loss": 0.3226,
|
| 186518 |
+
"step": 68500
|
| 186519 |
+
},
|
| 186520 |
+
{
|
| 186521 |
+
"epoch": 551.49,
|
| 186522 |
+
"learning_rate": 8.91860576923077e-06,
|
| 186523 |
+
"loss": 0.277,
|
| 186524 |
+
"step": 68505
|
| 186525 |
+
},
|
| 186526 |
+
{
|
| 186527 |
+
"epoch": 551.53,
|
| 186528 |
+
"learning_rate": 8.918525641025642e-06,
|
| 186529 |
+
"loss": 0.3595,
|
| 186530 |
+
"step": 68510
|
| 186531 |
+
},
|
| 186532 |
+
{
|
| 186533 |
+
"epoch": 551.57,
|
| 186534 |
+
"learning_rate": 8.918445512820513e-06,
|
| 186535 |
+
"loss": 0.6192,
|
| 186536 |
+
"step": 68515
|
| 186537 |
+
},
|
| 186538 |
+
{
|
| 186539 |
+
"epoch": 551.61,
|
| 186540 |
+
"learning_rate": 8.918365384615385e-06,
|
| 186541 |
+
"loss": 1.1009,
|
| 186542 |
+
"step": 68520
|
| 186543 |
+
},
|
| 186544 |
+
{
|
| 186545 |
+
"epoch": 551.65,
|
| 186546 |
+
"learning_rate": 8.918285256410258e-06,
|
| 186547 |
+
"loss": 0.3705,
|
| 186548 |
+
"step": 68525
|
| 186549 |
+
},
|
| 186550 |
+
{
|
| 186551 |
+
"epoch": 551.69,
|
| 186552 |
+
"learning_rate": 8.918205128205129e-06,
|
| 186553 |
+
"loss": 0.2911,
|
| 186554 |
+
"step": 68530
|
| 186555 |
+
},
|
| 186556 |
+
{
|
| 186557 |
+
"epoch": 551.73,
|
| 186558 |
+
"learning_rate": 8.918125e-06,
|
| 186559 |
+
"loss": 0.3628,
|
| 186560 |
+
"step": 68535
|
| 186561 |
+
},
|
| 186562 |
+
{
|
| 186563 |
+
"epoch": 551.77,
|
| 186564 |
+
"learning_rate": 8.918044871794872e-06,
|
| 186565 |
+
"loss": 0.533,
|
| 186566 |
+
"step": 68540
|
| 186567 |
+
},
|
| 186568 |
+
{
|
| 186569 |
+
"epoch": 551.81,
|
| 186570 |
+
"learning_rate": 8.917964743589745e-06,
|
| 186571 |
+
"loss": 1.2326,
|
| 186572 |
+
"step": 68545
|
| 186573 |
+
},
|
| 186574 |
+
{
|
| 186575 |
+
"epoch": 551.85,
|
| 186576 |
+
"learning_rate": 8.917884615384616e-06,
|
| 186577 |
+
"loss": 0.3388,
|
| 186578 |
+
"step": 68550
|
| 186579 |
+
},
|
| 186580 |
+
{
|
| 186581 |
+
"epoch": 551.89,
|
| 186582 |
+
"learning_rate": 8.917804487179487e-06,
|
| 186583 |
+
"loss": 0.3131,
|
| 186584 |
+
"step": 68555
|
| 186585 |
+
},
|
| 186586 |
+
{
|
| 186587 |
+
"epoch": 551.93,
|
| 186588 |
+
"learning_rate": 8.91772435897436e-06,
|
| 186589 |
+
"loss": 0.3515,
|
| 186590 |
+
"step": 68560
|
| 186591 |
+
},
|
| 186592 |
+
{
|
| 186593 |
+
"epoch": 551.97,
|
| 186594 |
+
"learning_rate": 8.917644230769232e-06,
|
| 186595 |
+
"loss": 0.6563,
|
| 186596 |
+
"step": 68565
|
| 186597 |
+
},
|
| 186598 |
+
{
|
| 186599 |
+
"epoch": 552.0,
|
| 186600 |
+
"eval_loss": 0.4913657009601593,
|
| 186601 |
+
"eval_runtime": 39.6316,
|
| 186602 |
+
"eval_samples_per_second": 21.17,
|
| 186603 |
+
"eval_steps_per_second": 0.681,
|
| 186604 |
+
"eval_wer": 0.19267410087881248,
|
| 186605 |
+
"step": 68568
|
| 186606 |
+
},
|
| 186607 |
+
{
|
| 186608 |
+
"epoch": 548.02,
|
| 186609 |
+
"learning_rate": 8.917564102564103e-06,
|
| 186610 |
+
"loss": 0.4739,
|
| 186611 |
+
"step": 68570
|
| 186612 |
+
},
|
| 186613 |
+
{
|
| 186614 |
+
"epoch": 548.06,
|
| 186615 |
+
"learning_rate": 8.917483974358975e-06,
|
| 186616 |
+
"loss": 0.316,
|
| 186617 |
+
"step": 68575
|
| 186618 |
+
},
|
| 186619 |
+
{
|
| 186620 |
+
"epoch": 548.1,
|
| 186621 |
+
"learning_rate": 8.917403846153848e-06,
|
| 186622 |
+
"loss": 0.3383,
|
| 186623 |
+
"step": 68580
|
| 186624 |
+
},
|
| 186625 |
+
{
|
| 186626 |
+
"epoch": 548.14,
|
| 186627 |
+
"learning_rate": 8.917323717948719e-06,
|
| 186628 |
+
"loss": 0.385,
|
| 186629 |
+
"step": 68585
|
| 186630 |
+
},
|
| 186631 |
+
{
|
| 186632 |
+
"epoch": 548.18,
|
| 186633 |
+
"learning_rate": 8.91724358974359e-06,
|
| 186634 |
+
"loss": 0.6467,
|
| 186635 |
+
"step": 68590
|
| 186636 |
+
},
|
| 186637 |
+
{
|
| 186638 |
+
"epoch": 548.22,
|
| 186639 |
+
"learning_rate": 8.917163461538463e-06,
|
| 186640 |
+
"loss": 1.0687,
|
| 186641 |
+
"step": 68595
|
| 186642 |
+
},
|
| 186643 |
+
{
|
| 186644 |
+
"epoch": 548.26,
|
| 186645 |
+
"learning_rate": 8.917083333333335e-06,
|
| 186646 |
+
"loss": 0.33,
|
| 186647 |
+
"step": 68600
|
| 186648 |
+
},
|
| 186649 |
+
{
|
| 186650 |
+
"epoch": 548.3,
|
| 186651 |
+
"learning_rate": 8.917003205128206e-06,
|
| 186652 |
+
"loss": 0.3272,
|
| 186653 |
+
"step": 68605
|
| 186654 |
+
},
|
| 186655 |
+
{
|
| 186656 |
+
"epoch": 548.34,
|
| 186657 |
+
"learning_rate": 8.916923076923077e-06,
|
| 186658 |
+
"loss": 0.3578,
|
| 186659 |
+
"step": 68610
|
| 186660 |
+
},
|
| 186661 |
+
{
|
| 186662 |
+
"epoch": 548.38,
|
| 186663 |
+
"learning_rate": 8.91684294871795e-06,
|
| 186664 |
+
"loss": 0.6878,
|
| 186665 |
+
"step": 68615
|
| 186666 |
+
},
|
| 186667 |
+
{
|
| 186668 |
+
"epoch": 548.42,
|
| 186669 |
+
"learning_rate": 8.91676282051282e-06,
|
| 186670 |
+
"loss": 0.9201,
|
| 186671 |
+
"step": 68620
|
| 186672 |
+
},
|
| 186673 |
+
{
|
| 186674 |
+
"epoch": 548.46,
|
| 186675 |
+
"learning_rate": 8.916682692307693e-06,
|
| 186676 |
+
"loss": 0.2895,
|
| 186677 |
+
"step": 68625
|
| 186678 |
+
},
|
| 186679 |
+
{
|
| 186680 |
+
"epoch": 548.5,
|
| 186681 |
+
"learning_rate": 8.916602564102565e-06,
|
| 186682 |
+
"loss": 0.3369,
|
| 186683 |
+
"step": 68630
|
| 186684 |
+
},
|
| 186685 |
+
{
|
| 186686 |
+
"epoch": 548.54,
|
| 186687 |
+
"learning_rate": 8.916522435897436e-06,
|
| 186688 |
+
"loss": 0.3617,
|
| 186689 |
+
"step": 68635
|
| 186690 |
+
},
|
| 186691 |
+
{
|
| 186692 |
+
"epoch": 548.58,
|
| 186693 |
+
"learning_rate": 8.916442307692307e-06,
|
| 186694 |
+
"loss": 0.5913,
|
| 186695 |
+
"step": 68640
|
| 186696 |
+
},
|
| 186697 |
+
{
|
| 186698 |
+
"epoch": 548.62,
|
| 186699 |
+
"learning_rate": 8.91636217948718e-06,
|
| 186700 |
+
"loss": 1.1614,
|
| 186701 |
+
"step": 68645
|
| 186702 |
+
},
|
| 186703 |
+
{
|
| 186704 |
+
"epoch": 548.66,
|
| 186705 |
+
"learning_rate": 8.916282051282052e-06,
|
| 186706 |
+
"loss": 0.2917,
|
| 186707 |
+
"step": 68650
|
| 186708 |
+
},
|
| 186709 |
+
{
|
| 186710 |
+
"epoch": 548.7,
|
| 186711 |
+
"learning_rate": 8.916201923076923e-06,
|
| 186712 |
+
"loss": 0.3465,
|
| 186713 |
+
"step": 68655
|
| 186714 |
+
},
|
| 186715 |
+
{
|
| 186716 |
+
"epoch": 548.74,
|
| 186717 |
+
"learning_rate": 8.916121794871796e-06,
|
| 186718 |
+
"loss": 0.3442,
|
| 186719 |
+
"step": 68660
|
| 186720 |
+
},
|
| 186721 |
+
{
|
| 186722 |
+
"epoch": 548.78,
|
| 186723 |
+
"learning_rate": 8.916041666666667e-06,
|
| 186724 |
+
"loss": 0.7135,
|
| 186725 |
+
"step": 68665
|
| 186726 |
+
},
|
| 186727 |
+
{
|
| 186728 |
+
"epoch": 548.82,
|
| 186729 |
+
"learning_rate": 8.915961538461539e-06,
|
| 186730 |
+
"loss": 1.1436,
|
| 186731 |
+
"step": 68670
|
| 186732 |
+
},
|
| 186733 |
+
{
|
| 186734 |
+
"epoch": 548.86,
|
| 186735 |
+
"learning_rate": 8.91588141025641e-06,
|
| 186736 |
+
"loss": 0.3227,
|
| 186737 |
+
"step": 68675
|
| 186738 |
+
},
|
| 186739 |
+
{
|
| 186740 |
+
"epoch": 548.9,
|
| 186741 |
+
"learning_rate": 8.915801282051283e-06,
|
| 186742 |
+
"loss": 0.3273,
|
| 186743 |
+
"step": 68680
|
| 186744 |
+
},
|
| 186745 |
+
{
|
| 186746 |
+
"epoch": 548.94,
|
| 186747 |
+
"learning_rate": 8.915721153846155e-06,
|
| 186748 |
+
"loss": 0.3505,
|
| 186749 |
+
"step": 68685
|
| 186750 |
+
},
|
| 186751 |
+
{
|
| 186752 |
+
"epoch": 548.98,
|
| 186753 |
+
"learning_rate": 8.915641025641026e-06,
|
| 186754 |
+
"loss": 0.6269,
|
| 186755 |
+
"step": 68690
|
| 186756 |
+
},
|
| 186757 |
+
{
|
| 186758 |
+
"epoch": 549.0,
|
| 186759 |
+
"eval_loss": 0.4549594819545746,
|
| 186760 |
+
"eval_runtime": 40.2207,
|
| 186761 |
+
"eval_samples_per_second": 20.86,
|
| 186762 |
+
"eval_steps_per_second": 0.671,
|
| 186763 |
+
"eval_wer": 0.1973022238425082,
|
| 186764 |
+
"step": 68693
|
| 186765 |
+
},
|
| 186766 |
+
{
|
| 186767 |
+
"epoch": 549.02,
|
| 186768 |
+
"learning_rate": 8.915560897435899e-06,
|
| 186769 |
+
"loss": 0.4356,
|
| 186770 |
+
"step": 68695
|
| 186771 |
+
},
|
| 186772 |
+
{
|
| 186773 |
+
"epoch": 549.06,
|
| 186774 |
+
"learning_rate": 8.91548076923077e-06,
|
| 186775 |
+
"loss": 0.3655,
|
| 186776 |
+
"step": 68700
|
| 186777 |
+
},
|
| 186778 |
+
{
|
| 186779 |
+
"epoch": 549.1,
|
| 186780 |
+
"learning_rate": 8.915400641025642e-06,
|
| 186781 |
+
"loss": 0.3215,
|
| 186782 |
+
"step": 68705
|
| 186783 |
+
},
|
| 186784 |
+
{
|
| 186785 |
+
"epoch": 549.14,
|
| 186786 |
+
"learning_rate": 8.915320512820513e-06,
|
| 186787 |
+
"loss": 0.3813,
|
| 186788 |
+
"step": 68710
|
| 186789 |
+
},
|
| 186790 |
+
{
|
| 186791 |
+
"epoch": 549.18,
|
| 186792 |
+
"learning_rate": 8.915240384615386e-06,
|
| 186793 |
+
"loss": 0.6744,
|
| 186794 |
+
"step": 68715
|
| 186795 |
+
},
|
| 186796 |
+
{
|
| 186797 |
+
"epoch": 549.22,
|
| 186798 |
+
"learning_rate": 8.915160256410257e-06,
|
| 186799 |
+
"loss": 0.9949,
|
| 186800 |
+
"step": 68720
|
| 186801 |
+
},
|
| 186802 |
+
{
|
| 186803 |
+
"epoch": 549.25,
|
| 186804 |
+
"learning_rate": 8.915080128205129e-06,
|
| 186805 |
+
"loss": 0.3279,
|
| 186806 |
+
"step": 68725
|
| 186807 |
+
},
|
| 186808 |
+
{
|
| 186809 |
+
"epoch": 549.29,
|
| 186810 |
+
"learning_rate": 8.915e-06,
|
| 186811 |
+
"loss": 0.3263,
|
| 186812 |
+
"step": 68730
|
| 186813 |
+
},
|
| 186814 |
+
{
|
| 186815 |
+
"epoch": 549.33,
|
| 186816 |
+
"learning_rate": 8.914919871794873e-06,
|
| 186817 |
+
"loss": 0.3473,
|
| 186818 |
+
"step": 68735
|
| 186819 |
+
},
|
| 186820 |
+
{
|
| 186821 |
+
"epoch": 549.37,
|
| 186822 |
+
"learning_rate": 8.914839743589745e-06,
|
| 186823 |
+
"loss": 0.628,
|
| 186824 |
+
"step": 68740
|
| 186825 |
+
},
|
| 186826 |
+
{
|
| 186827 |
+
"epoch": 549.41,
|
| 186828 |
+
"learning_rate": 8.914759615384616e-06,
|
| 186829 |
+
"loss": 0.9994,
|
| 186830 |
+
"step": 68745
|
| 186831 |
+
},
|
| 186832 |
+
{
|
| 186833 |
+
"epoch": 549.45,
|
| 186834 |
+
"learning_rate": 8.914679487179489e-06,
|
| 186835 |
+
"loss": 0.3344,
|
| 186836 |
+
"step": 68750
|
| 186837 |
+
},
|
| 186838 |
+
{
|
| 186839 |
+
"epoch": 549.49,
|
| 186840 |
+
"learning_rate": 8.91459935897436e-06,
|
| 186841 |
+
"loss": 0.2747,
|
| 186842 |
+
"step": 68755
|
| 186843 |
+
},
|
| 186844 |
+
{
|
| 186845 |
+
"epoch": 549.53,
|
| 186846 |
+
"learning_rate": 8.914519230769232e-06,
|
| 186847 |
+
"loss": 0.4123,
|
| 186848 |
+
"step": 68760
|
| 186849 |
+
},
|
| 186850 |
+
{
|
| 186851 |
+
"epoch": 549.57,
|
| 186852 |
+
"learning_rate": 8.914439102564103e-06,
|
| 186853 |
+
"loss": 0.7106,
|
| 186854 |
+
"step": 68765
|
| 186855 |
+
},
|
| 186856 |
+
{
|
| 186857 |
+
"epoch": 549.61,
|
| 186858 |
+
"learning_rate": 8.914358974358976e-06,
|
| 186859 |
+
"loss": 1.1318,
|
| 186860 |
+
"step": 68770
|
| 186861 |
+
},
|
| 186862 |
+
{
|
| 186863 |
+
"epoch": 549.65,
|
| 186864 |
+
"learning_rate": 8.914278846153846e-06,
|
| 186865 |
+
"loss": 0.4238,
|
| 186866 |
+
"step": 68775
|
| 186867 |
+
},
|
| 186868 |
+
{
|
| 186869 |
+
"epoch": 549.69,
|
| 186870 |
+
"learning_rate": 8.914198717948719e-06,
|
| 186871 |
+
"loss": 0.4404,
|
| 186872 |
+
"step": 68780
|
| 186873 |
+
},
|
| 186874 |
+
{
|
| 186875 |
+
"epoch": 549.73,
|
| 186876 |
+
"learning_rate": 8.91411858974359e-06,
|
| 186877 |
+
"loss": 0.3851,
|
| 186878 |
+
"step": 68785
|
| 186879 |
+
},
|
| 186880 |
+
{
|
| 186881 |
+
"epoch": 549.77,
|
| 186882 |
+
"learning_rate": 8.914038461538462e-06,
|
| 186883 |
+
"loss": 0.6609,
|
| 186884 |
+
"step": 68790
|
| 186885 |
+
},
|
| 186886 |
+
{
|
| 186887 |
+
"epoch": 549.81,
|
| 186888 |
+
"learning_rate": 8.913958333333335e-06,
|
| 186889 |
+
"loss": 1.2404,
|
| 186890 |
+
"step": 68795
|
| 186891 |
+
},
|
| 186892 |
+
{
|
| 186893 |
+
"epoch": 549.85,
|
| 186894 |
+
"learning_rate": 8.913878205128206e-06,
|
| 186895 |
+
"loss": 0.3031,
|
| 186896 |
+
"step": 68800
|
| 186897 |
+
},
|
| 186898 |
+
{
|
| 186899 |
+
"epoch": 549.89,
|
| 186900 |
+
"learning_rate": 8.913798076923077e-06,
|
| 186901 |
+
"loss": 0.2821,
|
| 186902 |
+
"step": 68805
|
| 186903 |
+
},
|
| 186904 |
+
{
|
| 186905 |
+
"epoch": 549.93,
|
| 186906 |
+
"learning_rate": 8.913717948717949e-06,
|
| 186907 |
+
"loss": 0.3476,
|
| 186908 |
+
"step": 68810
|
| 186909 |
+
},
|
| 186910 |
+
{
|
| 186911 |
+
"epoch": 549.97,
|
| 186912 |
+
"learning_rate": 8.913637820512822e-06,
|
| 186913 |
+
"loss": 0.6489,
|
| 186914 |
+
"step": 68815
|
| 186915 |
+
},
|
| 186916 |
+
{
|
| 186917 |
+
"epoch": 550.0,
|
| 186918 |
+
"eval_loss": 0.351525217294693,
|
| 186919 |
+
"eval_runtime": 39.2564,
|
| 186920 |
+
"eval_samples_per_second": 21.347,
|
| 186921 |
+
"eval_steps_per_second": 0.688,
|
| 186922 |
+
"eval_wer": 0.1943950177935943,
|
| 186923 |
+
"step": 68818
|
| 186924 |
}
|
| 186925 |
],
|
| 186926 |
+
"max_steps": 625000,
|
| 186927 |
"num_train_epochs": 5000,
|
| 186928 |
+
"total_flos": 1.936554826303671e+20,
|
| 186929 |
"trial_name": null,
|
| 186930 |
"trial_params": null
|
| 186931 |
}
|
model-bin/finetune/base/{checkpoint-68195 β checkpoint-68818}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/log/1629840697.4590368/events.out.tfevents.1629840697.c435e1c5ee04.920.231
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d07a11d389d51048cd2ce4bcfef6ffa2e954f086370dcadcd4bb5c03ef9e360b
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629841338.3804567/events.out.tfevents.1629841338.c435e1c5ee04.920.233
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c829abf30deb7cfa6fa48b2639c9afe7c790fb210c6093e415740d15fb905077
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629841998.772923/events.out.tfevents.1629841998.c435e1c5ee04.920.235
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61c8438d267fe9e56c4349eced0d88eb8b8605160ee2ec76dd42531323356080
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629842644.4759989/events.out.tfevents.1629842644.c435e1c5ee04.920.237
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef2a3f81f631f4a49c90c62d5db72585e8a7d7955c646ae7afb49d7777047c0d
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629843285.8477933/events.out.tfevents.1629843285.c435e1c5ee04.920.239
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df7bb7495863188c8fc2968cdff628df60f30fd025fed46f8974d57e45a8043c
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/events.out.tfevents.1629840697.c435e1c5ee04.920.230
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df0714608e830691baa16d7b5e1885b5d2a2dcbdb03f679d3e245ca205a76e1e
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629841338.c435e1c5ee04.920.232
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb8646038d3ed73d99a1241bdec464d7fd7f3aa7d0837b1bc59606de7f15ac59
|
| 3 |
+
size 8462
|
model-bin/finetune/base/log/events.out.tfevents.1629841998.c435e1c5ee04.920.234
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:baef5622116291ac5a532ca4c0d01e970ff516a3f405da6b1d4aa97502c3f3ff
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629842644.c435e1c5ee04.920.236
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e4e5082081d3757b14433504efa1eee4ffcb706f6a4d94b4b777ba0f89c6bae
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629843285.c435e1c5ee04.920.238
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e29ce135620281801cf6a5458af019b88f4a58e0ed575197fe9e714af8cf35f5
|
| 3 |
+
size 8622
|