Check commited on
Commit
fcc4b9c
Β·
1 Parent(s): 69dcd5b

"auto-commit"

Browse files
Files changed (28) hide show
  1. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/config.json +0 -0
  2. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/optimizer.pt +1 -1
  3. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/preprocessor_config.json +0 -0
  4. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68444}/pytorch_model.bin +1 -1
  5. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/rng_state.pth +1 -1
  6. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68444}/scaler.pt +1 -1
  7. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/scheduler.pt +1 -1
  8. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68444}/trainer_state.json +317 -5
  9. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/training_args.bin +0 -0
  10. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/config.json +0 -0
  11. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/optimizer.pt +1 -1
  12. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/preprocessor_config.json +0 -0
  13. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68818}/pytorch_model.bin +1 -1
  14. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/rng_state.pth +2 -2
  15. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68818}/scaler.pt +1 -1
  16. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/scheduler.pt +1 -1
  17. model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68818}/trainer_state.json +1749 -6
  18. model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/training_args.bin +0 -0
  19. model-bin/finetune/base/log/1629840697.4590368/events.out.tfevents.1629840697.c435e1c5ee04.920.231 +3 -0
  20. model-bin/finetune/base/log/1629841338.3804567/events.out.tfevents.1629841338.c435e1c5ee04.920.233 +3 -0
  21. model-bin/finetune/base/log/1629841998.772923/events.out.tfevents.1629841998.c435e1c5ee04.920.235 +3 -0
  22. model-bin/finetune/base/log/1629842644.4759989/events.out.tfevents.1629842644.c435e1c5ee04.920.237 +3 -0
  23. model-bin/finetune/base/log/1629843285.8477933/events.out.tfevents.1629843285.c435e1c5ee04.920.239 +3 -0
  24. model-bin/finetune/base/log/events.out.tfevents.1629840697.c435e1c5ee04.920.230 +3 -0
  25. model-bin/finetune/base/log/events.out.tfevents.1629841338.c435e1c5ee04.920.232 +3 -0
  26. model-bin/finetune/base/log/events.out.tfevents.1629841998.c435e1c5ee04.920.234 +3 -0
  27. model-bin/finetune/base/log/events.out.tfevents.1629842644.c435e1c5ee04.920.236 +3 -0
  28. model-bin/finetune/base/log/events.out.tfevents.1629843285.c435e1c5ee04.920.238 +3 -0
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f29be78508a07fb73240b11ffa7167d4632163ad60416d94009458f2a159529
3
  size 722165393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51466514ff46e014697533d374223efd8d0f28ef3d479f3f85da7a120794091a
3
  size 722165393
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/preprocessor_config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68444}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb02808d8842791dc4116b2aee9127299d59ea802c1a61ba6ba3092f36c849da
3
  size 377909911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eca26b4d912d8163ec7356526a3cb6e0d499f65711c3a03987b8a7a7ab2f22b
3
  size 377909911
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:506537a02628941466ae681cb8b9d6d5f1a22f228b010fe75395d0621cb8473b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4efbd31df0b38278619f9e8e69d966325f5fcd10f44279c23b159dec6316e226
3
  size 14503
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68444}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6497b3560f4dce675de0441236126359cb27ad7919d73715dda52b9f6fbfb5a6
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b01834bbb5ad7da18b208abc1899397cef1e36be4c74025882aee2138c95ff7
3
  size 559
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:754bdca3fd4dc1dd416ee07d27446b6b442be519e3b7e30a419105bdc19f4b05
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873ebfa7b0a1fec5638b4c9861dd2cc6b33ab6a77483e2566fffb863052317c3
3
  size 623
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68444}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.1855363713557883,
3
- "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-67447",
4
- "epoch": 548.995983935743,
5
- "global_step": 68195,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -186132,11 +186132,323 @@
186132
  "eval_steps_per_second": 0.69,
186133
  "eval_wer": 0.1986700537955768,
186134
  "step": 68195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186135
  }
186136
  ],
186137
  "max_steps": 620000,
186138
  "num_train_epochs": 5000,
186139
- "total_flos": 1.919002060140202e+20,
186140
  "trial_name": null,
186141
  "trial_params": null
186142
  }
 
1
  {
2
+ "best_metric": 0.18525332578545145,
3
+ "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-68444",
4
+ "epoch": 550.995983935743,
5
+ "global_step": 68444,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
186132
  "eval_steps_per_second": 0.69,
186133
  "eval_wer": 0.1986700537955768,
186134
  "step": 68195
186135
+ },
186136
+ {
186137
+ "epoch": 545.04,
186138
+ "learning_rate": 8.92349358974359e-06,
186139
+ "loss": 0.3853,
186140
+ "step": 68200
186141
+ },
186142
+ {
186143
+ "epoch": 545.08,
186144
+ "learning_rate": 8.923413461538464e-06,
186145
+ "loss": 0.2899,
186146
+ "step": 68205
186147
+ },
186148
+ {
186149
+ "epoch": 545.12,
186150
+ "learning_rate": 8.923333333333333e-06,
186151
+ "loss": 0.3688,
186152
+ "step": 68210
186153
+ },
186154
+ {
186155
+ "epoch": 545.16,
186156
+ "learning_rate": 8.923253205128206e-06,
186157
+ "loss": 0.6488,
186158
+ "step": 68215
186159
+ },
186160
+ {
186161
+ "epoch": 545.2,
186162
+ "learning_rate": 8.923173076923078e-06,
186163
+ "loss": 1.2436,
186164
+ "step": 68220
186165
+ },
186166
+ {
186167
+ "epoch": 545.24,
186168
+ "learning_rate": 8.923092948717949e-06,
186169
+ "loss": 0.3309,
186170
+ "step": 68225
186171
+ },
186172
+ {
186173
+ "epoch": 545.28,
186174
+ "learning_rate": 8.92301282051282e-06,
186175
+ "loss": 0.3145,
186176
+ "step": 68230
186177
+ },
186178
+ {
186179
+ "epoch": 545.32,
186180
+ "learning_rate": 8.922932692307693e-06,
186181
+ "loss": 0.3409,
186182
+ "step": 68235
186183
+ },
186184
+ {
186185
+ "epoch": 545.36,
186186
+ "learning_rate": 8.922852564102565e-06,
186187
+ "loss": 0.446,
186188
+ "step": 68240
186189
+ },
186190
+ {
186191
+ "epoch": 545.4,
186192
+ "learning_rate": 8.922772435897436e-06,
186193
+ "loss": 1.1688,
186194
+ "step": 68245
186195
+ },
186196
+ {
186197
+ "epoch": 545.44,
186198
+ "learning_rate": 8.922692307692309e-06,
186199
+ "loss": 0.3211,
186200
+ "step": 68250
186201
+ },
186202
+ {
186203
+ "epoch": 545.48,
186204
+ "learning_rate": 8.92261217948718e-06,
186205
+ "loss": 0.366,
186206
+ "step": 68255
186207
+ },
186208
+ {
186209
+ "epoch": 545.52,
186210
+ "learning_rate": 8.922532051282052e-06,
186211
+ "loss": 0.3367,
186212
+ "step": 68260
186213
+ },
186214
+ {
186215
+ "epoch": 545.56,
186216
+ "learning_rate": 8.922451923076923e-06,
186217
+ "loss": 0.4458,
186218
+ "step": 68265
186219
+ },
186220
+ {
186221
+ "epoch": 545.6,
186222
+ "learning_rate": 8.922371794871796e-06,
186223
+ "loss": 1.3543,
186224
+ "step": 68270
186225
+ },
186226
+ {
186227
+ "epoch": 545.64,
186228
+ "learning_rate": 8.922291666666668e-06,
186229
+ "loss": 0.4659,
186230
+ "step": 68275
186231
+ },
186232
+ {
186233
+ "epoch": 545.68,
186234
+ "learning_rate": 8.922211538461539e-06,
186235
+ "loss": 0.294,
186236
+ "step": 68280
186237
+ },
186238
+ {
186239
+ "epoch": 545.72,
186240
+ "learning_rate": 8.92213141025641e-06,
186241
+ "loss": 0.3322,
186242
+ "step": 68285
186243
+ },
186244
+ {
186245
+ "epoch": 545.76,
186246
+ "learning_rate": 8.922051282051283e-06,
186247
+ "loss": 0.7161,
186248
+ "step": 68290
186249
+ },
186250
+ {
186251
+ "epoch": 545.8,
186252
+ "learning_rate": 8.921971153846155e-06,
186253
+ "loss": 1.387,
186254
+ "step": 68295
186255
+ },
186256
+ {
186257
+ "epoch": 545.84,
186258
+ "learning_rate": 8.921891025641026e-06,
186259
+ "loss": 0.3497,
186260
+ "step": 68300
186261
+ },
186262
+ {
186263
+ "epoch": 545.88,
186264
+ "learning_rate": 8.921810897435899e-06,
186265
+ "loss": 0.6459,
186266
+ "step": 68305
186267
+ },
186268
+ {
186269
+ "epoch": 545.92,
186270
+ "learning_rate": 8.92173076923077e-06,
186271
+ "loss": 0.3734,
186272
+ "step": 68310
186273
+ },
186274
+ {
186275
+ "epoch": 545.96,
186276
+ "learning_rate": 8.921650641025642e-06,
186277
+ "loss": 0.5237,
186278
+ "step": 68315
186279
+ },
186280
+ {
186281
+ "epoch": 546.0,
186282
+ "learning_rate": 8.921570512820513e-06,
186283
+ "loss": 1.6727,
186284
+ "step": 68320
186285
+ },
186286
+ {
186287
+ "epoch": 546.0,
186288
+ "eval_loss": 0.47506964206695557,
186289
+ "eval_runtime": 41.5276,
186290
+ "eval_samples_per_second": 20.203,
186291
+ "eval_steps_per_second": 0.65,
186292
+ "eval_wer": 0.1952493381984689,
186293
+ "step": 68320
186294
+ },
186295
+ {
186296
+ "epoch": 550.04,
186297
+ "learning_rate": 8.921490384615386e-06,
186298
+ "loss": 0.4511,
186299
+ "step": 68325
186300
+ },
186301
+ {
186302
+ "epoch": 550.08,
186303
+ "learning_rate": 8.921410256410256e-06,
186304
+ "loss": 0.2694,
186305
+ "step": 68330
186306
+ },
186307
+ {
186308
+ "epoch": 550.12,
186309
+ "learning_rate": 8.921330128205129e-06,
186310
+ "loss": 0.3538,
186311
+ "step": 68335
186312
+ },
186313
+ {
186314
+ "epoch": 550.16,
186315
+ "learning_rate": 8.92125e-06,
186316
+ "loss": 0.449,
186317
+ "step": 68340
186318
+ },
186319
+ {
186320
+ "epoch": 550.2,
186321
+ "learning_rate": 8.921169871794872e-06,
186322
+ "loss": 1.2809,
186323
+ "step": 68345
186324
+ },
186325
+ {
186326
+ "epoch": 550.24,
186327
+ "learning_rate": 8.921089743589745e-06,
186328
+ "loss": 0.3631,
186329
+ "step": 68350
186330
+ },
186331
+ {
186332
+ "epoch": 550.28,
186333
+ "learning_rate": 8.921009615384616e-06,
186334
+ "loss": 0.3321,
186335
+ "step": 68355
186336
+ },
186337
+ {
186338
+ "epoch": 550.32,
186339
+ "learning_rate": 8.920929487179487e-06,
186340
+ "loss": 0.3193,
186341
+ "step": 68360
186342
+ },
186343
+ {
186344
+ "epoch": 550.36,
186345
+ "learning_rate": 8.920849358974359e-06,
186346
+ "loss": 0.4592,
186347
+ "step": 68365
186348
+ },
186349
+ {
186350
+ "epoch": 550.4,
186351
+ "learning_rate": 8.920769230769232e-06,
186352
+ "loss": 1.1757,
186353
+ "step": 68370
186354
+ },
186355
+ {
186356
+ "epoch": 550.44,
186357
+ "learning_rate": 8.920689102564103e-06,
186358
+ "loss": 0.3315,
186359
+ "step": 68375
186360
+ },
186361
+ {
186362
+ "epoch": 550.48,
186363
+ "learning_rate": 8.920608974358975e-06,
186364
+ "loss": 0.316,
186365
+ "step": 68380
186366
+ },
186367
+ {
186368
+ "epoch": 550.52,
186369
+ "learning_rate": 8.920528846153846e-06,
186370
+ "loss": 0.3592,
186371
+ "step": 68385
186372
+ },
186373
+ {
186374
+ "epoch": 550.56,
186375
+ "learning_rate": 8.920448717948719e-06,
186376
+ "loss": 0.5418,
186377
+ "step": 68390
186378
+ },
186379
+ {
186380
+ "epoch": 550.6,
186381
+ "learning_rate": 8.92036858974359e-06,
186382
+ "loss": 1.1373,
186383
+ "step": 68395
186384
+ },
186385
+ {
186386
+ "epoch": 550.64,
186387
+ "learning_rate": 8.920288461538462e-06,
186388
+ "loss": 0.4661,
186389
+ "step": 68400
186390
+ },
186391
+ {
186392
+ "epoch": 550.68,
186393
+ "learning_rate": 8.920208333333335e-06,
186394
+ "loss": 0.3396,
186395
+ "step": 68405
186396
+ },
186397
+ {
186398
+ "epoch": 550.72,
186399
+ "learning_rate": 8.920128205128206e-06,
186400
+ "loss": 0.3348,
186401
+ "step": 68410
186402
+ },
186403
+ {
186404
+ "epoch": 550.76,
186405
+ "learning_rate": 8.920048076923078e-06,
186406
+ "loss": 0.4885,
186407
+ "step": 68415
186408
+ },
186409
+ {
186410
+ "epoch": 550.8,
186411
+ "learning_rate": 8.919967948717949e-06,
186412
+ "loss": 1.2031,
186413
+ "step": 68420
186414
+ },
186415
+ {
186416
+ "epoch": 550.84,
186417
+ "learning_rate": 8.919887820512822e-06,
186418
+ "loss": 0.353,
186419
+ "step": 68425
186420
+ },
186421
+ {
186422
+ "epoch": 550.88,
186423
+ "learning_rate": 8.919807692307693e-06,
186424
+ "loss": 0.3036,
186425
+ "step": 68430
186426
+ },
186427
+ {
186428
+ "epoch": 550.92,
186429
+ "learning_rate": 8.919727564102565e-06,
186430
+ "loss": 0.3383,
186431
+ "step": 68435
186432
+ },
186433
+ {
186434
+ "epoch": 550.96,
186435
+ "learning_rate": 8.919647435897436e-06,
186436
+ "loss": 0.4424,
186437
+ "step": 68440
186438
+ },
186439
+ {
186440
+ "epoch": 551.0,
186441
+ "eval_loss": 0.3531026244163513,
186442
+ "eval_runtime": 41.0545,
186443
+ "eval_samples_per_second": 20.436,
186444
+ "eval_steps_per_second": 0.658,
186445
+ "eval_wer": 0.18525332578545145,
186446
+ "step": 68444
186447
  }
186448
  ],
186449
  "max_steps": 620000,
186450
  "num_train_epochs": 5000,
186451
+ "total_flos": 1.9260183589460607e+20,
186452
  "trial_name": null,
186453
  "trial_params": null
186454
  }
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68444}/training_args.bin RENAMED
File without changes
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bf43d968dbe02ae7a22a40fea3f3b35c92eea6da15a59b6a0bf2b3a511f4915
3
  size 722165393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36abf3a2c5bf4d3050eeafe51b7abe8283b26d662aa895d1c4b2a29d7a740e5
3
  size 722165393
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/preprocessor_config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68818}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb6e5067d265b6d0ac7d38a072eaf79353dd3359b87c14179ec86e37964a2578
3
  size 377909911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c93ff5fcd4e89802ee95abc960093c807c19054952e8b77db72016e9b0895371
3
  size 377909911
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e38e660e121c61cfbe53b56b9b19b73618319ff2d4b4f0c08beadc29f3062f73
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3184289183c73c4a5f506139f7799f696e37e4b880a814a87374ea84bfb05744
3
+ size 14503
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68818}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87e2d1745c681c91159da5acdbfd8bc474c3ecf40e467f65b9d5603d68c91173
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b48d35ca13554d6b818ee40b007b767588033ef3615fae55bbe7980615d8fa2
3
  size 559
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ca10994f54dd3af978a50aed2d65798b9b5d3bd7145e83c529f6a03bd9f42bf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eab293b67371458c5eedb98ca7c7368a43de31a3f787717c48d0bf8927ac86f1
3
  size 623
model-bin/finetune/base/{checkpoint-67447 β†’ checkpoint-68818}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.1855363713557883,
3
- "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-67447",
4
- "epoch": 542.995983935743,
5
- "global_step": 67447,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -185178,11 +185178,1754 @@
185178
  "eval_steps_per_second": 0.664,
185179
  "eval_wer": 0.1855363713557883,
185180
  "step": 67447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185181
  }
185182
  ],
185183
- "max_steps": 620000,
185184
  "num_train_epochs": 5000,
185185
- "total_flos": 1.897958418039559e+20,
185186
  "trial_name": null,
185187
  "trial_params": null
185188
  }
 
1
  {
2
+ "best_metric": 0.18525332578545145,
3
+ "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-68444",
4
+ "epoch": 549.9960159362549,
5
+ "global_step": 68818,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
185178
  "eval_steps_per_second": 0.664,
185179
  "eval_wer": 0.1855363713557883,
185180
  "step": 67447
185181
+ },
185182
+ {
185183
+ "epoch": 539.02,
185184
+ "learning_rate": 8.935496794871795e-06,
185185
+ "loss": 0.3408,
185186
+ "step": 67450
185187
+ },
185188
+ {
185189
+ "epoch": 539.06,
185190
+ "learning_rate": 8.935416666666668e-06,
185191
+ "loss": 0.3909,
185192
+ "step": 67455
185193
+ },
185194
+ {
185195
+ "epoch": 539.1,
185196
+ "learning_rate": 8.935336538461539e-06,
185197
+ "loss": 0.3071,
185198
+ "step": 67460
185199
+ },
185200
+ {
185201
+ "epoch": 539.14,
185202
+ "learning_rate": 8.93525641025641e-06,
185203
+ "loss": 0.446,
185204
+ "step": 67465
185205
+ },
185206
+ {
185207
+ "epoch": 539.18,
185208
+ "learning_rate": 8.935176282051283e-06,
185209
+ "loss": 0.9182,
185210
+ "step": 67470
185211
+ },
185212
+ {
185213
+ "epoch": 539.22,
185214
+ "learning_rate": 8.935096153846155e-06,
185215
+ "loss": 0.8721,
185216
+ "step": 67475
185217
+ },
185218
+ {
185219
+ "epoch": 539.26,
185220
+ "learning_rate": 8.935016025641026e-06,
185221
+ "loss": 0.2998,
185222
+ "step": 67480
185223
+ },
185224
+ {
185225
+ "epoch": 539.3,
185226
+ "learning_rate": 8.934935897435897e-06,
185227
+ "loss": 0.3355,
185228
+ "step": 67485
185229
+ },
185230
+ {
185231
+ "epoch": 539.34,
185232
+ "learning_rate": 8.93485576923077e-06,
185233
+ "loss": 0.3517,
185234
+ "step": 67490
185235
+ },
185236
+ {
185237
+ "epoch": 539.38,
185238
+ "learning_rate": 8.934775641025642e-06,
185239
+ "loss": 0.7553,
185240
+ "step": 67495
185241
+ },
185242
+ {
185243
+ "epoch": 539.42,
185244
+ "learning_rate": 8.934695512820513e-06,
185245
+ "loss": 0.9665,
185246
+ "step": 67500
185247
+ },
185248
+ {
185249
+ "epoch": 539.46,
185250
+ "learning_rate": 8.934615384615385e-06,
185251
+ "loss": 0.3252,
185252
+ "step": 67505
185253
+ },
185254
+ {
185255
+ "epoch": 539.5,
185256
+ "learning_rate": 8.934535256410258e-06,
185257
+ "loss": 0.4006,
185258
+ "step": 67510
185259
+ },
185260
+ {
185261
+ "epoch": 539.54,
185262
+ "learning_rate": 8.934455128205129e-06,
185263
+ "loss": 0.414,
185264
+ "step": 67515
185265
+ },
185266
+ {
185267
+ "epoch": 539.58,
185268
+ "learning_rate": 8.934375e-06,
185269
+ "loss": 0.7396,
185270
+ "step": 67520
185271
+ },
185272
+ {
185273
+ "epoch": 539.62,
185274
+ "learning_rate": 8.934294871794873e-06,
185275
+ "loss": 1.0143,
185276
+ "step": 67525
185277
+ },
185278
+ {
185279
+ "epoch": 539.66,
185280
+ "learning_rate": 8.934214743589745e-06,
185281
+ "loss": 0.2893,
185282
+ "step": 67530
185283
+ },
185284
+ {
185285
+ "epoch": 539.7,
185286
+ "learning_rate": 8.934134615384616e-06,
185287
+ "loss": 0.2996,
185288
+ "step": 67535
185289
+ },
185290
+ {
185291
+ "epoch": 539.74,
185292
+ "learning_rate": 8.934054487179487e-06,
185293
+ "loss": 0.4244,
185294
+ "step": 67540
185295
+ },
185296
+ {
185297
+ "epoch": 539.78,
185298
+ "learning_rate": 8.93397435897436e-06,
185299
+ "loss": 0.83,
185300
+ "step": 67545
185301
+ },
185302
+ {
185303
+ "epoch": 539.82,
185304
+ "learning_rate": 8.933894230769232e-06,
185305
+ "loss": 1.0249,
185306
+ "step": 67550
185307
+ },
185308
+ {
185309
+ "epoch": 539.86,
185310
+ "learning_rate": 8.933814102564103e-06,
185311
+ "loss": 0.3636,
185312
+ "step": 67555
185313
+ },
185314
+ {
185315
+ "epoch": 539.9,
185316
+ "learning_rate": 8.933733974358975e-06,
185317
+ "loss": 0.3307,
185318
+ "step": 67560
185319
+ },
185320
+ {
185321
+ "epoch": 539.94,
185322
+ "learning_rate": 8.933653846153848e-06,
185323
+ "loss": 0.3467,
185324
+ "step": 67565
185325
+ },
185326
+ {
185327
+ "epoch": 539.98,
185328
+ "learning_rate": 8.933573717948719e-06,
185329
+ "loss": 0.8957,
185330
+ "step": 67570
185331
+ },
185332
+ {
185333
+ "epoch": 540.0,
185334
+ "eval_loss": 0.49278613924980164,
185335
+ "eval_runtime": 38.3149,
185336
+ "eval_samples_per_second": 21.898,
185337
+ "eval_steps_per_second": 0.705,
185338
+ "eval_wer": 0.192304851931172,
185339
+ "step": 67572
185340
+ },
185341
+ {
185342
+ "epoch": 540.02,
185343
+ "learning_rate": 8.93349358974359e-06,
185344
+ "loss": 0.3757,
185345
+ "step": 67575
185346
+ },
185347
+ {
185348
+ "epoch": 540.06,
185349
+ "learning_rate": 8.933413461538463e-06,
185350
+ "loss": 0.3056,
185351
+ "step": 67580
185352
+ },
185353
+ {
185354
+ "epoch": 540.1,
185355
+ "learning_rate": 8.933333333333333e-06,
185356
+ "loss": 0.3568,
185357
+ "step": 67585
185358
+ },
185359
+ {
185360
+ "epoch": 540.14,
185361
+ "learning_rate": 8.933253205128206e-06,
185362
+ "loss": 0.4253,
185363
+ "step": 67590
185364
+ },
185365
+ {
185366
+ "epoch": 540.18,
185367
+ "learning_rate": 8.933173076923077e-06,
185368
+ "loss": 0.9211,
185369
+ "step": 67595
185370
+ },
185371
+ {
185372
+ "epoch": 540.22,
185373
+ "learning_rate": 8.933092948717949e-06,
185374
+ "loss": 1.0014,
185375
+ "step": 67600
185376
+ },
185377
+ {
185378
+ "epoch": 540.26,
185379
+ "learning_rate": 8.93301282051282e-06,
185380
+ "loss": 0.5676,
185381
+ "step": 67605
185382
+ },
185383
+ {
185384
+ "epoch": 540.3,
185385
+ "learning_rate": 8.932932692307693e-06,
185386
+ "loss": 0.3648,
185387
+ "step": 67610
185388
+ },
185389
+ {
185390
+ "epoch": 540.34,
185391
+ "learning_rate": 8.932852564102565e-06,
185392
+ "loss": 0.4003,
185393
+ "step": 67615
185394
+ },
185395
+ {
185396
+ "epoch": 540.38,
185397
+ "learning_rate": 8.932772435897436e-06,
185398
+ "loss": 0.8113,
185399
+ "step": 67620
185400
+ },
185401
+ {
185402
+ "epoch": 540.42,
185403
+ "learning_rate": 8.932692307692309e-06,
185404
+ "loss": 1.0352,
185405
+ "step": 67625
185406
+ },
185407
+ {
185408
+ "epoch": 540.46,
185409
+ "learning_rate": 8.93261217948718e-06,
185410
+ "loss": 0.3178,
185411
+ "step": 67630
185412
+ },
185413
+ {
185414
+ "epoch": 540.5,
185415
+ "learning_rate": 8.932532051282052e-06,
185416
+ "loss": 0.5683,
185417
+ "step": 67635
185418
+ },
185419
+ {
185420
+ "epoch": 540.54,
185421
+ "learning_rate": 8.932451923076923e-06,
185422
+ "loss": 0.3428,
185423
+ "step": 67640
185424
+ },
185425
+ {
185426
+ "epoch": 540.58,
185427
+ "learning_rate": 8.932371794871796e-06,
185428
+ "loss": 0.7413,
185429
+ "step": 67645
185430
+ },
185431
+ {
185432
+ "epoch": 540.62,
185433
+ "learning_rate": 8.932291666666668e-06,
185434
+ "loss": 0.8567,
185435
+ "step": 67650
185436
+ },
185437
+ {
185438
+ "epoch": 540.66,
185439
+ "learning_rate": 8.932211538461539e-06,
185440
+ "loss": 0.314,
185441
+ "step": 67655
185442
+ },
185443
+ {
185444
+ "epoch": 540.7,
185445
+ "learning_rate": 8.93213141025641e-06,
185446
+ "loss": 0.2992,
185447
+ "step": 67660
185448
+ },
185449
+ {
185450
+ "epoch": 540.74,
185451
+ "learning_rate": 8.932051282051283e-06,
185452
+ "loss": 0.3365,
185453
+ "step": 67665
185454
+ },
185455
+ {
185456
+ "epoch": 540.78,
185457
+ "learning_rate": 8.931971153846155e-06,
185458
+ "loss": 0.7477,
185459
+ "step": 67670
185460
+ },
185461
+ {
185462
+ "epoch": 540.82,
185463
+ "learning_rate": 8.931891025641026e-06,
185464
+ "loss": 0.9346,
185465
+ "step": 67675
185466
+ },
185467
+ {
185468
+ "epoch": 540.86,
185469
+ "learning_rate": 8.931810897435899e-06,
185470
+ "loss": 0.3394,
185471
+ "step": 67680
185472
+ },
185473
+ {
185474
+ "epoch": 540.9,
185475
+ "learning_rate": 8.93173076923077e-06,
185476
+ "loss": 0.2948,
185477
+ "step": 67685
185478
+ },
185479
+ {
185480
+ "epoch": 540.94,
185481
+ "learning_rate": 8.931650641025642e-06,
185482
+ "loss": 0.4479,
185483
+ "step": 67690
185484
+ },
185485
+ {
185486
+ "epoch": 540.98,
185487
+ "learning_rate": 8.931570512820513e-06,
185488
+ "loss": 0.7604,
185489
+ "step": 67695
185490
+ },
185491
+ {
185492
+ "epoch": 541.0,
185493
+ "eval_loss": 0.44643455743789673,
185494
+ "eval_runtime": 38.3882,
185495
+ "eval_samples_per_second": 21.856,
185496
+ "eval_steps_per_second": 0.703,
185497
+ "eval_wer": 0.1991250455705432,
185498
+ "step": 67697
185499
+ },
185500
+ {
185501
+ "epoch": 541.02,
185502
+ "learning_rate": 8.931490384615386e-06,
185503
+ "loss": 0.3752,
185504
+ "step": 67700
185505
+ },
185506
+ {
185507
+ "epoch": 541.06,
185508
+ "learning_rate": 8.931410256410258e-06,
185509
+ "loss": 0.2947,
185510
+ "step": 67705
185511
+ },
185512
+ {
185513
+ "epoch": 541.1,
185514
+ "learning_rate": 8.931330128205129e-06,
185515
+ "loss": 0.3437,
185516
+ "step": 67710
185517
+ },
185518
+ {
185519
+ "epoch": 541.14,
185520
+ "learning_rate": 8.93125e-06,
185521
+ "loss": 0.3961,
185522
+ "step": 67715
185523
+ },
185524
+ {
185525
+ "epoch": 541.18,
185526
+ "learning_rate": 8.931169871794873e-06,
185527
+ "loss": 0.7735,
185528
+ "step": 67720
185529
+ },
185530
+ {
185531
+ "epoch": 541.22,
185532
+ "learning_rate": 8.931089743589745e-06,
185533
+ "loss": 1.0214,
185534
+ "step": 67725
185535
+ },
185536
+ {
185537
+ "epoch": 541.26,
185538
+ "learning_rate": 8.931009615384616e-06,
185539
+ "loss": 0.2891,
185540
+ "step": 67730
185541
+ },
185542
+ {
185543
+ "epoch": 541.3,
185544
+ "learning_rate": 8.930929487179489e-06,
185545
+ "loss": 0.2835,
185546
+ "step": 67735
185547
+ },
185548
+ {
185549
+ "epoch": 541.34,
185550
+ "learning_rate": 8.930849358974359e-06,
185551
+ "loss": 0.3882,
185552
+ "step": 67740
185553
+ },
185554
+ {
185555
+ "epoch": 541.38,
185556
+ "learning_rate": 8.930769230769232e-06,
185557
+ "loss": 0.769,
185558
+ "step": 67745
185559
+ },
185560
+ {
185561
+ "epoch": 541.42,
185562
+ "learning_rate": 8.930689102564103e-06,
185563
+ "loss": 0.7533,
185564
+ "step": 67750
185565
+ },
185566
+ {
185567
+ "epoch": 541.46,
185568
+ "learning_rate": 8.930608974358975e-06,
185569
+ "loss": 0.2833,
185570
+ "step": 67755
185571
+ },
185572
+ {
185573
+ "epoch": 541.5,
185574
+ "learning_rate": 8.930528846153846e-06,
185575
+ "loss": 0.2899,
185576
+ "step": 67760
185577
+ },
185578
+ {
185579
+ "epoch": 541.54,
185580
+ "learning_rate": 8.930448717948719e-06,
185581
+ "loss": 0.4753,
185582
+ "step": 67765
185583
+ },
185584
+ {
185585
+ "epoch": 541.58,
185586
+ "learning_rate": 8.93036858974359e-06,
185587
+ "loss": 0.7755,
185588
+ "step": 67770
185589
+ },
185590
+ {
185591
+ "epoch": 541.62,
185592
+ "learning_rate": 8.930288461538462e-06,
185593
+ "loss": 0.8648,
185594
+ "step": 67775
185595
+ },
185596
+ {
185597
+ "epoch": 541.66,
185598
+ "learning_rate": 8.930208333333335e-06,
185599
+ "loss": 0.2927,
185600
+ "step": 67780
185601
+ },
185602
+ {
185603
+ "epoch": 541.7,
185604
+ "learning_rate": 8.930128205128206e-06,
185605
+ "loss": 0.3099,
185606
+ "step": 67785
185607
+ },
185608
+ {
185609
+ "epoch": 541.74,
185610
+ "learning_rate": 8.930048076923077e-06,
185611
+ "loss": 0.3615,
185612
+ "step": 67790
185613
+ },
185614
+ {
185615
+ "epoch": 541.78,
185616
+ "learning_rate": 8.929967948717949e-06,
185617
+ "loss": 0.8285,
185618
+ "step": 67795
185619
+ },
185620
+ {
185621
+ "epoch": 541.82,
185622
+ "learning_rate": 8.929887820512822e-06,
185623
+ "loss": 0.8921,
185624
+ "step": 67800
185625
+ },
185626
+ {
185627
+ "epoch": 541.86,
185628
+ "learning_rate": 8.929807692307693e-06,
185629
+ "loss": 0.3251,
185630
+ "step": 67805
185631
+ },
185632
+ {
185633
+ "epoch": 541.9,
185634
+ "learning_rate": 8.929727564102565e-06,
185635
+ "loss": 0.338,
185636
+ "step": 67810
185637
+ },
185638
+ {
185639
+ "epoch": 541.94,
185640
+ "learning_rate": 8.929647435897436e-06,
185641
+ "loss": 0.408,
185642
+ "step": 67815
185643
+ },
185644
+ {
185645
+ "epoch": 541.98,
185646
+ "learning_rate": 8.929567307692309e-06,
185647
+ "loss": 0.7369,
185648
+ "step": 67820
185649
+ },
185650
+ {
185651
+ "epoch": 542.0,
185652
+ "eval_loss": 0.3617897629737854,
185653
+ "eval_runtime": 39.0831,
185654
+ "eval_samples_per_second": 21.441,
185655
+ "eval_steps_per_second": 0.691,
185656
+ "eval_wer": 0.19498813760379596,
185657
+ "step": 67822
185658
+ },
185659
+ {
185660
+ "epoch": 542.02,
185661
+ "learning_rate": 8.92948717948718e-06,
185662
+ "loss": 0.482,
185663
+ "step": 67825
185664
+ },
185665
+ {
185666
+ "epoch": 542.06,
185667
+ "learning_rate": 8.929407051282052e-06,
185668
+ "loss": 0.3239,
185669
+ "step": 67830
185670
+ },
185671
+ {
185672
+ "epoch": 542.1,
185673
+ "learning_rate": 8.929326923076925e-06,
185674
+ "loss": 0.2679,
185675
+ "step": 67835
185676
+ },
185677
+ {
185678
+ "epoch": 542.14,
185679
+ "learning_rate": 8.929246794871796e-06,
185680
+ "loss": 0.3645,
185681
+ "step": 67840
185682
+ },
185683
+ {
185684
+ "epoch": 542.18,
185685
+ "learning_rate": 8.929166666666667e-06,
185686
+ "loss": 0.8296,
185687
+ "step": 67845
185688
+ },
185689
+ {
185690
+ "epoch": 542.22,
185691
+ "learning_rate": 8.929086538461539e-06,
185692
+ "loss": 0.8821,
185693
+ "step": 67850
185694
+ },
185695
+ {
185696
+ "epoch": 542.26,
185697
+ "learning_rate": 8.929006410256412e-06,
185698
+ "loss": 0.3484,
185699
+ "step": 67855
185700
+ },
185701
+ {
185702
+ "epoch": 542.3,
185703
+ "learning_rate": 8.928926282051282e-06,
185704
+ "loss": 0.4008,
185705
+ "step": 67860
185706
+ },
185707
+ {
185708
+ "epoch": 542.34,
185709
+ "learning_rate": 8.928846153846155e-06,
185710
+ "loss": 0.3902,
185711
+ "step": 67865
185712
+ },
185713
+ {
185714
+ "epoch": 542.38,
185715
+ "learning_rate": 8.928766025641028e-06,
185716
+ "loss": 0.7359,
185717
+ "step": 67870
185718
+ },
185719
+ {
185720
+ "epoch": 542.42,
185721
+ "learning_rate": 8.928685897435897e-06,
185722
+ "loss": 1.0072,
185723
+ "step": 67875
185724
+ },
185725
+ {
185726
+ "epoch": 542.46,
185727
+ "learning_rate": 8.92860576923077e-06,
185728
+ "loss": 0.2912,
185729
+ "step": 67880
185730
+ },
185731
+ {
185732
+ "epoch": 542.5,
185733
+ "learning_rate": 8.928525641025642e-06,
185734
+ "loss": 0.3905,
185735
+ "step": 67885
185736
+ },
185737
+ {
185738
+ "epoch": 542.54,
185739
+ "learning_rate": 8.928445512820513e-06,
185740
+ "loss": 0.381,
185741
+ "step": 67890
185742
+ },
185743
+ {
185744
+ "epoch": 542.58,
185745
+ "learning_rate": 8.928365384615384e-06,
185746
+ "loss": 0.7648,
185747
+ "step": 67895
185748
+ },
185749
+ {
185750
+ "epoch": 542.62,
185751
+ "learning_rate": 8.928285256410257e-06,
185752
+ "loss": 0.8711,
185753
+ "step": 67900
185754
+ },
185755
+ {
185756
+ "epoch": 542.66,
185757
+ "learning_rate": 8.928205128205129e-06,
185758
+ "loss": 0.2766,
185759
+ "step": 67905
185760
+ },
185761
+ {
185762
+ "epoch": 542.7,
185763
+ "learning_rate": 8.928125e-06,
185764
+ "loss": 0.3258,
185765
+ "step": 67910
185766
+ },
185767
+ {
185768
+ "epoch": 542.74,
185769
+ "learning_rate": 8.928044871794872e-06,
185770
+ "loss": 0.3602,
185771
+ "step": 67915
185772
+ },
185773
+ {
185774
+ "epoch": 542.78,
185775
+ "learning_rate": 8.927964743589745e-06,
185776
+ "loss": 0.7964,
185777
+ "step": 67920
185778
+ },
185779
+ {
185780
+ "epoch": 542.82,
185781
+ "learning_rate": 8.927884615384616e-06,
185782
+ "loss": 1.025,
185783
+ "step": 67925
185784
+ },
185785
+ {
185786
+ "epoch": 542.86,
185787
+ "learning_rate": 8.927804487179487e-06,
185788
+ "loss": 0.2855,
185789
+ "step": 67930
185790
+ },
185791
+ {
185792
+ "epoch": 542.9,
185793
+ "learning_rate": 8.92772435897436e-06,
185794
+ "loss": 0.3043,
185795
+ "step": 67935
185796
+ },
185797
+ {
185798
+ "epoch": 542.94,
185799
+ "learning_rate": 8.927644230769232e-06,
185800
+ "loss": 0.3756,
185801
+ "step": 67940
185802
+ },
185803
+ {
185804
+ "epoch": 542.98,
185805
+ "learning_rate": 8.927564102564103e-06,
185806
+ "loss": 0.8851,
185807
+ "step": 67945
185808
+ },
185809
+ {
185810
+ "epoch": 543.0,
185811
+ "eval_loss": 0.3958238661289215,
185812
+ "eval_runtime": 39.4545,
185813
+ "eval_samples_per_second": 21.24,
185814
+ "eval_steps_per_second": 0.684,
185815
+ "eval_wer": 0.19044464075382803,
185816
+ "step": 67947
185817
+ },
185818
+ {
185819
+ "epoch": 547.02,
185820
+ "learning_rate": 8.927483974358974e-06,
185821
+ "loss": 0.3812,
185822
+ "step": 67950
185823
+ },
185824
+ {
185825
+ "epoch": 547.06,
185826
+ "learning_rate": 8.927403846153847e-06,
185827
+ "loss": 0.3034,
185828
+ "step": 67955
185829
+ },
185830
+ {
185831
+ "epoch": 547.1,
185832
+ "learning_rate": 8.927323717948719e-06,
185833
+ "loss": 0.3513,
185834
+ "step": 67960
185835
+ },
185836
+ {
185837
+ "epoch": 547.14,
185838
+ "learning_rate": 8.92724358974359e-06,
185839
+ "loss": 0.4248,
185840
+ "step": 67965
185841
+ },
185842
+ {
185843
+ "epoch": 547.18,
185844
+ "learning_rate": 8.927163461538463e-06,
185845
+ "loss": 0.579,
185846
+ "step": 67970
185847
+ },
185848
+ {
185849
+ "epoch": 547.22,
185850
+ "learning_rate": 8.927083333333335e-06,
185851
+ "loss": 0.9471,
185852
+ "step": 67975
185853
+ },
185854
+ {
185855
+ "epoch": 547.27,
185856
+ "learning_rate": 8.927003205128206e-06,
185857
+ "loss": 0.3662,
185858
+ "step": 67980
185859
+ },
185860
+ {
185861
+ "epoch": 547.31,
185862
+ "learning_rate": 8.926923076923077e-06,
185863
+ "loss": 0.3769,
185864
+ "step": 67985
185865
+ },
185866
+ {
185867
+ "epoch": 547.35,
185868
+ "learning_rate": 8.92684294871795e-06,
185869
+ "loss": 0.4178,
185870
+ "step": 67990
185871
+ },
185872
+ {
185873
+ "epoch": 547.39,
185874
+ "learning_rate": 8.926762820512822e-06,
185875
+ "loss": 0.703,
185876
+ "step": 67995
185877
+ },
185878
+ {
185879
+ "epoch": 547.43,
185880
+ "learning_rate": 8.926682692307693e-06,
185881
+ "loss": 1.0705,
185882
+ "step": 68000
185883
+ },
185884
+ {
185885
+ "epoch": 547.47,
185886
+ "learning_rate": 8.926602564102564e-06,
185887
+ "loss": 0.3506,
185888
+ "step": 68005
185889
+ },
185890
+ {
185891
+ "epoch": 547.51,
185892
+ "learning_rate": 8.926522435897438e-06,
185893
+ "loss": 0.3535,
185894
+ "step": 68010
185895
+ },
185896
+ {
185897
+ "epoch": 547.55,
185898
+ "learning_rate": 8.926442307692307e-06,
185899
+ "loss": 0.3949,
185900
+ "step": 68015
185901
+ },
185902
+ {
185903
+ "epoch": 547.59,
185904
+ "learning_rate": 8.92636217948718e-06,
185905
+ "loss": 0.7904,
185906
+ "step": 68020
185907
+ },
185908
+ {
185909
+ "epoch": 547.63,
185910
+ "learning_rate": 8.926282051282053e-06,
185911
+ "loss": 0.9079,
185912
+ "step": 68025
185913
+ },
185914
+ {
185915
+ "epoch": 547.67,
185916
+ "learning_rate": 8.926201923076923e-06,
185917
+ "loss": 0.4741,
185918
+ "step": 68030
185919
+ },
185920
+ {
185921
+ "epoch": 547.71,
185922
+ "learning_rate": 8.926121794871796e-06,
185923
+ "loss": 0.3193,
185924
+ "step": 68035
185925
+ },
185926
+ {
185927
+ "epoch": 547.75,
185928
+ "learning_rate": 8.926041666666667e-06,
185929
+ "loss": 0.3734,
185930
+ "step": 68040
185931
+ },
185932
+ {
185933
+ "epoch": 547.79,
185934
+ "learning_rate": 8.925961538461539e-06,
185935
+ "loss": 0.7489,
185936
+ "step": 68045
185937
+ },
185938
+ {
185939
+ "epoch": 547.83,
185940
+ "learning_rate": 8.92588141025641e-06,
185941
+ "loss": 1.1998,
185942
+ "step": 68050
185943
+ },
185944
+ {
185945
+ "epoch": 547.87,
185946
+ "learning_rate": 8.925801282051283e-06,
185947
+ "loss": 0.321,
185948
+ "step": 68055
185949
+ },
185950
+ {
185951
+ "epoch": 547.91,
185952
+ "learning_rate": 8.925721153846154e-06,
185953
+ "loss": 0.3205,
185954
+ "step": 68060
185955
+ },
185956
+ {
185957
+ "epoch": 547.95,
185958
+ "learning_rate": 8.925641025641026e-06,
185959
+ "loss": 0.4711,
185960
+ "step": 68065
185961
+ },
185962
+ {
185963
+ "epoch": 547.99,
185964
+ "learning_rate": 8.925560897435899e-06,
185965
+ "loss": 0.897,
185966
+ "step": 68070
185967
+ },
185968
+ {
185969
+ "epoch": 548.0,
185970
+ "eval_loss": 0.3655170798301697,
185971
+ "eval_runtime": 38.9264,
185972
+ "eval_samples_per_second": 21.553,
185973
+ "eval_steps_per_second": 0.694,
185974
+ "eval_wer": 0.19745080500894455,
185975
+ "step": 68071
185976
+ },
185977
+ {
185978
+ "epoch": 548.03,
185979
+ "learning_rate": 8.92548076923077e-06,
185980
+ "loss": 0.3636,
185981
+ "step": 68075
185982
+ },
185983
+ {
185984
+ "epoch": 548.07,
185985
+ "learning_rate": 8.925400641025642e-06,
185986
+ "loss": 0.3824,
185987
+ "step": 68080
185988
+ },
185989
+ {
185990
+ "epoch": 548.11,
185991
+ "learning_rate": 8.925320512820513e-06,
185992
+ "loss": 0.3019,
185993
+ "step": 68085
185994
+ },
185995
+ {
185996
+ "epoch": 548.15,
185997
+ "learning_rate": 8.925240384615386e-06,
185998
+ "loss": 0.3797,
185999
+ "step": 68090
186000
+ },
186001
+ {
186002
+ "epoch": 548.19,
186003
+ "learning_rate": 8.925160256410257e-06,
186004
+ "loss": 0.9859,
186005
+ "step": 68095
186006
+ },
186007
+ {
186008
+ "epoch": 548.23,
186009
+ "learning_rate": 8.925080128205129e-06,
186010
+ "loss": 0.7557,
186011
+ "step": 68100
186012
+ },
186013
+ {
186014
+ "epoch": 548.27,
186015
+ "learning_rate": 8.925e-06,
186016
+ "loss": 0.359,
186017
+ "step": 68105
186018
+ },
186019
+ {
186020
+ "epoch": 548.31,
186021
+ "learning_rate": 8.924919871794873e-06,
186022
+ "loss": 0.273,
186023
+ "step": 68110
186024
+ },
186025
+ {
186026
+ "epoch": 548.35,
186027
+ "learning_rate": 8.924839743589745e-06,
186028
+ "loss": 0.4286,
186029
+ "step": 68115
186030
+ },
186031
+ {
186032
+ "epoch": 548.39,
186033
+ "learning_rate": 8.924759615384616e-06,
186034
+ "loss": 0.8112,
186035
+ "step": 68120
186036
+ },
186037
+ {
186038
+ "epoch": 548.43,
186039
+ "learning_rate": 8.924679487179489e-06,
186040
+ "loss": 0.6704,
186041
+ "step": 68125
186042
+ },
186043
+ {
186044
+ "epoch": 548.47,
186045
+ "learning_rate": 8.92459935897436e-06,
186046
+ "loss": 0.3017,
186047
+ "step": 68130
186048
+ },
186049
+ {
186050
+ "epoch": 548.51,
186051
+ "learning_rate": 8.924519230769232e-06,
186052
+ "loss": 0.3403,
186053
+ "step": 68135
186054
+ },
186055
+ {
186056
+ "epoch": 548.55,
186057
+ "learning_rate": 8.924439102564103e-06,
186058
+ "loss": 0.4607,
186059
+ "step": 68140
186060
+ },
186061
+ {
186062
+ "epoch": 548.59,
186063
+ "learning_rate": 8.924358974358976e-06,
186064
+ "loss": 1.0333,
186065
+ "step": 68145
186066
+ },
186067
+ {
186068
+ "epoch": 548.63,
186069
+ "learning_rate": 8.924278846153846e-06,
186070
+ "loss": 0.8082,
186071
+ "step": 68150
186072
+ },
186073
+ {
186074
+ "epoch": 548.67,
186075
+ "learning_rate": 8.924198717948719e-06,
186076
+ "loss": 0.2737,
186077
+ "step": 68155
186078
+ },
186079
+ {
186080
+ "epoch": 548.71,
186081
+ "learning_rate": 8.92411858974359e-06,
186082
+ "loss": 0.283,
186083
+ "step": 68160
186084
+ },
186085
+ {
186086
+ "epoch": 548.76,
186087
+ "learning_rate": 8.924038461538461e-06,
186088
+ "loss": 0.5159,
186089
+ "step": 68165
186090
+ },
186091
+ {
186092
+ "epoch": 548.8,
186093
+ "learning_rate": 8.923958333333335e-06,
186094
+ "loss": 1.1066,
186095
+ "step": 68170
186096
+ },
186097
+ {
186098
+ "epoch": 548.84,
186099
+ "learning_rate": 8.923878205128206e-06,
186100
+ "loss": 0.7521,
186101
+ "step": 68175
186102
+ },
186103
+ {
186104
+ "epoch": 548.88,
186105
+ "learning_rate": 8.923798076923077e-06,
186106
+ "loss": 0.3451,
186107
+ "step": 68180
186108
+ },
186109
+ {
186110
+ "epoch": 548.92,
186111
+ "learning_rate": 8.923717948717949e-06,
186112
+ "loss": 0.3402,
186113
+ "step": 68185
186114
+ },
186115
+ {
186116
+ "epoch": 548.96,
186117
+ "learning_rate": 8.923637820512822e-06,
186118
+ "loss": 0.4594,
186119
+ "step": 68190
186120
+ },
186121
+ {
186122
+ "epoch": 549.0,
186123
+ "learning_rate": 8.923573717948719e-06,
186124
+ "loss": 1.3483,
186125
+ "step": 68195
186126
+ },
186127
+ {
186128
+ "epoch": 549.0,
186129
+ "eval_loss": 0.38058263063430786,
186130
+ "eval_runtime": 39.1105,
186131
+ "eval_samples_per_second": 21.452,
186132
+ "eval_steps_per_second": 0.69,
186133
+ "eval_wer": 0.1986700537955768,
186134
+ "step": 68195
186135
+ },
186136
+ {
186137
+ "epoch": 545.04,
186138
+ "learning_rate": 8.92349358974359e-06,
186139
+ "loss": 0.3853,
186140
+ "step": 68200
186141
+ },
186142
+ {
186143
+ "epoch": 545.08,
186144
+ "learning_rate": 8.923413461538464e-06,
186145
+ "loss": 0.2899,
186146
+ "step": 68205
186147
+ },
186148
+ {
186149
+ "epoch": 545.12,
186150
+ "learning_rate": 8.923333333333333e-06,
186151
+ "loss": 0.3688,
186152
+ "step": 68210
186153
+ },
186154
+ {
186155
+ "epoch": 545.16,
186156
+ "learning_rate": 8.923253205128206e-06,
186157
+ "loss": 0.6488,
186158
+ "step": 68215
186159
+ },
186160
+ {
186161
+ "epoch": 545.2,
186162
+ "learning_rate": 8.923173076923078e-06,
186163
+ "loss": 1.2436,
186164
+ "step": 68220
186165
+ },
186166
+ {
186167
+ "epoch": 545.24,
186168
+ "learning_rate": 8.923092948717949e-06,
186169
+ "loss": 0.3309,
186170
+ "step": 68225
186171
+ },
186172
+ {
186173
+ "epoch": 545.28,
186174
+ "learning_rate": 8.92301282051282e-06,
186175
+ "loss": 0.3145,
186176
+ "step": 68230
186177
+ },
186178
+ {
186179
+ "epoch": 545.32,
186180
+ "learning_rate": 8.922932692307693e-06,
186181
+ "loss": 0.3409,
186182
+ "step": 68235
186183
+ },
186184
+ {
186185
+ "epoch": 545.36,
186186
+ "learning_rate": 8.922852564102565e-06,
186187
+ "loss": 0.446,
186188
+ "step": 68240
186189
+ },
186190
+ {
186191
+ "epoch": 545.4,
186192
+ "learning_rate": 8.922772435897436e-06,
186193
+ "loss": 1.1688,
186194
+ "step": 68245
186195
+ },
186196
+ {
186197
+ "epoch": 545.44,
186198
+ "learning_rate": 8.922692307692309e-06,
186199
+ "loss": 0.3211,
186200
+ "step": 68250
186201
+ },
186202
+ {
186203
+ "epoch": 545.48,
186204
+ "learning_rate": 8.92261217948718e-06,
186205
+ "loss": 0.366,
186206
+ "step": 68255
186207
+ },
186208
+ {
186209
+ "epoch": 545.52,
186210
+ "learning_rate": 8.922532051282052e-06,
186211
+ "loss": 0.3367,
186212
+ "step": 68260
186213
+ },
186214
+ {
186215
+ "epoch": 545.56,
186216
+ "learning_rate": 8.922451923076923e-06,
186217
+ "loss": 0.4458,
186218
+ "step": 68265
186219
+ },
186220
+ {
186221
+ "epoch": 545.6,
186222
+ "learning_rate": 8.922371794871796e-06,
186223
+ "loss": 1.3543,
186224
+ "step": 68270
186225
+ },
186226
+ {
186227
+ "epoch": 545.64,
186228
+ "learning_rate": 8.922291666666668e-06,
186229
+ "loss": 0.4659,
186230
+ "step": 68275
186231
+ },
186232
+ {
186233
+ "epoch": 545.68,
186234
+ "learning_rate": 8.922211538461539e-06,
186235
+ "loss": 0.294,
186236
+ "step": 68280
186237
+ },
186238
+ {
186239
+ "epoch": 545.72,
186240
+ "learning_rate": 8.92213141025641e-06,
186241
+ "loss": 0.3322,
186242
+ "step": 68285
186243
+ },
186244
+ {
186245
+ "epoch": 545.76,
186246
+ "learning_rate": 8.922051282051283e-06,
186247
+ "loss": 0.7161,
186248
+ "step": 68290
186249
+ },
186250
+ {
186251
+ "epoch": 545.8,
186252
+ "learning_rate": 8.921971153846155e-06,
186253
+ "loss": 1.387,
186254
+ "step": 68295
186255
+ },
186256
+ {
186257
+ "epoch": 545.84,
186258
+ "learning_rate": 8.921891025641026e-06,
186259
+ "loss": 0.3497,
186260
+ "step": 68300
186261
+ },
186262
+ {
186263
+ "epoch": 545.88,
186264
+ "learning_rate": 8.921810897435899e-06,
186265
+ "loss": 0.6459,
186266
+ "step": 68305
186267
+ },
186268
+ {
186269
+ "epoch": 545.92,
186270
+ "learning_rate": 8.92173076923077e-06,
186271
+ "loss": 0.3734,
186272
+ "step": 68310
186273
+ },
186274
+ {
186275
+ "epoch": 545.96,
186276
+ "learning_rate": 8.921650641025642e-06,
186277
+ "loss": 0.5237,
186278
+ "step": 68315
186279
+ },
186280
+ {
186281
+ "epoch": 546.0,
186282
+ "learning_rate": 8.921570512820513e-06,
186283
+ "loss": 1.6727,
186284
+ "step": 68320
186285
+ },
186286
+ {
186287
+ "epoch": 546.0,
186288
+ "eval_loss": 0.47506964206695557,
186289
+ "eval_runtime": 41.5276,
186290
+ "eval_samples_per_second": 20.203,
186291
+ "eval_steps_per_second": 0.65,
186292
+ "eval_wer": 0.1952493381984689,
186293
+ "step": 68320
186294
+ },
186295
+ {
186296
+ "epoch": 550.04,
186297
+ "learning_rate": 8.921490384615386e-06,
186298
+ "loss": 0.4511,
186299
+ "step": 68325
186300
+ },
186301
+ {
186302
+ "epoch": 550.08,
186303
+ "learning_rate": 8.921410256410256e-06,
186304
+ "loss": 0.2694,
186305
+ "step": 68330
186306
+ },
186307
+ {
186308
+ "epoch": 550.12,
186309
+ "learning_rate": 8.921330128205129e-06,
186310
+ "loss": 0.3538,
186311
+ "step": 68335
186312
+ },
186313
+ {
186314
+ "epoch": 550.16,
186315
+ "learning_rate": 8.92125e-06,
186316
+ "loss": 0.449,
186317
+ "step": 68340
186318
+ },
186319
+ {
186320
+ "epoch": 550.2,
186321
+ "learning_rate": 8.921169871794872e-06,
186322
+ "loss": 1.2809,
186323
+ "step": 68345
186324
+ },
186325
+ {
186326
+ "epoch": 550.24,
186327
+ "learning_rate": 8.921089743589745e-06,
186328
+ "loss": 0.3631,
186329
+ "step": 68350
186330
+ },
186331
+ {
186332
+ "epoch": 550.28,
186333
+ "learning_rate": 8.921009615384616e-06,
186334
+ "loss": 0.3321,
186335
+ "step": 68355
186336
+ },
186337
+ {
186338
+ "epoch": 550.32,
186339
+ "learning_rate": 8.920929487179487e-06,
186340
+ "loss": 0.3193,
186341
+ "step": 68360
186342
+ },
186343
+ {
186344
+ "epoch": 550.36,
186345
+ "learning_rate": 8.920849358974359e-06,
186346
+ "loss": 0.4592,
186347
+ "step": 68365
186348
+ },
186349
+ {
186350
+ "epoch": 550.4,
186351
+ "learning_rate": 8.920769230769232e-06,
186352
+ "loss": 1.1757,
186353
+ "step": 68370
186354
+ },
186355
+ {
186356
+ "epoch": 550.44,
186357
+ "learning_rate": 8.920689102564103e-06,
186358
+ "loss": 0.3315,
186359
+ "step": 68375
186360
+ },
186361
+ {
186362
+ "epoch": 550.48,
186363
+ "learning_rate": 8.920608974358975e-06,
186364
+ "loss": 0.316,
186365
+ "step": 68380
186366
+ },
186367
+ {
186368
+ "epoch": 550.52,
186369
+ "learning_rate": 8.920528846153846e-06,
186370
+ "loss": 0.3592,
186371
+ "step": 68385
186372
+ },
186373
+ {
186374
+ "epoch": 550.56,
186375
+ "learning_rate": 8.920448717948719e-06,
186376
+ "loss": 0.5418,
186377
+ "step": 68390
186378
+ },
186379
+ {
186380
+ "epoch": 550.6,
186381
+ "learning_rate": 8.92036858974359e-06,
186382
+ "loss": 1.1373,
186383
+ "step": 68395
186384
+ },
186385
+ {
186386
+ "epoch": 550.64,
186387
+ "learning_rate": 8.920288461538462e-06,
186388
+ "loss": 0.4661,
186389
+ "step": 68400
186390
+ },
186391
+ {
186392
+ "epoch": 550.68,
186393
+ "learning_rate": 8.920208333333335e-06,
186394
+ "loss": 0.3396,
186395
+ "step": 68405
186396
+ },
186397
+ {
186398
+ "epoch": 550.72,
186399
+ "learning_rate": 8.920128205128206e-06,
186400
+ "loss": 0.3348,
186401
+ "step": 68410
186402
+ },
186403
+ {
186404
+ "epoch": 550.76,
186405
+ "learning_rate": 8.920048076923078e-06,
186406
+ "loss": 0.4885,
186407
+ "step": 68415
186408
+ },
186409
+ {
186410
+ "epoch": 550.8,
186411
+ "learning_rate": 8.919967948717949e-06,
186412
+ "loss": 1.2031,
186413
+ "step": 68420
186414
+ },
186415
+ {
186416
+ "epoch": 550.84,
186417
+ "learning_rate": 8.919887820512822e-06,
186418
+ "loss": 0.353,
186419
+ "step": 68425
186420
+ },
186421
+ {
186422
+ "epoch": 550.88,
186423
+ "learning_rate": 8.919807692307693e-06,
186424
+ "loss": 0.3036,
186425
+ "step": 68430
186426
+ },
186427
+ {
186428
+ "epoch": 550.92,
186429
+ "learning_rate": 8.919727564102565e-06,
186430
+ "loss": 0.3383,
186431
+ "step": 68435
186432
+ },
186433
+ {
186434
+ "epoch": 550.96,
186435
+ "learning_rate": 8.919647435897436e-06,
186436
+ "loss": 0.4424,
186437
+ "step": 68440
186438
+ },
186439
+ {
186440
+ "epoch": 551.0,
186441
+ "eval_loss": 0.3531026244163513,
186442
+ "eval_runtime": 41.0545,
186443
+ "eval_samples_per_second": 20.436,
186444
+ "eval_steps_per_second": 0.658,
186445
+ "eval_wer": 0.18525332578545145,
186446
+ "step": 68444
186447
+ },
186448
+ {
186449
+ "epoch": 551.01,
186450
+ "learning_rate": 8.919567307692309e-06,
186451
+ "loss": 0.3373,
186452
+ "step": 68445
186453
+ },
186454
+ {
186455
+ "epoch": 551.05,
186456
+ "learning_rate": 8.91948717948718e-06,
186457
+ "loss": 0.2903,
186458
+ "step": 68450
186459
+ },
186460
+ {
186461
+ "epoch": 551.09,
186462
+ "learning_rate": 8.919407051282052e-06,
186463
+ "loss": 0.2579,
186464
+ "step": 68455
186465
+ },
186466
+ {
186467
+ "epoch": 551.13,
186468
+ "learning_rate": 8.919326923076925e-06,
186469
+ "loss": 0.3771,
186470
+ "step": 68460
186471
+ },
186472
+ {
186473
+ "epoch": 551.17,
186474
+ "learning_rate": 8.919246794871796e-06,
186475
+ "loss": 0.5803,
186476
+ "step": 68465
186477
+ },
186478
+ {
186479
+ "epoch": 551.21,
186480
+ "learning_rate": 8.919166666666668e-06,
186481
+ "loss": 1.1431,
186482
+ "step": 68470
186483
+ },
186484
+ {
186485
+ "epoch": 551.25,
186486
+ "learning_rate": 8.919086538461539e-06,
186487
+ "loss": 0.3321,
186488
+ "step": 68475
186489
+ },
186490
+ {
186491
+ "epoch": 551.29,
186492
+ "learning_rate": 8.919006410256412e-06,
186493
+ "loss": 0.3079,
186494
+ "step": 68480
186495
+ },
186496
+ {
186497
+ "epoch": 551.33,
186498
+ "learning_rate": 8.918926282051282e-06,
186499
+ "loss": 0.3175,
186500
+ "step": 68485
186501
+ },
186502
+ {
186503
+ "epoch": 551.37,
186504
+ "learning_rate": 8.918846153846155e-06,
186505
+ "loss": 0.6332,
186506
+ "step": 68490
186507
+ },
186508
+ {
186509
+ "epoch": 551.41,
186510
+ "learning_rate": 8.918766025641028e-06,
186511
+ "loss": 1.0984,
186512
+ "step": 68495
186513
+ },
186514
+ {
186515
+ "epoch": 551.45,
186516
+ "learning_rate": 8.918685897435897e-06,
186517
+ "loss": 0.3226,
186518
+ "step": 68500
186519
+ },
186520
+ {
186521
+ "epoch": 551.49,
186522
+ "learning_rate": 8.91860576923077e-06,
186523
+ "loss": 0.277,
186524
+ "step": 68505
186525
+ },
186526
+ {
186527
+ "epoch": 551.53,
186528
+ "learning_rate": 8.918525641025642e-06,
186529
+ "loss": 0.3595,
186530
+ "step": 68510
186531
+ },
186532
+ {
186533
+ "epoch": 551.57,
186534
+ "learning_rate": 8.918445512820513e-06,
186535
+ "loss": 0.6192,
186536
+ "step": 68515
186537
+ },
186538
+ {
186539
+ "epoch": 551.61,
186540
+ "learning_rate": 8.918365384615385e-06,
186541
+ "loss": 1.1009,
186542
+ "step": 68520
186543
+ },
186544
+ {
186545
+ "epoch": 551.65,
186546
+ "learning_rate": 8.918285256410258e-06,
186547
+ "loss": 0.3705,
186548
+ "step": 68525
186549
+ },
186550
+ {
186551
+ "epoch": 551.69,
186552
+ "learning_rate": 8.918205128205129e-06,
186553
+ "loss": 0.2911,
186554
+ "step": 68530
186555
+ },
186556
+ {
186557
+ "epoch": 551.73,
186558
+ "learning_rate": 8.918125e-06,
186559
+ "loss": 0.3628,
186560
+ "step": 68535
186561
+ },
186562
+ {
186563
+ "epoch": 551.77,
186564
+ "learning_rate": 8.918044871794872e-06,
186565
+ "loss": 0.533,
186566
+ "step": 68540
186567
+ },
186568
+ {
186569
+ "epoch": 551.81,
186570
+ "learning_rate": 8.917964743589745e-06,
186571
+ "loss": 1.2326,
186572
+ "step": 68545
186573
+ },
186574
+ {
186575
+ "epoch": 551.85,
186576
+ "learning_rate": 8.917884615384616e-06,
186577
+ "loss": 0.3388,
186578
+ "step": 68550
186579
+ },
186580
+ {
186581
+ "epoch": 551.89,
186582
+ "learning_rate": 8.917804487179487e-06,
186583
+ "loss": 0.3131,
186584
+ "step": 68555
186585
+ },
186586
+ {
186587
+ "epoch": 551.93,
186588
+ "learning_rate": 8.91772435897436e-06,
186589
+ "loss": 0.3515,
186590
+ "step": 68560
186591
+ },
186592
+ {
186593
+ "epoch": 551.97,
186594
+ "learning_rate": 8.917644230769232e-06,
186595
+ "loss": 0.6563,
186596
+ "step": 68565
186597
+ },
186598
+ {
186599
+ "epoch": 552.0,
186600
+ "eval_loss": 0.4913657009601593,
186601
+ "eval_runtime": 39.6316,
186602
+ "eval_samples_per_second": 21.17,
186603
+ "eval_steps_per_second": 0.681,
186604
+ "eval_wer": 0.19267410087881248,
186605
+ "step": 68568
186606
+ },
186607
+ {
186608
+ "epoch": 548.02,
186609
+ "learning_rate": 8.917564102564103e-06,
186610
+ "loss": 0.4739,
186611
+ "step": 68570
186612
+ },
186613
+ {
186614
+ "epoch": 548.06,
186615
+ "learning_rate": 8.917483974358975e-06,
186616
+ "loss": 0.316,
186617
+ "step": 68575
186618
+ },
186619
+ {
186620
+ "epoch": 548.1,
186621
+ "learning_rate": 8.917403846153848e-06,
186622
+ "loss": 0.3383,
186623
+ "step": 68580
186624
+ },
186625
+ {
186626
+ "epoch": 548.14,
186627
+ "learning_rate": 8.917323717948719e-06,
186628
+ "loss": 0.385,
186629
+ "step": 68585
186630
+ },
186631
+ {
186632
+ "epoch": 548.18,
186633
+ "learning_rate": 8.91724358974359e-06,
186634
+ "loss": 0.6467,
186635
+ "step": 68590
186636
+ },
186637
+ {
186638
+ "epoch": 548.22,
186639
+ "learning_rate": 8.917163461538463e-06,
186640
+ "loss": 1.0687,
186641
+ "step": 68595
186642
+ },
186643
+ {
186644
+ "epoch": 548.26,
186645
+ "learning_rate": 8.917083333333335e-06,
186646
+ "loss": 0.33,
186647
+ "step": 68600
186648
+ },
186649
+ {
186650
+ "epoch": 548.3,
186651
+ "learning_rate": 8.917003205128206e-06,
186652
+ "loss": 0.3272,
186653
+ "step": 68605
186654
+ },
186655
+ {
186656
+ "epoch": 548.34,
186657
+ "learning_rate": 8.916923076923077e-06,
186658
+ "loss": 0.3578,
186659
+ "step": 68610
186660
+ },
186661
+ {
186662
+ "epoch": 548.38,
186663
+ "learning_rate": 8.91684294871795e-06,
186664
+ "loss": 0.6878,
186665
+ "step": 68615
186666
+ },
186667
+ {
186668
+ "epoch": 548.42,
186669
+ "learning_rate": 8.91676282051282e-06,
186670
+ "loss": 0.9201,
186671
+ "step": 68620
186672
+ },
186673
+ {
186674
+ "epoch": 548.46,
186675
+ "learning_rate": 8.916682692307693e-06,
186676
+ "loss": 0.2895,
186677
+ "step": 68625
186678
+ },
186679
+ {
186680
+ "epoch": 548.5,
186681
+ "learning_rate": 8.916602564102565e-06,
186682
+ "loss": 0.3369,
186683
+ "step": 68630
186684
+ },
186685
+ {
186686
+ "epoch": 548.54,
186687
+ "learning_rate": 8.916522435897436e-06,
186688
+ "loss": 0.3617,
186689
+ "step": 68635
186690
+ },
186691
+ {
186692
+ "epoch": 548.58,
186693
+ "learning_rate": 8.916442307692307e-06,
186694
+ "loss": 0.5913,
186695
+ "step": 68640
186696
+ },
186697
+ {
186698
+ "epoch": 548.62,
186699
+ "learning_rate": 8.91636217948718e-06,
186700
+ "loss": 1.1614,
186701
+ "step": 68645
186702
+ },
186703
+ {
186704
+ "epoch": 548.66,
186705
+ "learning_rate": 8.916282051282052e-06,
186706
+ "loss": 0.2917,
186707
+ "step": 68650
186708
+ },
186709
+ {
186710
+ "epoch": 548.7,
186711
+ "learning_rate": 8.916201923076923e-06,
186712
+ "loss": 0.3465,
186713
+ "step": 68655
186714
+ },
186715
+ {
186716
+ "epoch": 548.74,
186717
+ "learning_rate": 8.916121794871796e-06,
186718
+ "loss": 0.3442,
186719
+ "step": 68660
186720
+ },
186721
+ {
186722
+ "epoch": 548.78,
186723
+ "learning_rate": 8.916041666666667e-06,
186724
+ "loss": 0.7135,
186725
+ "step": 68665
186726
+ },
186727
+ {
186728
+ "epoch": 548.82,
186729
+ "learning_rate": 8.915961538461539e-06,
186730
+ "loss": 1.1436,
186731
+ "step": 68670
186732
+ },
186733
+ {
186734
+ "epoch": 548.86,
186735
+ "learning_rate": 8.91588141025641e-06,
186736
+ "loss": 0.3227,
186737
+ "step": 68675
186738
+ },
186739
+ {
186740
+ "epoch": 548.9,
186741
+ "learning_rate": 8.915801282051283e-06,
186742
+ "loss": 0.3273,
186743
+ "step": 68680
186744
+ },
186745
+ {
186746
+ "epoch": 548.94,
186747
+ "learning_rate": 8.915721153846155e-06,
186748
+ "loss": 0.3505,
186749
+ "step": 68685
186750
+ },
186751
+ {
186752
+ "epoch": 548.98,
186753
+ "learning_rate": 8.915641025641026e-06,
186754
+ "loss": 0.6269,
186755
+ "step": 68690
186756
+ },
186757
+ {
186758
+ "epoch": 549.0,
186759
+ "eval_loss": 0.4549594819545746,
186760
+ "eval_runtime": 40.2207,
186761
+ "eval_samples_per_second": 20.86,
186762
+ "eval_steps_per_second": 0.671,
186763
+ "eval_wer": 0.1973022238425082,
186764
+ "step": 68693
186765
+ },
186766
+ {
186767
+ "epoch": 549.02,
186768
+ "learning_rate": 8.915560897435899e-06,
186769
+ "loss": 0.4356,
186770
+ "step": 68695
186771
+ },
186772
+ {
186773
+ "epoch": 549.06,
186774
+ "learning_rate": 8.91548076923077e-06,
186775
+ "loss": 0.3655,
186776
+ "step": 68700
186777
+ },
186778
+ {
186779
+ "epoch": 549.1,
186780
+ "learning_rate": 8.915400641025642e-06,
186781
+ "loss": 0.3215,
186782
+ "step": 68705
186783
+ },
186784
+ {
186785
+ "epoch": 549.14,
186786
+ "learning_rate": 8.915320512820513e-06,
186787
+ "loss": 0.3813,
186788
+ "step": 68710
186789
+ },
186790
+ {
186791
+ "epoch": 549.18,
186792
+ "learning_rate": 8.915240384615386e-06,
186793
+ "loss": 0.6744,
186794
+ "step": 68715
186795
+ },
186796
+ {
186797
+ "epoch": 549.22,
186798
+ "learning_rate": 8.915160256410257e-06,
186799
+ "loss": 0.9949,
186800
+ "step": 68720
186801
+ },
186802
+ {
186803
+ "epoch": 549.25,
186804
+ "learning_rate": 8.915080128205129e-06,
186805
+ "loss": 0.3279,
186806
+ "step": 68725
186807
+ },
186808
+ {
186809
+ "epoch": 549.29,
186810
+ "learning_rate": 8.915e-06,
186811
+ "loss": 0.3263,
186812
+ "step": 68730
186813
+ },
186814
+ {
186815
+ "epoch": 549.33,
186816
+ "learning_rate": 8.914919871794873e-06,
186817
+ "loss": 0.3473,
186818
+ "step": 68735
186819
+ },
186820
+ {
186821
+ "epoch": 549.37,
186822
+ "learning_rate": 8.914839743589745e-06,
186823
+ "loss": 0.628,
186824
+ "step": 68740
186825
+ },
186826
+ {
186827
+ "epoch": 549.41,
186828
+ "learning_rate": 8.914759615384616e-06,
186829
+ "loss": 0.9994,
186830
+ "step": 68745
186831
+ },
186832
+ {
186833
+ "epoch": 549.45,
186834
+ "learning_rate": 8.914679487179489e-06,
186835
+ "loss": 0.3344,
186836
+ "step": 68750
186837
+ },
186838
+ {
186839
+ "epoch": 549.49,
186840
+ "learning_rate": 8.91459935897436e-06,
186841
+ "loss": 0.2747,
186842
+ "step": 68755
186843
+ },
186844
+ {
186845
+ "epoch": 549.53,
186846
+ "learning_rate": 8.914519230769232e-06,
186847
+ "loss": 0.4123,
186848
+ "step": 68760
186849
+ },
186850
+ {
186851
+ "epoch": 549.57,
186852
+ "learning_rate": 8.914439102564103e-06,
186853
+ "loss": 0.7106,
186854
+ "step": 68765
186855
+ },
186856
+ {
186857
+ "epoch": 549.61,
186858
+ "learning_rate": 8.914358974358976e-06,
186859
+ "loss": 1.1318,
186860
+ "step": 68770
186861
+ },
186862
+ {
186863
+ "epoch": 549.65,
186864
+ "learning_rate": 8.914278846153846e-06,
186865
+ "loss": 0.4238,
186866
+ "step": 68775
186867
+ },
186868
+ {
186869
+ "epoch": 549.69,
186870
+ "learning_rate": 8.914198717948719e-06,
186871
+ "loss": 0.4404,
186872
+ "step": 68780
186873
+ },
186874
+ {
186875
+ "epoch": 549.73,
186876
+ "learning_rate": 8.91411858974359e-06,
186877
+ "loss": 0.3851,
186878
+ "step": 68785
186879
+ },
186880
+ {
186881
+ "epoch": 549.77,
186882
+ "learning_rate": 8.914038461538462e-06,
186883
+ "loss": 0.6609,
186884
+ "step": 68790
186885
+ },
186886
+ {
186887
+ "epoch": 549.81,
186888
+ "learning_rate": 8.913958333333335e-06,
186889
+ "loss": 1.2404,
186890
+ "step": 68795
186891
+ },
186892
+ {
186893
+ "epoch": 549.85,
186894
+ "learning_rate": 8.913878205128206e-06,
186895
+ "loss": 0.3031,
186896
+ "step": 68800
186897
+ },
186898
+ {
186899
+ "epoch": 549.89,
186900
+ "learning_rate": 8.913798076923077e-06,
186901
+ "loss": 0.2821,
186902
+ "step": 68805
186903
+ },
186904
+ {
186905
+ "epoch": 549.93,
186906
+ "learning_rate": 8.913717948717949e-06,
186907
+ "loss": 0.3476,
186908
+ "step": 68810
186909
+ },
186910
+ {
186911
+ "epoch": 549.97,
186912
+ "learning_rate": 8.913637820512822e-06,
186913
+ "loss": 0.6489,
186914
+ "step": 68815
186915
+ },
186916
+ {
186917
+ "epoch": 550.0,
186918
+ "eval_loss": 0.351525217294693,
186919
+ "eval_runtime": 39.2564,
186920
+ "eval_samples_per_second": 21.347,
186921
+ "eval_steps_per_second": 0.688,
186922
+ "eval_wer": 0.1943950177935943,
186923
+ "step": 68818
186924
  }
186925
  ],
186926
+ "max_steps": 625000,
186927
  "num_train_epochs": 5000,
186928
+ "total_flos": 1.936554826303671e+20,
186929
  "trial_name": null,
186930
  "trial_params": null
186931
  }
model-bin/finetune/base/{checkpoint-68195 β†’ checkpoint-68818}/training_args.bin RENAMED
File without changes
model-bin/finetune/base/log/1629840697.4590368/events.out.tfevents.1629840697.c435e1c5ee04.920.231 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07a11d389d51048cd2ce4bcfef6ffa2e954f086370dcadcd4bb5c03ef9e360b
3
+ size 4194
model-bin/finetune/base/log/1629841338.3804567/events.out.tfevents.1629841338.c435e1c5ee04.920.233 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c829abf30deb7cfa6fa48b2639c9afe7c790fb210c6093e415740d15fb905077
3
+ size 4194
model-bin/finetune/base/log/1629841998.772923/events.out.tfevents.1629841998.c435e1c5ee04.920.235 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61c8438d267fe9e56c4349eced0d88eb8b8605160ee2ec76dd42531323356080
3
+ size 4194
model-bin/finetune/base/log/1629842644.4759989/events.out.tfevents.1629842644.c435e1c5ee04.920.237 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2a3f81f631f4a49c90c62d5db72585e8a7d7955c646ae7afb49d7777047c0d
3
+ size 4194
model-bin/finetune/base/log/1629843285.8477933/events.out.tfevents.1629843285.c435e1c5ee04.920.239 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7bb7495863188c8fc2968cdff628df60f30fd025fed46f8974d57e45a8043c
3
+ size 4194
model-bin/finetune/base/log/events.out.tfevents.1629840697.c435e1c5ee04.920.230 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df0714608e830691baa16d7b5e1885b5d2a2dcbdb03f679d3e245ca205a76e1e
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629841338.c435e1c5ee04.920.232 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb8646038d3ed73d99a1241bdec464d7fd7f3aa7d0837b1bc59606de7f15ac59
3
+ size 8462
model-bin/finetune/base/log/events.out.tfevents.1629841998.c435e1c5ee04.920.234 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baef5622116291ac5a532ca4c0d01e970ff516a3f405da6b1d4aa97502c3f3ff
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629842644.c435e1c5ee04.920.236 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4e5082081d3757b14433504efa1eee4ffcb706f6a4d94b4b777ba0f89c6bae
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629843285.c435e1c5ee04.920.238 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29ce135620281801cf6a5458af019b88f4a58e0ed575197fe9e714af8cf35f5
3
+ size 8622