Check commited on
Commit
b894548
Β·
1 Parent(s): 6861a9f

"auto-commit"

Browse files
Files changed (19) hide show
  1. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/config.json +0 -0
  2. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/optimizer.pt +1 -1
  3. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/preprocessor_config.json +0 -0
  4. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/pytorch_model.bin +1 -1
  5. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/rng_state.pth +2 -2
  6. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/scaler.pt +1 -1
  7. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/scheduler.pt +1 -1
  8. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/trainer_state.json +793 -4
  9. model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/training_args.bin +0 -0
  10. model-bin/finetune/base/log/1629896881.4320064/events.out.tfevents.1629896881.7e498afd5545.7645.35 +3 -0
  11. model-bin/finetune/base/log/1629897354.142085/events.out.tfevents.1629897354.7e498afd5545.7645.37 +3 -0
  12. model-bin/finetune/base/log/1629897817.6067963/events.out.tfevents.1629897817.7e498afd5545.7645.39 +3 -0
  13. model-bin/finetune/base/log/1629898283.2853954/events.out.tfevents.1629898283.7e498afd5545.7645.41 +3 -0
  14. model-bin/finetune/base/log/1629898756.3973496/events.out.tfevents.1629898756.7e498afd5545.7645.43 +3 -0
  15. model-bin/finetune/base/log/events.out.tfevents.1629896881.7e498afd5545.7645.34 +3 -0
  16. model-bin/finetune/base/log/events.out.tfevents.1629897354.7e498afd5545.7645.36 +3 -0
  17. model-bin/finetune/base/log/events.out.tfevents.1629897817.7e498afd5545.7645.38 +3 -0
  18. model-bin/finetune/base/log/events.out.tfevents.1629898283.7e498afd5545.7645.40 +3 -0
  19. model-bin/finetune/base/log/events.out.tfevents.1629898756.7e498afd5545.7645.42 +3 -0
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61a8b0a7adecf1a8e9ffdcbc7ea1d0c430e0f4915a37015f6f24f65a184c4dcf
3
  size 722165393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f64e4cff009a5eded3493ab62103df495f530c979f6cf26437d1f53ff63216
3
  size 722165393
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/preprocessor_config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ed4f9f480dc04b7c1b9a03c96d135c3ea038740074fc4dbb931f7003319cc2
3
  size 377909911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c7efd6688eb76b3c85ac4778a8f34067f39e8f7792b58fe6f5237972aecfccd
3
  size 377909911
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08c2647acfb5b49bae969d7133c47291a13692196c268162e693bd3db7b0b8e5
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e2855477cf4f44f6936d902653afb56e3aec4404923ab7e41a06c920deac1b3
3
+ size 14567
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dedda88080e30bde75dd07d24e3957d07cf65d683f3fcca5da54a4d262734ab
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22dd04c6be986b933dbf36885b7a308ab00202782f956c433993fd7d5ef14eee
3
  size 559
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c94b37533bc725b49922bd0401ee468e8837e745c87213756a34d1c1ab20c1c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5953ed201d78a6b8ec30853512b2de7c9867c7338b9888cde9a2224b7fb7f365
3
  size 623
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 0.18412114350410416,
3
  "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-69565",
4
- "epoch": 627.0,
5
- "global_step": 78401,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -199116,11 +199116,800 @@
199116
  "eval_steps_per_second": 0.65,
199117
  "eval_wer": 0.19250363901018921,
199118
  "step": 78401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199119
  }
199120
  ],
199121
- "max_steps": 625000,
199122
  "num_train_epochs": 5000,
199123
- "total_flos": 2.2063508310558517e+20,
199124
  "trial_name": null,
199125
  "trial_params": null
199126
  }
 
1
  {
2
  "best_metric": 0.18412114350410416,
3
  "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-69565",
4
+ "epoch": 636.995983935743,
5
+ "global_step": 79024,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
199116
  "eval_steps_per_second": 0.65,
199117
  "eval_wer": 0.19250363901018921,
199118
  "step": 78401
199119
+ },
199120
+ {
199121
+ "epoch": 627.03,
199122
+ "learning_rate": 8.750016155088854e-06,
199123
+ "loss": 0.3852,
199124
+ "step": 78405
199125
+ },
199126
+ {
199127
+ "epoch": 627.07,
199128
+ "learning_rate": 8.749935379644588e-06,
199129
+ "loss": 0.334,
199130
+ "step": 78410
199131
+ },
199132
+ {
199133
+ "epoch": 627.11,
199134
+ "learning_rate": 8.749854604200324e-06,
199135
+ "loss": 0.299,
199136
+ "step": 78415
199137
+ },
199138
+ {
199139
+ "epoch": 627.15,
199140
+ "learning_rate": 8.749773828756058e-06,
199141
+ "loss": 0.4201,
199142
+ "step": 78420
199143
+ },
199144
+ {
199145
+ "epoch": 627.19,
199146
+ "learning_rate": 8.749693053311794e-06,
199147
+ "loss": 0.9708,
199148
+ "step": 78425
199149
+ },
199150
+ {
199151
+ "epoch": 627.23,
199152
+ "learning_rate": 8.749612277867528e-06,
199153
+ "loss": 0.6433,
199154
+ "step": 78430
199155
+ },
199156
+ {
199157
+ "epoch": 627.27,
199158
+ "learning_rate": 8.749531502423264e-06,
199159
+ "loss": 0.3363,
199160
+ "step": 78435
199161
+ },
199162
+ {
199163
+ "epoch": 627.31,
199164
+ "learning_rate": 8.749450726978998e-06,
199165
+ "loss": 0.3599,
199166
+ "step": 78440
199167
+ },
199168
+ {
199169
+ "epoch": 627.35,
199170
+ "learning_rate": 8.749369951534734e-06,
199171
+ "loss": 0.4116,
199172
+ "step": 78445
199173
+ },
199174
+ {
199175
+ "epoch": 627.39,
199176
+ "learning_rate": 8.74928917609047e-06,
199177
+ "loss": 1.0044,
199178
+ "step": 78450
199179
+ },
199180
+ {
199181
+ "epoch": 627.43,
199182
+ "learning_rate": 8.749208400646204e-06,
199183
+ "loss": 0.8095,
199184
+ "step": 78455
199185
+ },
199186
+ {
199187
+ "epoch": 627.47,
199188
+ "learning_rate": 8.74912762520194e-06,
199189
+ "loss": 0.3486,
199190
+ "step": 78460
199191
+ },
199192
+ {
199193
+ "epoch": 627.51,
199194
+ "learning_rate": 8.749046849757674e-06,
199195
+ "loss": 0.2948,
199196
+ "step": 78465
199197
+ },
199198
+ {
199199
+ "epoch": 627.55,
199200
+ "learning_rate": 8.74896607431341e-06,
199201
+ "loss": 0.4802,
199202
+ "step": 78470
199203
+ },
199204
+ {
199205
+ "epoch": 627.59,
199206
+ "learning_rate": 8.748885298869144e-06,
199207
+ "loss": 0.9969,
199208
+ "step": 78475
199209
+ },
199210
+ {
199211
+ "epoch": 627.63,
199212
+ "learning_rate": 8.74880452342488e-06,
199213
+ "loss": 0.662,
199214
+ "step": 78480
199215
+ },
199216
+ {
199217
+ "epoch": 627.67,
199218
+ "learning_rate": 8.748723747980614e-06,
199219
+ "loss": 0.3773,
199220
+ "step": 78485
199221
+ },
199222
+ {
199223
+ "epoch": 627.71,
199224
+ "learning_rate": 8.74864297253635e-06,
199225
+ "loss": 0.3748,
199226
+ "step": 78490
199227
+ },
199228
+ {
199229
+ "epoch": 627.75,
199230
+ "learning_rate": 8.748562197092084e-06,
199231
+ "loss": 0.4032,
199232
+ "step": 78495
199233
+ },
199234
+ {
199235
+ "epoch": 627.79,
199236
+ "learning_rate": 8.74848142164782e-06,
199237
+ "loss": 0.9498,
199238
+ "step": 78500
199239
+ },
199240
+ {
199241
+ "epoch": 627.83,
199242
+ "learning_rate": 8.748400646203556e-06,
199243
+ "loss": 0.6955,
199244
+ "step": 78505
199245
+ },
199246
+ {
199247
+ "epoch": 627.87,
199248
+ "learning_rate": 8.74831987075929e-06,
199249
+ "loss": 0.3037,
199250
+ "step": 78510
199251
+ },
199252
+ {
199253
+ "epoch": 627.91,
199254
+ "learning_rate": 8.748239095315026e-06,
199255
+ "loss": 0.3241,
199256
+ "step": 78515
199257
+ },
199258
+ {
199259
+ "epoch": 627.95,
199260
+ "learning_rate": 8.74815831987076e-06,
199261
+ "loss": 0.4145,
199262
+ "step": 78520
199263
+ },
199264
+ {
199265
+ "epoch": 627.99,
199266
+ "learning_rate": 8.748077544426495e-06,
199267
+ "loss": 0.907,
199268
+ "step": 78525
199269
+ },
199270
+ {
199271
+ "epoch": 628.0,
199272
+ "eval_loss": 0.4018295109272003,
199273
+ "eval_runtime": 44.0091,
199274
+ "eval_samples_per_second": 18.996,
199275
+ "eval_steps_per_second": 0.614,
199276
+ "eval_wer": 0.1865575183110728,
199277
+ "step": 78526
199278
+ },
199279
+ {
199280
+ "epoch": 628.03,
199281
+ "learning_rate": 8.74799676898223e-06,
199282
+ "loss": 0.3616,
199283
+ "step": 78530
199284
+ },
199285
+ {
199286
+ "epoch": 628.07,
199287
+ "learning_rate": 8.747915993537965e-06,
199288
+ "loss": 0.3102,
199289
+ "step": 78535
199290
+ },
199291
+ {
199292
+ "epoch": 628.11,
199293
+ "learning_rate": 8.7478352180937e-06,
199294
+ "loss": 0.327,
199295
+ "step": 78540
199296
+ },
199297
+ {
199298
+ "epoch": 628.15,
199299
+ "learning_rate": 8.747754442649435e-06,
199300
+ "loss": 0.4266,
199301
+ "step": 78545
199302
+ },
199303
+ {
199304
+ "epoch": 628.19,
199305
+ "learning_rate": 8.74767366720517e-06,
199306
+ "loss": 1.018,
199307
+ "step": 78550
199308
+ },
199309
+ {
199310
+ "epoch": 628.23,
199311
+ "learning_rate": 8.747592891760905e-06,
199312
+ "loss": 0.6763,
199313
+ "step": 78555
199314
+ },
199315
+ {
199316
+ "epoch": 628.27,
199317
+ "learning_rate": 8.747512116316641e-06,
199318
+ "loss": 0.2833,
199319
+ "step": 78560
199320
+ },
199321
+ {
199322
+ "epoch": 628.31,
199323
+ "learning_rate": 8.747431340872375e-06,
199324
+ "loss": 0.3231,
199325
+ "step": 78565
199326
+ },
199327
+ {
199328
+ "epoch": 628.35,
199329
+ "learning_rate": 8.747350565428111e-06,
199330
+ "loss": 0.362,
199331
+ "step": 78570
199332
+ },
199333
+ {
199334
+ "epoch": 628.39,
199335
+ "learning_rate": 8.747269789983845e-06,
199336
+ "loss": 0.8538,
199337
+ "step": 78575
199338
+ },
199339
+ {
199340
+ "epoch": 628.43,
199341
+ "learning_rate": 8.747189014539581e-06,
199342
+ "loss": 0.6172,
199343
+ "step": 78580
199344
+ },
199345
+ {
199346
+ "epoch": 628.47,
199347
+ "learning_rate": 8.747108239095315e-06,
199348
+ "loss": 0.2573,
199349
+ "step": 78585
199350
+ },
199351
+ {
199352
+ "epoch": 628.51,
199353
+ "learning_rate": 8.747027463651051e-06,
199354
+ "loss": 0.2711,
199355
+ "step": 78590
199356
+ },
199357
+ {
199358
+ "epoch": 628.55,
199359
+ "learning_rate": 8.746946688206785e-06,
199360
+ "loss": 0.5042,
199361
+ "step": 78595
199362
+ },
199363
+ {
199364
+ "epoch": 628.59,
199365
+ "learning_rate": 8.746865912762521e-06,
199366
+ "loss": 1.0298,
199367
+ "step": 78600
199368
+ },
199369
+ {
199370
+ "epoch": 628.63,
199371
+ "learning_rate": 8.746785137318255e-06,
199372
+ "loss": 0.7017,
199373
+ "step": 78605
199374
+ },
199375
+ {
199376
+ "epoch": 628.67,
199377
+ "learning_rate": 8.746704361873991e-06,
199378
+ "loss": 0.3638,
199379
+ "step": 78610
199380
+ },
199381
+ {
199382
+ "epoch": 628.71,
199383
+ "learning_rate": 8.746623586429725e-06,
199384
+ "loss": 0.3454,
199385
+ "step": 78615
199386
+ },
199387
+ {
199388
+ "epoch": 628.75,
199389
+ "learning_rate": 8.746542810985461e-06,
199390
+ "loss": 0.4958,
199391
+ "step": 78620
199392
+ },
199393
+ {
199394
+ "epoch": 628.79,
199395
+ "learning_rate": 8.746462035541197e-06,
199396
+ "loss": 1.0455,
199397
+ "step": 78625
199398
+ },
199399
+ {
199400
+ "epoch": 628.83,
199401
+ "learning_rate": 8.746381260096931e-06,
199402
+ "loss": 0.7148,
199403
+ "step": 78630
199404
+ },
199405
+ {
199406
+ "epoch": 628.87,
199407
+ "learning_rate": 8.746300484652667e-06,
199408
+ "loss": 0.2964,
199409
+ "step": 78635
199410
+ },
199411
+ {
199412
+ "epoch": 628.91,
199413
+ "learning_rate": 8.746219709208401e-06,
199414
+ "loss": 0.3861,
199415
+ "step": 78640
199416
+ },
199417
+ {
199418
+ "epoch": 628.95,
199419
+ "learning_rate": 8.746138933764137e-06,
199420
+ "loss": 0.4008,
199421
+ "step": 78645
199422
+ },
199423
+ {
199424
+ "epoch": 628.99,
199425
+ "learning_rate": 8.746058158319871e-06,
199426
+ "loss": 0.9843,
199427
+ "step": 78650
199428
+ },
199429
+ {
199430
+ "epoch": 629.0,
199431
+ "eval_loss": 0.40389513969421387,
199432
+ "eval_runtime": 41.4895,
199433
+ "eval_samples_per_second": 20.174,
199434
+ "eval_steps_per_second": 0.651,
199435
+ "eval_wer": 0.19676647574856812,
199436
+ "step": 78651
199437
+ },
199438
+ {
199439
+ "epoch": 634.03,
199440
+ "learning_rate": 8.745977382875607e-06,
199441
+ "loss": 0.3614,
199442
+ "step": 78655
199443
+ },
199444
+ {
199445
+ "epoch": 634.07,
199446
+ "learning_rate": 8.745896607431341e-06,
199447
+ "loss": 0.3363,
199448
+ "step": 78660
199449
+ },
199450
+ {
199451
+ "epoch": 634.11,
199452
+ "learning_rate": 8.745815831987077e-06,
199453
+ "loss": 0.291,
199454
+ "step": 78665
199455
+ },
199456
+ {
199457
+ "epoch": 634.15,
199458
+ "learning_rate": 8.745735056542811e-06,
199459
+ "loss": 0.4561,
199460
+ "step": 78670
199461
+ },
199462
+ {
199463
+ "epoch": 634.19,
199464
+ "learning_rate": 8.745654281098547e-06,
199465
+ "loss": 1.0624,
199466
+ "step": 78675
199467
+ },
199468
+ {
199469
+ "epoch": 634.23,
199470
+ "learning_rate": 8.745573505654283e-06,
199471
+ "loss": 0.7522,
199472
+ "step": 78680
199473
+ },
199474
+ {
199475
+ "epoch": 634.27,
199476
+ "learning_rate": 8.745492730210017e-06,
199477
+ "loss": 0.3011,
199478
+ "step": 78685
199479
+ },
199480
+ {
199481
+ "epoch": 634.31,
199482
+ "learning_rate": 8.745411954765753e-06,
199483
+ "loss": 0.2983,
199484
+ "step": 78690
199485
+ },
199486
+ {
199487
+ "epoch": 634.35,
199488
+ "learning_rate": 8.745331179321487e-06,
199489
+ "loss": 0.4159,
199490
+ "step": 78695
199491
+ },
199492
+ {
199493
+ "epoch": 634.39,
199494
+ "learning_rate": 8.745250403877223e-06,
199495
+ "loss": 0.8889,
199496
+ "step": 78700
199497
+ },
199498
+ {
199499
+ "epoch": 634.43,
199500
+ "learning_rate": 8.745169628432957e-06,
199501
+ "loss": 0.6537,
199502
+ "step": 78705
199503
+ },
199504
+ {
199505
+ "epoch": 634.47,
199506
+ "learning_rate": 8.745088852988693e-06,
199507
+ "loss": 0.2519,
199508
+ "step": 78710
199509
+ },
199510
+ {
199511
+ "epoch": 634.51,
199512
+ "learning_rate": 8.745008077544427e-06,
199513
+ "loss": 0.3454,
199514
+ "step": 78715
199515
+ },
199516
+ {
199517
+ "epoch": 634.55,
199518
+ "learning_rate": 8.744927302100163e-06,
199519
+ "loss": 0.6675,
199520
+ "step": 78720
199521
+ },
199522
+ {
199523
+ "epoch": 634.59,
199524
+ "learning_rate": 8.744846526655897e-06,
199525
+ "loss": 0.8333,
199526
+ "step": 78725
199527
+ },
199528
+ {
199529
+ "epoch": 634.63,
199530
+ "learning_rate": 8.744765751211633e-06,
199531
+ "loss": 0.6796,
199532
+ "step": 78730
199533
+ },
199534
+ {
199535
+ "epoch": 634.67,
199536
+ "learning_rate": 8.744684975767368e-06,
199537
+ "loss": 0.3397,
199538
+ "step": 78735
199539
+ },
199540
+ {
199541
+ "epoch": 634.71,
199542
+ "learning_rate": 8.744604200323102e-06,
199543
+ "loss": 0.3553,
199544
+ "step": 78740
199545
+ },
199546
+ {
199547
+ "epoch": 634.76,
199548
+ "learning_rate": 8.744523424878838e-06,
199549
+ "loss": 0.4072,
199550
+ "step": 78745
199551
+ },
199552
+ {
199553
+ "epoch": 634.8,
199554
+ "learning_rate": 8.744442649434572e-06,
199555
+ "loss": 0.8611,
199556
+ "step": 78750
199557
+ },
199558
+ {
199559
+ "epoch": 634.84,
199560
+ "learning_rate": 8.744361873990308e-06,
199561
+ "loss": 0.6651,
199562
+ "step": 78755
199563
+ },
199564
+ {
199565
+ "epoch": 634.88,
199566
+ "learning_rate": 8.744281098546042e-06,
199567
+ "loss": 0.3281,
199568
+ "step": 78760
199569
+ },
199570
+ {
199571
+ "epoch": 634.92,
199572
+ "learning_rate": 8.744200323101778e-06,
199573
+ "loss": 0.3102,
199574
+ "step": 78765
199575
+ },
199576
+ {
199577
+ "epoch": 634.96,
199578
+ "learning_rate": 8.744119547657512e-06,
199579
+ "loss": 0.4566,
199580
+ "step": 78770
199581
+ },
199582
+ {
199583
+ "epoch": 635.0,
199584
+ "learning_rate": 8.744038772213248e-06,
199585
+ "loss": 1.1649,
199586
+ "step": 78775
199587
+ },
199588
+ {
199589
+ "epoch": 635.0,
199590
+ "eval_loss": 0.42814478278160095,
199591
+ "eval_runtime": 40.8229,
199592
+ "eval_samples_per_second": 20.503,
199593
+ "eval_steps_per_second": 0.661,
199594
+ "eval_wer": 0.1919994007041726,
199595
+ "step": 78775
199596
+ },
199597
+ {
199598
+ "epoch": 630.04,
199599
+ "learning_rate": 8.743957996768982e-06,
199600
+ "loss": 0.3452,
199601
+ "step": 78780
199602
+ },
199603
+ {
199604
+ "epoch": 630.08,
199605
+ "learning_rate": 8.743877221324718e-06,
199606
+ "loss": 0.3256,
199607
+ "step": 78785
199608
+ },
199609
+ {
199610
+ "epoch": 630.12,
199611
+ "learning_rate": 8.743796445880452e-06,
199612
+ "loss": 0.4387,
199613
+ "step": 78790
199614
+ },
199615
+ {
199616
+ "epoch": 630.16,
199617
+ "learning_rate": 8.743715670436188e-06,
199618
+ "loss": 0.395,
199619
+ "step": 78795
199620
+ },
199621
+ {
199622
+ "epoch": 630.2,
199623
+ "learning_rate": 8.743634894991924e-06,
199624
+ "loss": 1.3145,
199625
+ "step": 78800
199626
+ },
199627
+ {
199628
+ "epoch": 630.24,
199629
+ "learning_rate": 8.743554119547658e-06,
199630
+ "loss": 0.3666,
199631
+ "step": 78805
199632
+ },
199633
+ {
199634
+ "epoch": 630.28,
199635
+ "learning_rate": 8.743473344103394e-06,
199636
+ "loss": 0.3053,
199637
+ "step": 78810
199638
+ },
199639
+ {
199640
+ "epoch": 630.32,
199641
+ "learning_rate": 8.743392568659128e-06,
199642
+ "loss": 0.3094,
199643
+ "step": 78815
199644
+ },
199645
+ {
199646
+ "epoch": 630.36,
199647
+ "learning_rate": 8.743311793214864e-06,
199648
+ "loss": 0.4993,
199649
+ "step": 78820
199650
+ },
199651
+ {
199652
+ "epoch": 630.4,
199653
+ "learning_rate": 8.743231017770598e-06,
199654
+ "loss": 1.2233,
199655
+ "step": 78825
199656
+ },
199657
+ {
199658
+ "epoch": 630.44,
199659
+ "learning_rate": 8.743150242326334e-06,
199660
+ "loss": 0.3475,
199661
+ "step": 78830
199662
+ },
199663
+ {
199664
+ "epoch": 630.48,
199665
+ "learning_rate": 8.743069466882068e-06,
199666
+ "loss": 0.5334,
199667
+ "step": 78835
199668
+ },
199669
+ {
199670
+ "epoch": 630.52,
199671
+ "learning_rate": 8.742988691437804e-06,
199672
+ "loss": 0.3864,
199673
+ "step": 78840
199674
+ },
199675
+ {
199676
+ "epoch": 630.56,
199677
+ "learning_rate": 8.742907915993538e-06,
199678
+ "loss": 0.483,
199679
+ "step": 78845
199680
+ },
199681
+ {
199682
+ "epoch": 630.6,
199683
+ "learning_rate": 8.742827140549274e-06,
199684
+ "loss": 1.0561,
199685
+ "step": 78850
199686
+ },
199687
+ {
199688
+ "epoch": 630.64,
199689
+ "learning_rate": 8.74274636510501e-06,
199690
+ "loss": 0.3234,
199691
+ "step": 78855
199692
+ },
199693
+ {
199694
+ "epoch": 630.68,
199695
+ "learning_rate": 8.742665589660744e-06,
199696
+ "loss": 0.3517,
199697
+ "step": 78860
199698
+ },
199699
+ {
199700
+ "epoch": 630.72,
199701
+ "learning_rate": 8.74258481421648e-06,
199702
+ "loss": 0.3457,
199703
+ "step": 78865
199704
+ },
199705
+ {
199706
+ "epoch": 630.76,
199707
+ "learning_rate": 8.742504038772214e-06,
199708
+ "loss": 0.4469,
199709
+ "step": 78870
199710
+ },
199711
+ {
199712
+ "epoch": 630.8,
199713
+ "learning_rate": 8.74242326332795e-06,
199714
+ "loss": 1.2126,
199715
+ "step": 78875
199716
+ },
199717
+ {
199718
+ "epoch": 630.84,
199719
+ "learning_rate": 8.742342487883684e-06,
199720
+ "loss": 0.3057,
199721
+ "step": 78880
199722
+ },
199723
+ {
199724
+ "epoch": 630.88,
199725
+ "learning_rate": 8.74226171243942e-06,
199726
+ "loss": 0.3012,
199727
+ "step": 78885
199728
+ },
199729
+ {
199730
+ "epoch": 630.92,
199731
+ "learning_rate": 8.742180936995154e-06,
199732
+ "loss": 0.3956,
199733
+ "step": 78890
199734
+ },
199735
+ {
199736
+ "epoch": 630.96,
199737
+ "learning_rate": 8.74210016155089e-06,
199738
+ "loss": 0.4191,
199739
+ "step": 78895
199740
+ },
199741
+ {
199742
+ "epoch": 631.0,
199743
+ "learning_rate": 8.742019386106624e-06,
199744
+ "loss": 1.1315,
199745
+ "step": 78900
199746
+ },
199747
+ {
199748
+ "epoch": 631.0,
199749
+ "eval_loss": 0.35649973154067993,
199750
+ "eval_runtime": 41.8167,
199751
+ "eval_samples_per_second": 19.992,
199752
+ "eval_steps_per_second": 0.646,
199753
+ "eval_wer": 0.19188435573818607,
199754
+ "step": 78900
199755
+ },
199756
+ {
199757
+ "epoch": 636.04,
199758
+ "learning_rate": 8.74193861066236e-06,
199759
+ "loss": 0.3542,
199760
+ "step": 78905
199761
+ },
199762
+ {
199763
+ "epoch": 636.08,
199764
+ "learning_rate": 8.741857835218095e-06,
199765
+ "loss": 0.3388,
199766
+ "step": 78910
199767
+ },
199768
+ {
199769
+ "epoch": 636.12,
199770
+ "learning_rate": 8.74177705977383e-06,
199771
+ "loss": 0.3729,
199772
+ "step": 78915
199773
+ },
199774
+ {
199775
+ "epoch": 636.16,
199776
+ "learning_rate": 8.741696284329565e-06,
199777
+ "loss": 0.4893,
199778
+ "step": 78920
199779
+ },
199780
+ {
199781
+ "epoch": 636.2,
199782
+ "learning_rate": 8.7416155088853e-06,
199783
+ "loss": 1.2058,
199784
+ "step": 78925
199785
+ },
199786
+ {
199787
+ "epoch": 636.24,
199788
+ "learning_rate": 8.741534733441035e-06,
199789
+ "loss": 0.3539,
199790
+ "step": 78930
199791
+ },
199792
+ {
199793
+ "epoch": 636.28,
199794
+ "learning_rate": 8.74145395799677e-06,
199795
+ "loss": 0.2974,
199796
+ "step": 78935
199797
+ },
199798
+ {
199799
+ "epoch": 636.32,
199800
+ "learning_rate": 8.741373182552505e-06,
199801
+ "loss": 0.3459,
199802
+ "step": 78940
199803
+ },
199804
+ {
199805
+ "epoch": 636.36,
199806
+ "learning_rate": 8.74129240710824e-06,
199807
+ "loss": 0.4149,
199808
+ "step": 78945
199809
+ },
199810
+ {
199811
+ "epoch": 636.4,
199812
+ "learning_rate": 8.741211631663975e-06,
199813
+ "loss": 1.3084,
199814
+ "step": 78950
199815
+ },
199816
+ {
199817
+ "epoch": 636.44,
199818
+ "learning_rate": 8.74113085621971e-06,
199819
+ "loss": 0.3186,
199820
+ "step": 78955
199821
+ },
199822
+ {
199823
+ "epoch": 636.48,
199824
+ "learning_rate": 8.741050080775445e-06,
199825
+ "loss": 0.3063,
199826
+ "step": 78960
199827
+ },
199828
+ {
199829
+ "epoch": 636.52,
199830
+ "learning_rate": 8.74096930533118e-06,
199831
+ "loss": 0.3157,
199832
+ "step": 78965
199833
+ },
199834
+ {
199835
+ "epoch": 636.56,
199836
+ "learning_rate": 8.740888529886915e-06,
199837
+ "loss": 0.4941,
199838
+ "step": 78970
199839
+ },
199840
+ {
199841
+ "epoch": 636.6,
199842
+ "learning_rate": 8.740807754442651e-06,
199843
+ "loss": 1.0257,
199844
+ "step": 78975
199845
+ },
199846
+ {
199847
+ "epoch": 636.64,
199848
+ "learning_rate": 8.740726978998385e-06,
199849
+ "loss": 0.344,
199850
+ "step": 78980
199851
+ },
199852
+ {
199853
+ "epoch": 636.68,
199854
+ "learning_rate": 8.740646203554121e-06,
199855
+ "loss": 0.2603,
199856
+ "step": 78985
199857
+ },
199858
+ {
199859
+ "epoch": 636.72,
199860
+ "learning_rate": 8.740565428109855e-06,
199861
+ "loss": 0.2939,
199862
+ "step": 78990
199863
+ },
199864
+ {
199865
+ "epoch": 636.76,
199866
+ "learning_rate": 8.740484652665591e-06,
199867
+ "loss": 0.4707,
199868
+ "step": 78995
199869
+ },
199870
+ {
199871
+ "epoch": 636.8,
199872
+ "learning_rate": 8.740403877221325e-06,
199873
+ "loss": 1.1715,
199874
+ "step": 79000
199875
+ },
199876
+ {
199877
+ "epoch": 636.84,
199878
+ "learning_rate": 8.740323101777061e-06,
199879
+ "loss": 0.3283,
199880
+ "step": 79005
199881
+ },
199882
+ {
199883
+ "epoch": 636.88,
199884
+ "learning_rate": 8.740242326332795e-06,
199885
+ "loss": 0.2645,
199886
+ "step": 79010
199887
+ },
199888
+ {
199889
+ "epoch": 636.92,
199890
+ "learning_rate": 8.740161550888531e-06,
199891
+ "loss": 0.3081,
199892
+ "step": 79015
199893
+ },
199894
+ {
199895
+ "epoch": 636.96,
199896
+ "learning_rate": 8.740080775444265e-06,
199897
+ "loss": 0.491,
199898
+ "step": 79020
199899
+ },
199900
+ {
199901
+ "epoch": 637.0,
199902
+ "eval_loss": 0.39863112568855286,
199903
+ "eval_runtime": 40.3355,
199904
+ "eval_samples_per_second": 20.726,
199905
+ "eval_steps_per_second": 0.669,
199906
+ "eval_wer": 0.1863764670925568,
199907
+ "step": 79024
199908
  }
199909
  ],
199910
+ "max_steps": 620000,
199911
  "num_train_epochs": 5000,
199912
+ "total_flos": 2.2238986676431526e+20,
199913
  "trial_name": null,
199914
  "trial_params": null
199915
  }
model-bin/finetune/base/{checkpoint-78401 β†’ checkpoint-79024}/training_args.bin RENAMED
File without changes
model-bin/finetune/base/log/1629896881.4320064/events.out.tfevents.1629896881.7e498afd5545.7645.35 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2654328f8eb2972242ff195060b7e2ab5745785dce1e3aa2dd62f9fba5e6cf37
3
+ size 4194
model-bin/finetune/base/log/1629897354.142085/events.out.tfevents.1629897354.7e498afd5545.7645.37 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e876a4c6e9579d18bd67460b6d4403656601d59319132b57b07d3703268315
3
+ size 4194
model-bin/finetune/base/log/1629897817.6067963/events.out.tfevents.1629897817.7e498afd5545.7645.39 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b6c4b2e3cf368dd19d47f635771416602cd6108e603471825175c4b150f007
3
+ size 4194
model-bin/finetune/base/log/1629898283.2853954/events.out.tfevents.1629898283.7e498afd5545.7645.41 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf63c7933e59788a935ec6e51c974ba66c36185074fe974c88aee444f29f02f
3
+ size 4194
model-bin/finetune/base/log/1629898756.3973496/events.out.tfevents.1629898756.7e498afd5545.7645.43 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2877a983cea7887a0baf4c92bf33531660d3ac39f0478f013de74a70b9c0320
3
+ size 4194
model-bin/finetune/base/log/events.out.tfevents.1629896881.7e498afd5545.7645.34 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49bed7e3d1f265a6baefb1fb4f870ec4edb4c0fdfc381df3c3306c993024f9c
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629897354.7e498afd5545.7645.36 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c396bc68f17f59ee41387235b6cfa971788d25684437bd81454582c3f502fa2b
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629897817.7e498afd5545.7645.38 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c696411aa48c0bd7cba5b62f03cb9b3eb3e25287a9369c0711589f0254373e0b
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629898283.7e498afd5545.7645.40 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c76a338c0ab4deba65a322b8810557a14527eb1f36aa5a4ac8fb422c98db0c4
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1629898756.7e498afd5545.7645.42 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3869591dc06ff391d5a92da9491bd29cc792c57606d90693c46e7c82e17f86
3
+ size 8462