Check commited on
Commit
6696c75
Β·
1 Parent(s): 504737e

"auto-commit"

Browse files
Files changed (28) hide show
  1. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/config.json +0 -0
  2. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/optimizer.pt +1 -1
  3. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/preprocessor_config.json +0 -0
  4. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/pytorch_model.bin +1 -1
  5. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/rng_state.pth +1 -1
  6. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/scaler.pt +1 -1
  7. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/scheduler.pt +1 -1
  8. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/trainer_state.json +165 -6
  9. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/training_args.bin +0 -0
  10. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/config.json +0 -0
  11. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/optimizer.pt +1 -1
  12. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/preprocessor_config.json +0 -0
  13. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/pytorch_model.bin +1 -1
  14. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/rng_state.pth +1 -1
  15. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/scaler.pt +1 -1
  16. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/scheduler.pt +1 -1
  17. model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/trainer_state.json +2862 -6
  18. model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/training_args.bin +0 -0
  19. model-bin/finetune/base/log/1630144693.9967587/events.out.tfevents.1630144693.86bb0ddabf9b.4092.111 +3 -0
  20. model-bin/finetune/base/log/1630145081.8079932/events.out.tfevents.1630145081.86bb0ddabf9b.4092.113 +3 -0
  21. model-bin/finetune/base/log/1630145470.2638097/events.out.tfevents.1630145470.86bb0ddabf9b.4092.115 +3 -0
  22. model-bin/finetune/base/log/1630145861.462048/events.out.tfevents.1630145861.86bb0ddabf9b.4092.117 +3 -0
  23. model-bin/finetune/base/log/1630146253.3092854/events.out.tfevents.1630146253.86bb0ddabf9b.4092.119 +3 -0
  24. model-bin/finetune/base/log/events.out.tfevents.1630144693.86bb0ddabf9b.4092.110 +3 -0
  25. model-bin/finetune/base/log/events.out.tfevents.1630145081.86bb0ddabf9b.4092.112 +3 -0
  26. model-bin/finetune/base/log/events.out.tfevents.1630145470.86bb0ddabf9b.4092.114 +3 -0
  27. model-bin/finetune/base/log/events.out.tfevents.1630145861.86bb0ddabf9b.4092.116 +3 -0
  28. model-bin/finetune/base/log/events.out.tfevents.1630146253.86bb0ddabf9b.4092.118 +3 -0
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e6a125c1f22cc48319045fee252e243c04cd00278a7c172ffc782059f67cc26
3
  size 722165393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c481493840bb46f502f2ebe26c318c6cbdfa398b4c18ed1a11445357f752cba
3
  size 722165393
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/preprocessor_config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:193548cc6244643fe0875e36a0b274952b57c6ff3787884ea6e9340b59be33d1
3
  size 377909911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b9038ce120de0a3a5a882271ef5d35d74a7469e36ded058c1e73fc288ef0c2
3
  size 377909911
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fe129e917b4c2af29771c1725b443c2df423d938257d23f158bd186a2a90ae4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb81231801c8b64899afee81ae774afd38c523616c443d886f27b6610e8a070d
3
  size 14503
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d391da4854332be30a64a76369bfbbbfa856719cae3d860e11c28d7890a695af
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d51bc5718931d58357028f7580d533189cb1ff6e800fd1903239afa0e373d2
3
  size 559
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f010bc16d14c91a80a5757b57e2aa17704d54320c75c008d1d5c00b5edce0e1c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f57148b4051a23da2341f6695d9723fffbe8bec0ac7125cd428cfce1f8a98fb2
3
  size 623
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-124947}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.1735723097017633,
3
- "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-123203",
4
- "epoch": 998.0,
5
- "global_step": 124823,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -258177,11 +258177,170 @@
258177
  "eval_steps_per_second": 0.731,
258178
  "eval_wer": 0.17628693821244465,
258179
  "step": 124823
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258180
  }
258181
  ],
258182
- "max_steps": 625000,
258183
  "num_train_epochs": 5000,
258184
- "total_flos": 3.512665253182511e+20,
258185
  "trial_name": null,
258186
  "trial_params": null
258187
  }
 
1
  {
2
+ "best_metric": 0.17162025681719809,
3
+ "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-124947",
4
+ "epoch": 1007.0,
5
+ "global_step": 124947,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
258177
  "eval_steps_per_second": 0.731,
258178
  "eval_wer": 0.17628693821244465,
258179
  "step": 124823
258180
+ },
258181
+ {
258182
+ "epoch": 1006.02,
258183
+ "learning_rate": 8.016410256410257e-06,
258184
+ "loss": 0.3007,
258185
+ "step": 124825
258186
+ },
258187
+ {
258188
+ "epoch": 1006.06,
258189
+ "learning_rate": 8.016330128205128e-06,
258190
+ "loss": 0.3029,
258191
+ "step": 124830
258192
+ },
258193
+ {
258194
+ "epoch": 1006.1,
258195
+ "learning_rate": 8.016250000000001e-06,
258196
+ "loss": 0.2823,
258197
+ "step": 124835
258198
+ },
258199
+ {
258200
+ "epoch": 1006.14,
258201
+ "learning_rate": 8.016169871794873e-06,
258202
+ "loss": 0.3448,
258203
+ "step": 124840
258204
+ },
258205
+ {
258206
+ "epoch": 1006.18,
258207
+ "learning_rate": 8.016089743589744e-06,
258208
+ "loss": 0.6987,
258209
+ "step": 124845
258210
+ },
258211
+ {
258212
+ "epoch": 1006.22,
258213
+ "learning_rate": 8.016009615384615e-06,
258214
+ "loss": 1.0145,
258215
+ "step": 124850
258216
+ },
258217
+ {
258218
+ "epoch": 1006.26,
258219
+ "learning_rate": 8.015929487179489e-06,
258220
+ "loss": 0.2459,
258221
+ "step": 124855
258222
+ },
258223
+ {
258224
+ "epoch": 1006.3,
258225
+ "learning_rate": 8.01584935897436e-06,
258226
+ "loss": 0.3105,
258227
+ "step": 124860
258228
+ },
258229
+ {
258230
+ "epoch": 1006.34,
258231
+ "learning_rate": 8.015769230769231e-06,
258232
+ "loss": 0.3416,
258233
+ "step": 124865
258234
+ },
258235
+ {
258236
+ "epoch": 1006.38,
258237
+ "learning_rate": 8.015689102564104e-06,
258238
+ "loss": 0.547,
258239
+ "step": 124870
258240
+ },
258241
+ {
258242
+ "epoch": 1006.42,
258243
+ "learning_rate": 8.015608974358976e-06,
258244
+ "loss": 0.9755,
258245
+ "step": 124875
258246
+ },
258247
+ {
258248
+ "epoch": 1006.46,
258249
+ "learning_rate": 8.015528846153847e-06,
258250
+ "loss": 0.2603,
258251
+ "step": 124880
258252
+ },
258253
+ {
258254
+ "epoch": 1006.5,
258255
+ "learning_rate": 8.015448717948718e-06,
258256
+ "loss": 0.3211,
258257
+ "step": 124885
258258
+ },
258259
+ {
258260
+ "epoch": 1006.54,
258261
+ "learning_rate": 8.015368589743591e-06,
258262
+ "loss": 0.2985,
258263
+ "step": 124890
258264
+ },
258265
+ {
258266
+ "epoch": 1006.58,
258267
+ "learning_rate": 8.015288461538461e-06,
258268
+ "loss": 0.577,
258269
+ "step": 124895
258270
+ },
258271
+ {
258272
+ "epoch": 1006.62,
258273
+ "learning_rate": 8.015208333333334e-06,
258274
+ "loss": 1.0838,
258275
+ "step": 124900
258276
+ },
258277
+ {
258278
+ "epoch": 1006.66,
258279
+ "learning_rate": 8.015128205128205e-06,
258280
+ "loss": 0.2679,
258281
+ "step": 124905
258282
+ },
258283
+ {
258284
+ "epoch": 1006.7,
258285
+ "learning_rate": 8.015048076923077e-06,
258286
+ "loss": 0.2726,
258287
+ "step": 124910
258288
+ },
258289
+ {
258290
+ "epoch": 1006.74,
258291
+ "learning_rate": 8.01496794871795e-06,
258292
+ "loss": 0.3246,
258293
+ "step": 124915
258294
+ },
258295
+ {
258296
+ "epoch": 1006.78,
258297
+ "learning_rate": 8.014887820512821e-06,
258298
+ "loss": 0.5986,
258299
+ "step": 124920
258300
+ },
258301
+ {
258302
+ "epoch": 1006.82,
258303
+ "learning_rate": 8.014807692307693e-06,
258304
+ "loss": 1.0309,
258305
+ "step": 124925
258306
+ },
258307
+ {
258308
+ "epoch": 1006.86,
258309
+ "learning_rate": 8.014727564102564e-06,
258310
+ "loss": 0.2936,
258311
+ "step": 124930
258312
+ },
258313
+ {
258314
+ "epoch": 1006.9,
258315
+ "learning_rate": 8.014647435897437e-06,
258316
+ "loss": 0.3178,
258317
+ "step": 124935
258318
+ },
258319
+ {
258320
+ "epoch": 1006.94,
258321
+ "learning_rate": 8.014567307692308e-06,
258322
+ "loss": 0.3584,
258323
+ "step": 124940
258324
+ },
258325
+ {
258326
+ "epoch": 1006.98,
258327
+ "learning_rate": 8.01448717948718e-06,
258328
+ "loss": 0.716,
258329
+ "step": 124945
258330
+ },
258331
+ {
258332
+ "epoch": 1007.0,
258333
+ "eval_loss": 0.3555839955806732,
258334
+ "eval_runtime": 36.5503,
258335
+ "eval_samples_per_second": 22.955,
258336
+ "eval_steps_per_second": 0.739,
258337
+ "eval_wer": 0.17162025681719809,
258338
+ "step": 124947
258339
  }
258340
  ],
258341
+ "max_steps": 620000,
258342
  "num_train_epochs": 5000,
258343
+ "total_flos": 3.516146523707203e+20,
258344
  "trial_name": null,
258345
  "trial_params": null
258346
  }
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-124947}/training_args.bin RENAMED
File without changes
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7c4a3231088e372b641bcbecefb9d8f7b78420423afc4a419e40f17dac6b497
3
  size 722165393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f63c3bb2813226bca109201f437ebf67c43cfef3ce4b37fc6773b00451c6afd3
3
  size 722165393
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/preprocessor_config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4991cf9566d39f391967058c555dd2bd7c744f264326296fe7df7bd860a29de5
3
  size 377909911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f31e81ef246b6d82c092a74a34ec533837f2f19d8bb978eb447cf77e84cd3ccd
3
  size 377909911
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32df719a42d7f5b4cb1229df3210e6e2ee9affa77432670bf3159df58a6ba2ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b2eee16926d8653ad01b2f3c1ecdf61861739c3ae6c1de09f83de384c7912f
3
  size 14503
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc1a7a79ad993600c86fc5a033565a49ad8634a8131af2e3278b03b1b2b7fdb
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27dcd6b89e9ca24fa782487c54a3e4aa32dcd9bb3a40752f5c73ade334c653ba
3
  size 559
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d851a34b78ff2a34ac3461ac6fc45d251f315b586c0be20f8792ee63881fd06e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38924a63617ddcff0e33da0ada89550be5f4f3a143fab0f47de35854193ff038
3
  size 623
model-bin/finetune/base/{checkpoint-123203 β†’ checkpoint-125446}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.1735723097017633,
3
- "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-123203",
4
- "epoch": 984.9960159362549,
5
- "global_step": 123203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -256116,11 +256116,2867 @@
256116
  "eval_steps_per_second": 0.751,
256117
  "eval_wer": 0.1735723097017633,
256118
  "step": 123203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256119
  }
256120
  ],
256121
- "max_steps": 625000,
256122
  "num_train_epochs": 5000,
256123
- "total_flos": 3.467197138225218e+20,
256124
  "trial_name": null,
256125
  "trial_params": null
256126
  }
 
1
  {
2
+ "best_metric": 0.17162025681719809,
3
+ "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-124947",
4
+ "epoch": 1010.995983935743,
5
+ "global_step": 125446,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
256116
  "eval_steps_per_second": 0.751,
256117
  "eval_wer": 0.1735723097017633,
256118
  "step": 123203
256119
+ },
256120
+ {
256121
+ "epoch": 993.02,
256122
+ "learning_rate": 8.042371794871795e-06,
256123
+ "loss": 0.4265,
256124
+ "step": 123205
256125
+ },
256126
+ {
256127
+ "epoch": 993.06,
256128
+ "learning_rate": 8.042291666666668e-06,
256129
+ "loss": 0.2717,
256130
+ "step": 123210
256131
+ },
256132
+ {
256133
+ "epoch": 993.1,
256134
+ "learning_rate": 8.042211538461539e-06,
256135
+ "loss": 0.2704,
256136
+ "step": 123215
256137
+ },
256138
+ {
256139
+ "epoch": 993.14,
256140
+ "learning_rate": 8.04213141025641e-06,
256141
+ "loss": 0.3523,
256142
+ "step": 123220
256143
+ },
256144
+ {
256145
+ "epoch": 993.18,
256146
+ "learning_rate": 8.042051282051283e-06,
256147
+ "loss": 0.6089,
256148
+ "step": 123225
256149
+ },
256150
+ {
256151
+ "epoch": 993.22,
256152
+ "learning_rate": 8.041971153846155e-06,
256153
+ "loss": 1.0852,
256154
+ "step": 123230
256155
+ },
256156
+ {
256157
+ "epoch": 993.26,
256158
+ "learning_rate": 8.041891025641026e-06,
256159
+ "loss": 0.2695,
256160
+ "step": 123235
256161
+ },
256162
+ {
256163
+ "epoch": 993.3,
256164
+ "learning_rate": 8.041810897435897e-06,
256165
+ "loss": 0.2521,
256166
+ "step": 123240
256167
+ },
256168
+ {
256169
+ "epoch": 993.34,
256170
+ "learning_rate": 8.04173076923077e-06,
256171
+ "loss": 0.3001,
256172
+ "step": 123245
256173
+ },
256174
+ {
256175
+ "epoch": 993.38,
256176
+ "learning_rate": 8.041650641025642e-06,
256177
+ "loss": 0.6213,
256178
+ "step": 123250
256179
+ },
256180
+ {
256181
+ "epoch": 993.42,
256182
+ "learning_rate": 8.041570512820513e-06,
256183
+ "loss": 1.2583,
256184
+ "step": 123255
256185
+ },
256186
+ {
256187
+ "epoch": 993.46,
256188
+ "learning_rate": 8.041490384615386e-06,
256189
+ "loss": 0.2463,
256190
+ "step": 123260
256191
+ },
256192
+ {
256193
+ "epoch": 993.5,
256194
+ "learning_rate": 8.041410256410258e-06,
256195
+ "loss": 0.2687,
256196
+ "step": 123265
256197
+ },
256198
+ {
256199
+ "epoch": 993.54,
256200
+ "learning_rate": 8.041330128205129e-06,
256201
+ "loss": 0.3732,
256202
+ "step": 123270
256203
+ },
256204
+ {
256205
+ "epoch": 993.58,
256206
+ "learning_rate": 8.04125e-06,
256207
+ "loss": 0.6509,
256208
+ "step": 123275
256209
+ },
256210
+ {
256211
+ "epoch": 993.62,
256212
+ "learning_rate": 8.041169871794873e-06,
256213
+ "loss": 0.9863,
256214
+ "step": 123280
256215
+ },
256216
+ {
256217
+ "epoch": 993.66,
256218
+ "learning_rate": 8.041089743589745e-06,
256219
+ "loss": 0.2735,
256220
+ "step": 123285
256221
+ },
256222
+ {
256223
+ "epoch": 993.7,
256224
+ "learning_rate": 8.041009615384616e-06,
256225
+ "loss": 0.3355,
256226
+ "step": 123290
256227
+ },
256228
+ {
256229
+ "epoch": 993.74,
256230
+ "learning_rate": 8.040929487179487e-06,
256231
+ "loss": 0.3496,
256232
+ "step": 123295
256233
+ },
256234
+ {
256235
+ "epoch": 993.78,
256236
+ "learning_rate": 8.04084935897436e-06,
256237
+ "loss": 0.581,
256238
+ "step": 123300
256239
+ },
256240
+ {
256241
+ "epoch": 993.82,
256242
+ "learning_rate": 8.04076923076923e-06,
256243
+ "loss": 1.0977,
256244
+ "step": 123305
256245
+ },
256246
+ {
256247
+ "epoch": 993.86,
256248
+ "learning_rate": 8.040689102564103e-06,
256249
+ "loss": 0.2883,
256250
+ "step": 123310
256251
+ },
256252
+ {
256253
+ "epoch": 993.9,
256254
+ "learning_rate": 8.040608974358976e-06,
256255
+ "loss": 0.2839,
256256
+ "step": 123315
256257
+ },
256258
+ {
256259
+ "epoch": 993.94,
256260
+ "learning_rate": 8.040528846153846e-06,
256261
+ "loss": 0.3296,
256262
+ "step": 123320
256263
+ },
256264
+ {
256265
+ "epoch": 993.98,
256266
+ "learning_rate": 8.040448717948719e-06,
256267
+ "loss": 0.6853,
256268
+ "step": 123325
256269
+ },
256270
+ {
256271
+ "epoch": 994.0,
256272
+ "eval_loss": 0.3523680567741394,
256273
+ "eval_runtime": 36.441,
256274
+ "eval_samples_per_second": 23.106,
256275
+ "eval_steps_per_second": 0.741,
256276
+ "eval_wer": 0.17611336032388664,
256277
+ "step": 123327
256278
+ },
256279
+ {
256280
+ "epoch": 986.02,
256281
+ "learning_rate": 8.04036858974359e-06,
256282
+ "loss": 0.4323,
256283
+ "step": 123330
256284
+ },
256285
+ {
256286
+ "epoch": 986.06,
256287
+ "learning_rate": 8.040288461538462e-06,
256288
+ "loss": 0.2315,
256289
+ "step": 123335
256290
+ },
256291
+ {
256292
+ "epoch": 986.1,
256293
+ "learning_rate": 8.040208333333333e-06,
256294
+ "loss": 0.2802,
256295
+ "step": 123340
256296
+ },
256297
+ {
256298
+ "epoch": 986.14,
256299
+ "learning_rate": 8.040128205128206e-06,
256300
+ "loss": 0.3715,
256301
+ "step": 123345
256302
+ },
256303
+ {
256304
+ "epoch": 986.18,
256305
+ "learning_rate": 8.040048076923077e-06,
256306
+ "loss": 0.7037,
256307
+ "step": 123350
256308
+ },
256309
+ {
256310
+ "epoch": 986.22,
256311
+ "learning_rate": 8.039967948717949e-06,
256312
+ "loss": 0.8474,
256313
+ "step": 123355
256314
+ },
256315
+ {
256316
+ "epoch": 986.26,
256317
+ "learning_rate": 8.039887820512822e-06,
256318
+ "loss": 0.329,
256319
+ "step": 123360
256320
+ },
256321
+ {
256322
+ "epoch": 986.3,
256323
+ "learning_rate": 8.039807692307693e-06,
256324
+ "loss": 0.3398,
256325
+ "step": 123365
256326
+ },
256327
+ {
256328
+ "epoch": 986.34,
256329
+ "learning_rate": 8.039727564102565e-06,
256330
+ "loss": 0.3763,
256331
+ "step": 123370
256332
+ },
256333
+ {
256334
+ "epoch": 986.38,
256335
+ "learning_rate": 8.039647435897436e-06,
256336
+ "loss": 0.6972,
256337
+ "step": 123375
256338
+ },
256339
+ {
256340
+ "epoch": 986.42,
256341
+ "learning_rate": 8.039567307692309e-06,
256342
+ "loss": 0.944,
256343
+ "step": 123380
256344
+ },
256345
+ {
256346
+ "epoch": 986.46,
256347
+ "learning_rate": 8.03948717948718e-06,
256348
+ "loss": 0.4065,
256349
+ "step": 123385
256350
+ },
256351
+ {
256352
+ "epoch": 986.5,
256353
+ "learning_rate": 8.039407051282052e-06,
256354
+ "loss": 0.3496,
256355
+ "step": 123390
256356
+ },
256357
+ {
256358
+ "epoch": 986.54,
256359
+ "learning_rate": 8.039326923076923e-06,
256360
+ "loss": 0.3175,
256361
+ "step": 123395
256362
+ },
256363
+ {
256364
+ "epoch": 986.58,
256365
+ "learning_rate": 8.039246794871796e-06,
256366
+ "loss": 0.7698,
256367
+ "step": 123400
256368
+ },
256369
+ {
256370
+ "epoch": 986.62,
256371
+ "learning_rate": 8.039166666666667e-06,
256372
+ "loss": 0.781,
256373
+ "step": 123405
256374
+ },
256375
+ {
256376
+ "epoch": 986.66,
256377
+ "learning_rate": 8.039086538461539e-06,
256378
+ "loss": 0.2947,
256379
+ "step": 123410
256380
+ },
256381
+ {
256382
+ "epoch": 986.7,
256383
+ "learning_rate": 8.039006410256412e-06,
256384
+ "loss": 0.2531,
256385
+ "step": 123415
256386
+ },
256387
+ {
256388
+ "epoch": 986.74,
256389
+ "learning_rate": 8.038926282051283e-06,
256390
+ "loss": 0.3222,
256391
+ "step": 123420
256392
+ },
256393
+ {
256394
+ "epoch": 986.78,
256395
+ "learning_rate": 8.038846153846155e-06,
256396
+ "loss": 0.7026,
256397
+ "step": 123425
256398
+ },
256399
+ {
256400
+ "epoch": 986.82,
256401
+ "learning_rate": 8.038766025641026e-06,
256402
+ "loss": 0.8483,
256403
+ "step": 123430
256404
+ },
256405
+ {
256406
+ "epoch": 986.86,
256407
+ "learning_rate": 8.038685897435899e-06,
256408
+ "loss": 0.2922,
256409
+ "step": 123435
256410
+ },
256411
+ {
256412
+ "epoch": 986.9,
256413
+ "learning_rate": 8.038605769230769e-06,
256414
+ "loss": 0.292,
256415
+ "step": 123440
256416
+ },
256417
+ {
256418
+ "epoch": 986.94,
256419
+ "learning_rate": 8.038525641025642e-06,
256420
+ "loss": 0.357,
256421
+ "step": 123445
256422
+ },
256423
+ {
256424
+ "epoch": 986.98,
256425
+ "learning_rate": 8.038445512820513e-06,
256426
+ "loss": 0.8068,
256427
+ "step": 123450
256428
+ },
256429
+ {
256430
+ "epoch": 987.0,
256431
+ "eval_loss": 0.42160657048225403,
256432
+ "eval_runtime": 35.9543,
256433
+ "eval_samples_per_second": 23.419,
256434
+ "eval_steps_per_second": 0.751,
256435
+ "eval_wer": 0.1864942317583952,
256436
+ "step": 123452
256437
+ },
256438
+ {
256439
+ "epoch": 987.02,
256440
+ "learning_rate": 8.038365384615384e-06,
256441
+ "loss": 0.3205,
256442
+ "step": 123455
256443
+ },
256444
+ {
256445
+ "epoch": 987.06,
256446
+ "learning_rate": 8.038285256410258e-06,
256447
+ "loss": 0.3331,
256448
+ "step": 123460
256449
+ },
256450
+ {
256451
+ "epoch": 987.1,
256452
+ "learning_rate": 8.038205128205129e-06,
256453
+ "loss": 0.2965,
256454
+ "step": 123465
256455
+ },
256456
+ {
256457
+ "epoch": 987.14,
256458
+ "learning_rate": 8.038125e-06,
256459
+ "loss": 0.3104,
256460
+ "step": 123470
256461
+ },
256462
+ {
256463
+ "epoch": 987.18,
256464
+ "learning_rate": 8.038044871794872e-06,
256465
+ "loss": 0.7666,
256466
+ "step": 123475
256467
+ },
256468
+ {
256469
+ "epoch": 987.22,
256470
+ "learning_rate": 8.037964743589745e-06,
256471
+ "loss": 0.8616,
256472
+ "step": 123480
256473
+ },
256474
+ {
256475
+ "epoch": 987.26,
256476
+ "learning_rate": 8.037884615384616e-06,
256477
+ "loss": 0.2989,
256478
+ "step": 123485
256479
+ },
256480
+ {
256481
+ "epoch": 987.3,
256482
+ "learning_rate": 8.037804487179487e-06,
256483
+ "loss": 0.2439,
256484
+ "step": 123490
256485
+ },
256486
+ {
256487
+ "epoch": 987.34,
256488
+ "learning_rate": 8.037724358974359e-06,
256489
+ "loss": 0.3469,
256490
+ "step": 123495
256491
+ },
256492
+ {
256493
+ "epoch": 987.38,
256494
+ "learning_rate": 8.037644230769232e-06,
256495
+ "loss": 0.738,
256496
+ "step": 123500
256497
+ },
256498
+ {
256499
+ "epoch": 987.42,
256500
+ "learning_rate": 8.037564102564103e-06,
256501
+ "loss": 0.8478,
256502
+ "step": 123505
256503
+ },
256504
+ {
256505
+ "epoch": 987.46,
256506
+ "learning_rate": 8.037483974358974e-06,
256507
+ "loss": 0.3079,
256508
+ "step": 123510
256509
+ },
256510
+ {
256511
+ "epoch": 987.5,
256512
+ "learning_rate": 8.037403846153848e-06,
256513
+ "loss": 0.3345,
256514
+ "step": 123515
256515
+ },
256516
+ {
256517
+ "epoch": 987.54,
256518
+ "learning_rate": 8.037323717948719e-06,
256519
+ "loss": 0.3569,
256520
+ "step": 123520
256521
+ },
256522
+ {
256523
+ "epoch": 987.58,
256524
+ "learning_rate": 8.03724358974359e-06,
256525
+ "loss": 0.6428,
256526
+ "step": 123525
256527
+ },
256528
+ {
256529
+ "epoch": 987.62,
256530
+ "learning_rate": 8.037163461538462e-06,
256531
+ "loss": 0.8882,
256532
+ "step": 123530
256533
+ },
256534
+ {
256535
+ "epoch": 987.66,
256536
+ "learning_rate": 8.037083333333335e-06,
256537
+ "loss": 0.2542,
256538
+ "step": 123535
256539
+ },
256540
+ {
256541
+ "epoch": 987.7,
256542
+ "learning_rate": 8.037003205128206e-06,
256543
+ "loss": 0.2579,
256544
+ "step": 123540
256545
+ },
256546
+ {
256547
+ "epoch": 987.74,
256548
+ "learning_rate": 8.036923076923077e-06,
256549
+ "loss": 0.3307,
256550
+ "step": 123545
256551
+ },
256552
+ {
256553
+ "epoch": 987.78,
256554
+ "learning_rate": 8.036842948717949e-06,
256555
+ "loss": 0.6938,
256556
+ "step": 123550
256557
+ },
256558
+ {
256559
+ "epoch": 987.82,
256560
+ "learning_rate": 8.036762820512822e-06,
256561
+ "loss": 0.7395,
256562
+ "step": 123555
256563
+ },
256564
+ {
256565
+ "epoch": 987.86,
256566
+ "learning_rate": 8.036682692307693e-06,
256567
+ "loss": 0.2917,
256568
+ "step": 123560
256569
+ },
256570
+ {
256571
+ "epoch": 987.9,
256572
+ "learning_rate": 8.036602564102565e-06,
256573
+ "loss": 0.3108,
256574
+ "step": 123565
256575
+ },
256576
+ {
256577
+ "epoch": 987.94,
256578
+ "learning_rate": 8.036522435897438e-06,
256579
+ "loss": 0.3496,
256580
+ "step": 123570
256581
+ },
256582
+ {
256583
+ "epoch": 987.98,
256584
+ "learning_rate": 8.036442307692309e-06,
256585
+ "loss": 0.7931,
256586
+ "step": 123575
256587
+ },
256588
+ {
256589
+ "epoch": 988.0,
256590
+ "eval_loss": 0.3540716767311096,
256591
+ "eval_runtime": 36.6095,
256592
+ "eval_samples_per_second": 23.0,
256593
+ "eval_steps_per_second": 0.738,
256594
+ "eval_wer": 0.17904090475141368,
256595
+ "step": 123577
256596
+ },
256597
+ {
256598
+ "epoch": 996.02,
256599
+ "learning_rate": 8.03636217948718e-06,
256600
+ "loss": 0.2697,
256601
+ "step": 123580
256602
+ },
256603
+ {
256604
+ "epoch": 996.06,
256605
+ "learning_rate": 8.036282051282052e-06,
256606
+ "loss": 0.2864,
256607
+ "step": 123585
256608
+ },
256609
+ {
256610
+ "epoch": 996.1,
256611
+ "learning_rate": 8.036201923076925e-06,
256612
+ "loss": 0.2807,
256613
+ "step": 123590
256614
+ },
256615
+ {
256616
+ "epoch": 996.15,
256617
+ "learning_rate": 8.036121794871794e-06,
256618
+ "loss": 0.3349,
256619
+ "step": 123595
256620
+ },
256621
+ {
256622
+ "epoch": 996.19,
256623
+ "learning_rate": 8.036041666666667e-06,
256624
+ "loss": 0.7529,
256625
+ "step": 123600
256626
+ },
256627
+ {
256628
+ "epoch": 996.23,
256629
+ "learning_rate": 8.035961538461539e-06,
256630
+ "loss": 0.8701,
256631
+ "step": 123605
256632
+ },
256633
+ {
256634
+ "epoch": 996.27,
256635
+ "learning_rate": 8.03588141025641e-06,
256636
+ "loss": 0.2396,
256637
+ "step": 123610
256638
+ },
256639
+ {
256640
+ "epoch": 996.31,
256641
+ "learning_rate": 8.035801282051283e-06,
256642
+ "loss": 0.3098,
256643
+ "step": 123615
256644
+ },
256645
+ {
256646
+ "epoch": 996.35,
256647
+ "learning_rate": 8.035721153846155e-06,
256648
+ "loss": 0.3529,
256649
+ "step": 123620
256650
+ },
256651
+ {
256652
+ "epoch": 996.39,
256653
+ "learning_rate": 8.035641025641026e-06,
256654
+ "loss": 0.7107,
256655
+ "step": 123625
256656
+ },
256657
+ {
256658
+ "epoch": 996.43,
256659
+ "learning_rate": 8.035560897435897e-06,
256660
+ "loss": 0.8661,
256661
+ "step": 123630
256662
+ },
256663
+ {
256664
+ "epoch": 996.47,
256665
+ "learning_rate": 8.03548076923077e-06,
256666
+ "loss": 0.2525,
256667
+ "step": 123635
256668
+ },
256669
+ {
256670
+ "epoch": 996.51,
256671
+ "learning_rate": 8.035400641025642e-06,
256672
+ "loss": 0.277,
256673
+ "step": 123640
256674
+ },
256675
+ {
256676
+ "epoch": 996.55,
256677
+ "learning_rate": 8.035320512820513e-06,
256678
+ "loss": 0.3795,
256679
+ "step": 123645
256680
+ },
256681
+ {
256682
+ "epoch": 996.59,
256683
+ "learning_rate": 8.035240384615384e-06,
256684
+ "loss": 0.6803,
256685
+ "step": 123650
256686
+ },
256687
+ {
256688
+ "epoch": 996.63,
256689
+ "learning_rate": 8.035160256410257e-06,
256690
+ "loss": 0.8381,
256691
+ "step": 123655
256692
+ },
256693
+ {
256694
+ "epoch": 996.67,
256695
+ "learning_rate": 8.035080128205129e-06,
256696
+ "loss": 0.2627,
256697
+ "step": 123660
256698
+ },
256699
+ {
256700
+ "epoch": 996.71,
256701
+ "learning_rate": 8.035e-06,
256702
+ "loss": 0.2651,
256703
+ "step": 123665
256704
+ },
256705
+ {
256706
+ "epoch": 996.75,
256707
+ "learning_rate": 8.034919871794873e-06,
256708
+ "loss": 0.3103,
256709
+ "step": 123670
256710
+ },
256711
+ {
256712
+ "epoch": 996.79,
256713
+ "learning_rate": 8.034839743589745e-06,
256714
+ "loss": 0.7412,
256715
+ "step": 123675
256716
+ },
256717
+ {
256718
+ "epoch": 996.83,
256719
+ "learning_rate": 8.034759615384616e-06,
256720
+ "loss": 0.816,
256721
+ "step": 123680
256722
+ },
256723
+ {
256724
+ "epoch": 996.87,
256725
+ "learning_rate": 8.034679487179487e-06,
256726
+ "loss": 0.2746,
256727
+ "step": 123685
256728
+ },
256729
+ {
256730
+ "epoch": 996.91,
256731
+ "learning_rate": 8.03459935897436e-06,
256732
+ "loss": 0.3075,
256733
+ "step": 123690
256734
+ },
256735
+ {
256736
+ "epoch": 996.95,
256737
+ "learning_rate": 8.034519230769232e-06,
256738
+ "loss": 0.4634,
256739
+ "step": 123695
256740
+ },
256741
+ {
256742
+ "epoch": 996.99,
256743
+ "learning_rate": 8.034439102564103e-06,
256744
+ "loss": 0.9143,
256745
+ "step": 123700
256746
+ },
256747
+ {
256748
+ "epoch": 997.0,
256749
+ "eval_loss": 0.35271042585372925,
256750
+ "eval_runtime": 37.4395,
256751
+ "eval_samples_per_second": 22.49,
256752
+ "eval_steps_per_second": 0.721,
256753
+ "eval_wer": 0.18502449265220433,
256754
+ "step": 123701
256755
+ },
256756
+ {
256757
+ "epoch": 989.03,
256758
+ "learning_rate": 8.034358974358974e-06,
256759
+ "loss": 0.3226,
256760
+ "step": 123705
256761
+ },
256762
+ {
256763
+ "epoch": 989.07,
256764
+ "learning_rate": 8.034278846153847e-06,
256765
+ "loss": 0.3369,
256766
+ "step": 123710
256767
+ },
256768
+ {
256769
+ "epoch": 989.11,
256770
+ "learning_rate": 8.034198717948719e-06,
256771
+ "loss": 0.2795,
256772
+ "step": 123715
256773
+ },
256774
+ {
256775
+ "epoch": 989.15,
256776
+ "learning_rate": 8.03411858974359e-06,
256777
+ "loss": 0.407,
256778
+ "step": 123720
256779
+ },
256780
+ {
256781
+ "epoch": 989.19,
256782
+ "learning_rate": 8.034038461538463e-06,
256783
+ "loss": 0.9145,
256784
+ "step": 123725
256785
+ },
256786
+ {
256787
+ "epoch": 989.23,
256788
+ "learning_rate": 8.033958333333335e-06,
256789
+ "loss": 0.6577,
256790
+ "step": 123730
256791
+ },
256792
+ {
256793
+ "epoch": 989.27,
256794
+ "learning_rate": 8.033878205128206e-06,
256795
+ "loss": 0.2938,
256796
+ "step": 123735
256797
+ },
256798
+ {
256799
+ "epoch": 989.31,
256800
+ "learning_rate": 8.033798076923077e-06,
256801
+ "loss": 0.3101,
256802
+ "step": 123740
256803
+ },
256804
+ {
256805
+ "epoch": 989.35,
256806
+ "learning_rate": 8.03371794871795e-06,
256807
+ "loss": 0.3811,
256808
+ "step": 123745
256809
+ },
256810
+ {
256811
+ "epoch": 989.39,
256812
+ "learning_rate": 8.03363782051282e-06,
256813
+ "loss": 0.8656,
256814
+ "step": 123750
256815
+ },
256816
+ {
256817
+ "epoch": 989.43,
256818
+ "learning_rate": 8.033557692307693e-06,
256819
+ "loss": 0.5815,
256820
+ "step": 123755
256821
+ },
256822
+ {
256823
+ "epoch": 989.47,
256824
+ "learning_rate": 8.033477564102566e-06,
256825
+ "loss": 0.2756,
256826
+ "step": 123760
256827
+ },
256828
+ {
256829
+ "epoch": 989.51,
256830
+ "learning_rate": 8.033397435897436e-06,
256831
+ "loss": 0.3436,
256832
+ "step": 123765
256833
+ },
256834
+ {
256835
+ "epoch": 989.55,
256836
+ "learning_rate": 8.033317307692309e-06,
256837
+ "loss": 0.444,
256838
+ "step": 123770
256839
+ },
256840
+ {
256841
+ "epoch": 989.59,
256842
+ "learning_rate": 8.03323717948718e-06,
256843
+ "loss": 0.8274,
256844
+ "step": 123775
256845
+ },
256846
+ {
256847
+ "epoch": 989.63,
256848
+ "learning_rate": 8.033157051282052e-06,
256849
+ "loss": 0.6641,
256850
+ "step": 123780
256851
+ },
256852
+ {
256853
+ "epoch": 989.67,
256854
+ "learning_rate": 8.033076923076923e-06,
256855
+ "loss": 0.2923,
256856
+ "step": 123785
256857
+ },
256858
+ {
256859
+ "epoch": 989.71,
256860
+ "learning_rate": 8.032996794871796e-06,
256861
+ "loss": 0.3045,
256862
+ "step": 123790
256863
+ },
256864
+ {
256865
+ "epoch": 989.75,
256866
+ "learning_rate": 8.032916666666667e-06,
256867
+ "loss": 0.5925,
256868
+ "step": 123795
256869
+ },
256870
+ {
256871
+ "epoch": 989.79,
256872
+ "learning_rate": 8.032836538461539e-06,
256873
+ "loss": 0.9411,
256874
+ "step": 123800
256875
+ },
256876
+ {
256877
+ "epoch": 989.83,
256878
+ "learning_rate": 8.03275641025641e-06,
256879
+ "loss": 0.6954,
256880
+ "step": 123805
256881
+ },
256882
+ {
256883
+ "epoch": 989.87,
256884
+ "learning_rate": 8.032676282051283e-06,
256885
+ "loss": 0.2363,
256886
+ "step": 123810
256887
+ },
256888
+ {
256889
+ "epoch": 989.91,
256890
+ "learning_rate": 8.032596153846154e-06,
256891
+ "loss": 0.3087,
256892
+ "step": 123815
256893
+ },
256894
+ {
256895
+ "epoch": 989.95,
256896
+ "learning_rate": 8.032516025641026e-06,
256897
+ "loss": 0.4986,
256898
+ "step": 123820
256899
+ },
256900
+ {
256901
+ "epoch": 989.99,
256902
+ "learning_rate": 8.032435897435899e-06,
256903
+ "loss": 0.9063,
256904
+ "step": 123825
256905
+ },
256906
+ {
256907
+ "epoch": 990.0,
256908
+ "eval_loss": 0.37780308723449707,
256909
+ "eval_runtime": 36.1129,
256910
+ "eval_samples_per_second": 23.316,
256911
+ "eval_steps_per_second": 0.748,
256912
+ "eval_wer": 0.18303048065650646,
256913
+ "step": 123826
256914
+ },
256915
+ {
256916
+ "epoch": 990.03,
256917
+ "learning_rate": 8.03235576923077e-06,
256918
+ "loss": 0.3242,
256919
+ "step": 123830
256920
+ },
256921
+ {
256922
+ "epoch": 990.07,
256923
+ "learning_rate": 8.032275641025642e-06,
256924
+ "loss": 0.2932,
256925
+ "step": 123835
256926
+ },
256927
+ {
256928
+ "epoch": 990.11,
256929
+ "learning_rate": 8.032195512820513e-06,
256930
+ "loss": 0.334,
256931
+ "step": 123840
256932
+ },
256933
+ {
256934
+ "epoch": 990.15,
256935
+ "learning_rate": 8.032115384615386e-06,
256936
+ "loss": 0.3641,
256937
+ "step": 123845
256938
+ },
256939
+ {
256940
+ "epoch": 990.19,
256941
+ "learning_rate": 8.032035256410257e-06,
256942
+ "loss": 0.9662,
256943
+ "step": 123850
256944
+ },
256945
+ {
256946
+ "epoch": 990.23,
256947
+ "learning_rate": 8.031955128205129e-06,
256948
+ "loss": 0.7018,
256949
+ "step": 123855
256950
+ },
256951
+ {
256952
+ "epoch": 990.27,
256953
+ "learning_rate": 8.031875000000002e-06,
256954
+ "loss": 0.2948,
256955
+ "step": 123860
256956
+ },
256957
+ {
256958
+ "epoch": 990.31,
256959
+ "learning_rate": 8.031794871794873e-06,
256960
+ "loss": 0.3453,
256961
+ "step": 123865
256962
+ },
256963
+ {
256964
+ "epoch": 990.35,
256965
+ "learning_rate": 8.031714743589744e-06,
256966
+ "loss": 0.439,
256967
+ "step": 123870
256968
+ },
256969
+ {
256970
+ "epoch": 990.39,
256971
+ "learning_rate": 8.031634615384616e-06,
256972
+ "loss": 1.012,
256973
+ "step": 123875
256974
+ },
256975
+ {
256976
+ "epoch": 990.43,
256977
+ "learning_rate": 8.031554487179489e-06,
256978
+ "loss": 0.6446,
256979
+ "step": 123880
256980
+ },
256981
+ {
256982
+ "epoch": 990.47,
256983
+ "learning_rate": 8.031474358974359e-06,
256984
+ "loss": 0.2602,
256985
+ "step": 123885
256986
+ },
256987
+ {
256988
+ "epoch": 990.51,
256989
+ "learning_rate": 8.031394230769232e-06,
256990
+ "loss": 0.3188,
256991
+ "step": 123890
256992
+ },
256993
+ {
256994
+ "epoch": 990.55,
256995
+ "learning_rate": 8.031314102564103e-06,
256996
+ "loss": 0.4144,
256997
+ "step": 123895
256998
+ },
256999
+ {
257000
+ "epoch": 990.59,
257001
+ "learning_rate": 8.031233974358974e-06,
257002
+ "loss": 0.7588,
257003
+ "step": 123900
257004
+ },
257005
+ {
257006
+ "epoch": 990.63,
257007
+ "learning_rate": 8.031153846153846e-06,
257008
+ "loss": 0.7133,
257009
+ "step": 123905
257010
+ },
257011
+ {
257012
+ "epoch": 990.67,
257013
+ "learning_rate": 8.031073717948719e-06,
257014
+ "loss": 0.2741,
257015
+ "step": 123910
257016
+ },
257017
+ {
257018
+ "epoch": 990.71,
257019
+ "learning_rate": 8.03099358974359e-06,
257020
+ "loss": 0.385,
257021
+ "step": 123915
257022
+ },
257023
+ {
257024
+ "epoch": 990.75,
257025
+ "learning_rate": 8.030913461538461e-06,
257026
+ "loss": 0.4035,
257027
+ "step": 123920
257028
+ },
257029
+ {
257030
+ "epoch": 990.79,
257031
+ "learning_rate": 8.030833333333335e-06,
257032
+ "loss": 0.9382,
257033
+ "step": 123925
257034
+ },
257035
+ {
257036
+ "epoch": 990.83,
257037
+ "learning_rate": 8.030753205128206e-06,
257038
+ "loss": 0.6219,
257039
+ "step": 123930
257040
+ },
257041
+ {
257042
+ "epoch": 990.87,
257043
+ "learning_rate": 8.030673076923077e-06,
257044
+ "loss": 0.2894,
257045
+ "step": 123935
257046
+ },
257047
+ {
257048
+ "epoch": 990.91,
257049
+ "learning_rate": 8.030592948717949e-06,
257050
+ "loss": 0.3362,
257051
+ "step": 123940
257052
+ },
257053
+ {
257054
+ "epoch": 990.95,
257055
+ "learning_rate": 8.030512820512822e-06,
257056
+ "loss": 0.4499,
257057
+ "step": 123945
257058
+ },
257059
+ {
257060
+ "epoch": 990.99,
257061
+ "learning_rate": 8.030432692307693e-06,
257062
+ "loss": 0.9499,
257063
+ "step": 123950
257064
+ },
257065
+ {
257066
+ "epoch": 991.0,
257067
+ "eval_loss": 0.38879507780075073,
257068
+ "eval_runtime": 36.3541,
257069
+ "eval_samples_per_second": 23.134,
257070
+ "eval_steps_per_second": 0.743,
257071
+ "eval_wer": 0.17872121256714002,
257072
+ "step": 123951
257073
+ },
257074
+ {
257075
+ "epoch": 991.03,
257076
+ "learning_rate": 8.030352564102564e-06,
257077
+ "loss": 0.3227,
257078
+ "step": 123955
257079
+ },
257080
+ {
257081
+ "epoch": 991.07,
257082
+ "learning_rate": 8.030272435897437e-06,
257083
+ "loss": 0.275,
257084
+ "step": 123960
257085
+ },
257086
+ {
257087
+ "epoch": 991.11,
257088
+ "learning_rate": 8.030192307692309e-06,
257089
+ "loss": 0.3181,
257090
+ "step": 123965
257091
+ },
257092
+ {
257093
+ "epoch": 991.15,
257094
+ "learning_rate": 8.03011217948718e-06,
257095
+ "loss": 0.3956,
257096
+ "step": 123970
257097
+ },
257098
+ {
257099
+ "epoch": 991.19,
257100
+ "learning_rate": 8.030032051282051e-06,
257101
+ "loss": 0.9103,
257102
+ "step": 123975
257103
+ },
257104
+ {
257105
+ "epoch": 991.23,
257106
+ "learning_rate": 8.029951923076925e-06,
257107
+ "loss": 0.6729,
257108
+ "step": 123980
257109
+ },
257110
+ {
257111
+ "epoch": 991.27,
257112
+ "learning_rate": 8.029871794871796e-06,
257113
+ "loss": 0.2789,
257114
+ "step": 123985
257115
+ },
257116
+ {
257117
+ "epoch": 991.31,
257118
+ "learning_rate": 8.029791666666667e-06,
257119
+ "loss": 0.3114,
257120
+ "step": 123990
257121
+ },
257122
+ {
257123
+ "epoch": 991.35,
257124
+ "learning_rate": 8.029711538461539e-06,
257125
+ "loss": 0.3959,
257126
+ "step": 123995
257127
+ },
257128
+ {
257129
+ "epoch": 991.39,
257130
+ "learning_rate": 8.029631410256412e-06,
257131
+ "loss": 0.9762,
257132
+ "step": 124000
257133
+ },
257134
+ {
257135
+ "epoch": 991.43,
257136
+ "learning_rate": 8.029551282051283e-06,
257137
+ "loss": 0.6035,
257138
+ "step": 124005
257139
+ },
257140
+ {
257141
+ "epoch": 991.47,
257142
+ "learning_rate": 8.029471153846154e-06,
257143
+ "loss": 0.3083,
257144
+ "step": 124010
257145
+ },
257146
+ {
257147
+ "epoch": 991.51,
257148
+ "learning_rate": 8.029391025641027e-06,
257149
+ "loss": 0.3193,
257150
+ "step": 124015
257151
+ },
257152
+ {
257153
+ "epoch": 991.55,
257154
+ "learning_rate": 8.029310897435899e-06,
257155
+ "loss": 0.3466,
257156
+ "step": 124020
257157
+ },
257158
+ {
257159
+ "epoch": 991.59,
257160
+ "learning_rate": 8.02923076923077e-06,
257161
+ "loss": 0.9612,
257162
+ "step": 124025
257163
+ },
257164
+ {
257165
+ "epoch": 991.63,
257166
+ "learning_rate": 8.029150641025642e-06,
257167
+ "loss": 0.6535,
257168
+ "step": 124030
257169
+ },
257170
+ {
257171
+ "epoch": 991.67,
257172
+ "learning_rate": 8.029070512820515e-06,
257173
+ "loss": 0.2786,
257174
+ "step": 124035
257175
+ },
257176
+ {
257177
+ "epoch": 991.71,
257178
+ "learning_rate": 8.028990384615384e-06,
257179
+ "loss": 0.2491,
257180
+ "step": 124040
257181
+ },
257182
+ {
257183
+ "epoch": 991.75,
257184
+ "learning_rate": 8.028910256410257e-06,
257185
+ "loss": 0.4216,
257186
+ "step": 124045
257187
+ },
257188
+ {
257189
+ "epoch": 991.79,
257190
+ "learning_rate": 8.028830128205129e-06,
257191
+ "loss": 0.9788,
257192
+ "step": 124050
257193
+ },
257194
+ {
257195
+ "epoch": 991.83,
257196
+ "learning_rate": 8.02875e-06,
257197
+ "loss": 0.5756,
257198
+ "step": 124055
257199
+ },
257200
+ {
257201
+ "epoch": 991.87,
257202
+ "learning_rate": 8.028669871794873e-06,
257203
+ "loss": 0.2539,
257204
+ "step": 124060
257205
+ },
257206
+ {
257207
+ "epoch": 991.91,
257208
+ "learning_rate": 8.028589743589744e-06,
257209
+ "loss": 0.3347,
257210
+ "step": 124065
257211
+ },
257212
+ {
257213
+ "epoch": 991.95,
257214
+ "learning_rate": 8.028509615384616e-06,
257215
+ "loss": 0.4351,
257216
+ "step": 124070
257217
+ },
257218
+ {
257219
+ "epoch": 991.99,
257220
+ "learning_rate": 8.028429487179487e-06,
257221
+ "loss": 0.8815,
257222
+ "step": 124075
257223
+ },
257224
+ {
257225
+ "epoch": 992.0,
257226
+ "eval_loss": 0.39232033491134644,
257227
+ "eval_runtime": 47.6402,
257228
+ "eval_samples_per_second": 17.653,
257229
+ "eval_steps_per_second": 0.567,
257230
+ "eval_wer": 0.18814771566194716,
257231
+ "step": 124076
257232
+ },
257233
+ {
257234
+ "epoch": 1000.03,
257235
+ "learning_rate": 8.02834935897436e-06,
257236
+ "loss": 0.4001,
257237
+ "step": 124080
257238
+ },
257239
+ {
257240
+ "epoch": 1000.07,
257241
+ "learning_rate": 8.028269230769232e-06,
257242
+ "loss": 0.2938,
257243
+ "step": 124085
257244
+ },
257245
+ {
257246
+ "epoch": 1000.11,
257247
+ "learning_rate": 8.028189102564103e-06,
257248
+ "loss": 0.3341,
257249
+ "step": 124090
257250
+ },
257251
+ {
257252
+ "epoch": 1000.15,
257253
+ "learning_rate": 8.028108974358974e-06,
257254
+ "loss": 0.3691,
257255
+ "step": 124095
257256
+ },
257257
+ {
257258
+ "epoch": 1000.19,
257259
+ "learning_rate": 8.028028846153847e-06,
257260
+ "loss": 0.7611,
257261
+ "step": 124100
257262
+ },
257263
+ {
257264
+ "epoch": 1000.23,
257265
+ "learning_rate": 8.027948717948719e-06,
257266
+ "loss": 0.8048,
257267
+ "step": 124105
257268
+ },
257269
+ {
257270
+ "epoch": 1000.27,
257271
+ "learning_rate": 8.02786858974359e-06,
257272
+ "loss": 0.2299,
257273
+ "step": 124110
257274
+ },
257275
+ {
257276
+ "epoch": 1000.31,
257277
+ "learning_rate": 8.027788461538463e-06,
257278
+ "loss": 0.2911,
257279
+ "step": 124115
257280
+ },
257281
+ {
257282
+ "epoch": 1000.35,
257283
+ "learning_rate": 8.027708333333334e-06,
257284
+ "loss": 0.367,
257285
+ "step": 124120
257286
+ },
257287
+ {
257288
+ "epoch": 1000.39,
257289
+ "learning_rate": 8.027628205128206e-06,
257290
+ "loss": 1.2227,
257291
+ "step": 124125
257292
+ },
257293
+ {
257294
+ "epoch": 1000.43,
257295
+ "learning_rate": 8.027548076923077e-06,
257296
+ "loss": 0.6543,
257297
+ "step": 124130
257298
+ },
257299
+ {
257300
+ "epoch": 1000.47,
257301
+ "learning_rate": 8.02746794871795e-06,
257302
+ "loss": 0.2603,
257303
+ "step": 124135
257304
+ },
257305
+ {
257306
+ "epoch": 1000.51,
257307
+ "learning_rate": 8.027387820512822e-06,
257308
+ "loss": 0.3135,
257309
+ "step": 124140
257310
+ },
257311
+ {
257312
+ "epoch": 1000.55,
257313
+ "learning_rate": 8.027307692307693e-06,
257314
+ "loss": 0.4096,
257315
+ "step": 124145
257316
+ },
257317
+ {
257318
+ "epoch": 1000.59,
257319
+ "learning_rate": 8.027227564102564e-06,
257320
+ "loss": 0.8186,
257321
+ "step": 124150
257322
+ },
257323
+ {
257324
+ "epoch": 1000.63,
257325
+ "learning_rate": 8.027147435897437e-06,
257326
+ "loss": 0.5708,
257327
+ "step": 124155
257328
+ },
257329
+ {
257330
+ "epoch": 1000.67,
257331
+ "learning_rate": 8.027067307692307e-06,
257332
+ "loss": 0.2524,
257333
+ "step": 124160
257334
+ },
257335
+ {
257336
+ "epoch": 1000.71,
257337
+ "learning_rate": 8.02698717948718e-06,
257338
+ "loss": 0.3512,
257339
+ "step": 124165
257340
+ },
257341
+ {
257342
+ "epoch": 1000.76,
257343
+ "learning_rate": 8.026907051282053e-06,
257344
+ "loss": 0.3847,
257345
+ "step": 124170
257346
+ },
257347
+ {
257348
+ "epoch": 1000.8,
257349
+ "learning_rate": 8.026826923076923e-06,
257350
+ "loss": 0.8454,
257351
+ "step": 124175
257352
+ },
257353
+ {
257354
+ "epoch": 1000.84,
257355
+ "learning_rate": 8.026746794871796e-06,
257356
+ "loss": 0.6479,
257357
+ "step": 124180
257358
+ },
257359
+ {
257360
+ "epoch": 1000.88,
257361
+ "learning_rate": 8.026666666666667e-06,
257362
+ "loss": 0.3131,
257363
+ "step": 124185
257364
+ },
257365
+ {
257366
+ "epoch": 1000.92,
257367
+ "learning_rate": 8.026586538461539e-06,
257368
+ "loss": 0.3086,
257369
+ "step": 124190
257370
+ },
257371
+ {
257372
+ "epoch": 1000.96,
257373
+ "learning_rate": 8.02650641025641e-06,
257374
+ "loss": 0.4573,
257375
+ "step": 124195
257376
+ },
257377
+ {
257378
+ "epoch": 1001.0,
257379
+ "learning_rate": 8.026426282051283e-06,
257380
+ "loss": 0.9595,
257381
+ "step": 124200
257382
+ },
257383
+ {
257384
+ "epoch": 1001.0,
257385
+ "eval_loss": 0.3651779294013977,
257386
+ "eval_runtime": 38.1351,
257387
+ "eval_samples_per_second": 22.027,
257388
+ "eval_steps_per_second": 0.708,
257389
+ "eval_wer": 0.17809599194070663,
257390
+ "step": 124200
257391
+ },
257392
+ {
257393
+ "epoch": 1001.04,
257394
+ "learning_rate": 8.026346153846154e-06,
257395
+ "loss": 0.3085,
257396
+ "step": 124205
257397
+ },
257398
+ {
257399
+ "epoch": 1001.08,
257400
+ "learning_rate": 8.026266025641026e-06,
257401
+ "loss": 0.2722,
257402
+ "step": 124210
257403
+ },
257404
+ {
257405
+ "epoch": 1001.12,
257406
+ "learning_rate": 8.026185897435899e-06,
257407
+ "loss": 0.2798,
257408
+ "step": 124215
257409
+ },
257410
+ {
257411
+ "epoch": 1001.16,
257412
+ "learning_rate": 8.02610576923077e-06,
257413
+ "loss": 0.4422,
257414
+ "step": 124220
257415
+ },
257416
+ {
257417
+ "epoch": 1001.2,
257418
+ "learning_rate": 8.026025641025641e-06,
257419
+ "loss": 1.1585,
257420
+ "step": 124225
257421
+ },
257422
+ {
257423
+ "epoch": 1001.24,
257424
+ "learning_rate": 8.025945512820513e-06,
257425
+ "loss": 0.3022,
257426
+ "step": 124230
257427
+ },
257428
+ {
257429
+ "epoch": 1001.28,
257430
+ "learning_rate": 8.025865384615386e-06,
257431
+ "loss": 0.3218,
257432
+ "step": 124235
257433
+ },
257434
+ {
257435
+ "epoch": 1001.32,
257436
+ "learning_rate": 8.025785256410257e-06,
257437
+ "loss": 0.3082,
257438
+ "step": 124240
257439
+ },
257440
+ {
257441
+ "epoch": 1001.36,
257442
+ "learning_rate": 8.025705128205129e-06,
257443
+ "loss": 0.4458,
257444
+ "step": 124245
257445
+ },
257446
+ {
257447
+ "epoch": 1001.4,
257448
+ "learning_rate": 8.025625e-06,
257449
+ "loss": 1.2388,
257450
+ "step": 124250
257451
+ },
257452
+ {
257453
+ "epoch": 1001.44,
257454
+ "learning_rate": 8.025544871794873e-06,
257455
+ "loss": 0.3478,
257456
+ "step": 124255
257457
+ },
257458
+ {
257459
+ "epoch": 1001.48,
257460
+ "learning_rate": 8.025464743589744e-06,
257461
+ "loss": 0.2347,
257462
+ "step": 124260
257463
+ },
257464
+ {
257465
+ "epoch": 1001.52,
257466
+ "learning_rate": 8.025384615384616e-06,
257467
+ "loss": 0.3652,
257468
+ "step": 124265
257469
+ },
257470
+ {
257471
+ "epoch": 1001.56,
257472
+ "learning_rate": 8.025304487179489e-06,
257473
+ "loss": 0.3937,
257474
+ "step": 124270
257475
+ },
257476
+ {
257477
+ "epoch": 1001.6,
257478
+ "learning_rate": 8.02522435897436e-06,
257479
+ "loss": 1.2448,
257480
+ "step": 124275
257481
+ },
257482
+ {
257483
+ "epoch": 1001.64,
257484
+ "learning_rate": 8.025144230769231e-06,
257485
+ "loss": 0.3001,
257486
+ "step": 124280
257487
+ },
257488
+ {
257489
+ "epoch": 1001.68,
257490
+ "learning_rate": 8.025064102564103e-06,
257491
+ "loss": 0.2776,
257492
+ "step": 124285
257493
+ },
257494
+ {
257495
+ "epoch": 1001.72,
257496
+ "learning_rate": 8.024983974358976e-06,
257497
+ "loss": 0.3356,
257498
+ "step": 124290
257499
+ },
257500
+ {
257501
+ "epoch": 1001.76,
257502
+ "learning_rate": 8.024903846153847e-06,
257503
+ "loss": 0.4398,
257504
+ "step": 124295
257505
+ },
257506
+ {
257507
+ "epoch": 1001.8,
257508
+ "learning_rate": 8.024823717948719e-06,
257509
+ "loss": 1.1453,
257510
+ "step": 124300
257511
+ },
257512
+ {
257513
+ "epoch": 1001.84,
257514
+ "learning_rate": 8.02474358974359e-06,
257515
+ "loss": 0.2821,
257516
+ "step": 124305
257517
+ },
257518
+ {
257519
+ "epoch": 1001.88,
257520
+ "learning_rate": 8.024663461538463e-06,
257521
+ "loss": 0.2459,
257522
+ "step": 124310
257523
+ },
257524
+ {
257525
+ "epoch": 1001.92,
257526
+ "learning_rate": 8.024583333333334e-06,
257527
+ "loss": 0.2939,
257528
+ "step": 124315
257529
+ },
257530
+ {
257531
+ "epoch": 1001.96,
257532
+ "learning_rate": 8.024503205128206e-06,
257533
+ "loss": 0.4506,
257534
+ "step": 124320
257535
+ },
257536
+ {
257537
+ "epoch": 1002.0,
257538
+ "eval_loss": 0.3501935303211212,
257539
+ "eval_runtime": 37.1608,
257540
+ "eval_samples_per_second": 22.604,
257541
+ "eval_steps_per_second": 0.727,
257542
+ "eval_wer": 0.18019187765995817,
257543
+ "step": 124324
257544
+ },
257545
+ {
257546
+ "epoch": 994.01,
257547
+ "learning_rate": 8.024423076923079e-06,
257548
+ "loss": 0.3624,
257549
+ "step": 124325
257550
+ },
257551
+ {
257552
+ "epoch": 994.05,
257553
+ "learning_rate": 8.024342948717948e-06,
257554
+ "loss": 0.3273,
257555
+ "step": 124330
257556
+ },
257557
+ {
257558
+ "epoch": 994.09,
257559
+ "learning_rate": 8.024262820512821e-06,
257560
+ "loss": 0.2553,
257561
+ "step": 124335
257562
+ },
257563
+ {
257564
+ "epoch": 994.13,
257565
+ "learning_rate": 8.024182692307693e-06,
257566
+ "loss": 0.3694,
257567
+ "step": 124340
257568
+ },
257569
+ {
257570
+ "epoch": 994.17,
257571
+ "learning_rate": 8.024102564102564e-06,
257572
+ "loss": 0.5139,
257573
+ "step": 124345
257574
+ },
257575
+ {
257576
+ "epoch": 994.21,
257577
+ "learning_rate": 8.024022435897436e-06,
257578
+ "loss": 1.2288,
257579
+ "step": 124350
257580
+ },
257581
+ {
257582
+ "epoch": 994.25,
257583
+ "learning_rate": 8.023942307692309e-06,
257584
+ "loss": 0.3068,
257585
+ "step": 124355
257586
+ },
257587
+ {
257588
+ "epoch": 994.29,
257589
+ "learning_rate": 8.02386217948718e-06,
257590
+ "loss": 0.3395,
257591
+ "step": 124360
257592
+ },
257593
+ {
257594
+ "epoch": 994.33,
257595
+ "learning_rate": 8.023782051282051e-06,
257596
+ "loss": 0.295,
257597
+ "step": 124365
257598
+ },
257599
+ {
257600
+ "epoch": 994.37,
257601
+ "learning_rate": 8.023701923076924e-06,
257602
+ "loss": 0.4902,
257603
+ "step": 124370
257604
+ },
257605
+ {
257606
+ "epoch": 994.41,
257607
+ "learning_rate": 8.023621794871796e-06,
257608
+ "loss": 1.1012,
257609
+ "step": 124375
257610
+ },
257611
+ {
257612
+ "epoch": 994.45,
257613
+ "learning_rate": 8.023541666666667e-06,
257614
+ "loss": 0.3274,
257615
+ "step": 124380
257616
+ },
257617
+ {
257618
+ "epoch": 994.49,
257619
+ "learning_rate": 8.023461538461538e-06,
257620
+ "loss": 0.3741,
257621
+ "step": 124385
257622
+ },
257623
+ {
257624
+ "epoch": 994.53,
257625
+ "learning_rate": 8.023381410256412e-06,
257626
+ "loss": 0.3158,
257627
+ "step": 124390
257628
+ },
257629
+ {
257630
+ "epoch": 994.57,
257631
+ "learning_rate": 8.023301282051283e-06,
257632
+ "loss": 0.6262,
257633
+ "step": 124395
257634
+ },
257635
+ {
257636
+ "epoch": 994.61,
257637
+ "learning_rate": 8.023221153846154e-06,
257638
+ "loss": 1.1513,
257639
+ "step": 124400
257640
+ },
257641
+ {
257642
+ "epoch": 994.65,
257643
+ "learning_rate": 8.023141025641026e-06,
257644
+ "loss": 0.3037,
257645
+ "step": 124405
257646
+ },
257647
+ {
257648
+ "epoch": 994.69,
257649
+ "learning_rate": 8.023060897435899e-06,
257650
+ "loss": 0.2719,
257651
+ "step": 124410
257652
+ },
257653
+ {
257654
+ "epoch": 994.73,
257655
+ "learning_rate": 8.02298076923077e-06,
257656
+ "loss": 0.3865,
257657
+ "step": 124415
257658
+ },
257659
+ {
257660
+ "epoch": 994.77,
257661
+ "learning_rate": 8.022900641025641e-06,
257662
+ "loss": 0.5366,
257663
+ "step": 124420
257664
+ },
257665
+ {
257666
+ "epoch": 994.81,
257667
+ "learning_rate": 8.022820512820514e-06,
257668
+ "loss": 1.0406,
257669
+ "step": 124425
257670
+ },
257671
+ {
257672
+ "epoch": 994.85,
257673
+ "learning_rate": 8.022740384615386e-06,
257674
+ "loss": 0.3121,
257675
+ "step": 124430
257676
+ },
257677
+ {
257678
+ "epoch": 994.89,
257679
+ "learning_rate": 8.022660256410257e-06,
257680
+ "loss": 0.4277,
257681
+ "step": 124435
257682
+ },
257683
+ {
257684
+ "epoch": 994.93,
257685
+ "learning_rate": 8.022580128205128e-06,
257686
+ "loss": 0.2867,
257687
+ "step": 124440
257688
+ },
257689
+ {
257690
+ "epoch": 994.97,
257691
+ "learning_rate": 8.022500000000002e-06,
257692
+ "loss": 0.5523,
257693
+ "step": 124445
257694
+ },
257695
+ {
257696
+ "epoch": 995.0,
257697
+ "eval_loss": 0.37554535269737244,
257698
+ "eval_runtime": 37.1448,
257699
+ "eval_samples_per_second": 22.614,
257700
+ "eval_steps_per_second": 0.727,
257701
+ "eval_wer": 0.178762469278589,
257702
+ "step": 124449
257703
+ },
257704
+ {
257705
+ "epoch": 995.01,
257706
+ "learning_rate": 8.022419871794871e-06,
257707
+ "loss": 0.6121,
257708
+ "step": 124450
257709
+ },
257710
+ {
257711
+ "epoch": 995.05,
257712
+ "learning_rate": 8.022339743589744e-06,
257713
+ "loss": 0.2873,
257714
+ "step": 124455
257715
+ },
257716
+ {
257717
+ "epoch": 995.09,
257718
+ "learning_rate": 8.022259615384616e-06,
257719
+ "loss": 0.299,
257720
+ "step": 124460
257721
+ },
257722
+ {
257723
+ "epoch": 995.13,
257724
+ "learning_rate": 8.022179487179487e-06,
257725
+ "loss": 0.2944,
257726
+ "step": 124465
257727
+ },
257728
+ {
257729
+ "epoch": 995.17,
257730
+ "learning_rate": 8.02209935897436e-06,
257731
+ "loss": 0.5003,
257732
+ "step": 124470
257733
+ },
257734
+ {
257735
+ "epoch": 995.21,
257736
+ "learning_rate": 8.022019230769231e-06,
257737
+ "loss": 1.2021,
257738
+ "step": 124475
257739
+ },
257740
+ {
257741
+ "epoch": 995.25,
257742
+ "learning_rate": 8.021939102564103e-06,
257743
+ "loss": 0.2656,
257744
+ "step": 124480
257745
+ },
257746
+ {
257747
+ "epoch": 995.29,
257748
+ "learning_rate": 8.021858974358974e-06,
257749
+ "loss": 0.2815,
257750
+ "step": 124485
257751
+ },
257752
+ {
257753
+ "epoch": 995.33,
257754
+ "learning_rate": 8.021778846153847e-06,
257755
+ "loss": 0.2868,
257756
+ "step": 124490
257757
+ },
257758
+ {
257759
+ "epoch": 995.37,
257760
+ "learning_rate": 8.021698717948719e-06,
257761
+ "loss": 0.4953,
257762
+ "step": 124495
257763
+ },
257764
+ {
257765
+ "epoch": 995.41,
257766
+ "learning_rate": 8.02161858974359e-06,
257767
+ "loss": 1.0536,
257768
+ "step": 124500
257769
+ },
257770
+ {
257771
+ "epoch": 995.45,
257772
+ "learning_rate": 8.021538461538461e-06,
257773
+ "loss": 0.2525,
257774
+ "step": 124505
257775
+ },
257776
+ {
257777
+ "epoch": 995.49,
257778
+ "learning_rate": 8.021458333333334e-06,
257779
+ "loss": 0.281,
257780
+ "step": 124510
257781
+ },
257782
+ {
257783
+ "epoch": 995.53,
257784
+ "learning_rate": 8.021378205128206e-06,
257785
+ "loss": 0.3611,
257786
+ "step": 124515
257787
+ },
257788
+ {
257789
+ "epoch": 995.57,
257790
+ "learning_rate": 8.021298076923077e-06,
257791
+ "loss": 0.4902,
257792
+ "step": 124520
257793
+ },
257794
+ {
257795
+ "epoch": 995.61,
257796
+ "learning_rate": 8.02121794871795e-06,
257797
+ "loss": 1.1837,
257798
+ "step": 124525
257799
+ },
257800
+ {
257801
+ "epoch": 995.65,
257802
+ "learning_rate": 8.021137820512821e-06,
257803
+ "loss": 0.3066,
257804
+ "step": 124530
257805
+ },
257806
+ {
257807
+ "epoch": 995.69,
257808
+ "learning_rate": 8.021057692307693e-06,
257809
+ "loss": 0.3343,
257810
+ "step": 124535
257811
+ },
257812
+ {
257813
+ "epoch": 995.73,
257814
+ "learning_rate": 8.020977564102564e-06,
257815
+ "loss": 0.3023,
257816
+ "step": 124540
257817
+ },
257818
+ {
257819
+ "epoch": 995.77,
257820
+ "learning_rate": 8.020897435897437e-06,
257821
+ "loss": 0.519,
257822
+ "step": 124545
257823
+ },
257824
+ {
257825
+ "epoch": 995.81,
257826
+ "learning_rate": 8.020817307692309e-06,
257827
+ "loss": 1.0876,
257828
+ "step": 124550
257829
+ },
257830
+ {
257831
+ "epoch": 995.85,
257832
+ "learning_rate": 8.02073717948718e-06,
257833
+ "loss": 0.2318,
257834
+ "step": 124555
257835
+ },
257836
+ {
257837
+ "epoch": 995.89,
257838
+ "learning_rate": 8.020657051282051e-06,
257839
+ "loss": 0.3073,
257840
+ "step": 124560
257841
+ },
257842
+ {
257843
+ "epoch": 995.93,
257844
+ "learning_rate": 8.020576923076924e-06,
257845
+ "loss": 0.3889,
257846
+ "step": 124565
257847
+ },
257848
+ {
257849
+ "epoch": 995.97,
257850
+ "learning_rate": 8.020496794871796e-06,
257851
+ "loss": 0.4879,
257852
+ "step": 124570
257853
+ },
257854
+ {
257855
+ "epoch": 996.0,
257856
+ "eval_loss": 0.44444289803504944,
257857
+ "eval_runtime": 36.0337,
257858
+ "eval_samples_per_second": 23.284,
257859
+ "eval_steps_per_second": 0.749,
257860
+ "eval_wer": 0.17907788254510357,
257861
+ "step": 124574
257862
+ },
257863
+ {
257864
+ "epoch": 1004.01,
257865
+ "learning_rate": 8.020416666666667e-06,
257866
+ "loss": 0.2902,
257867
+ "step": 124575
257868
+ },
257869
+ {
257870
+ "epoch": 1004.05,
257871
+ "learning_rate": 8.02033653846154e-06,
257872
+ "loss": 0.3478,
257873
+ "step": 124580
257874
+ },
257875
+ {
257876
+ "epoch": 1004.09,
257877
+ "learning_rate": 8.020256410256411e-06,
257878
+ "loss": 0.3836,
257879
+ "step": 124585
257880
+ },
257881
+ {
257882
+ "epoch": 1004.13,
257883
+ "learning_rate": 8.020176282051283e-06,
257884
+ "loss": 0.3269,
257885
+ "step": 124590
257886
+ },
257887
+ {
257888
+ "epoch": 1004.17,
257889
+ "learning_rate": 8.020096153846154e-06,
257890
+ "loss": 0.5404,
257891
+ "step": 124595
257892
+ },
257893
+ {
257894
+ "epoch": 1004.21,
257895
+ "learning_rate": 8.020016025641027e-06,
257896
+ "loss": 1.1555,
257897
+ "step": 124600
257898
+ },
257899
+ {
257900
+ "epoch": 1004.25,
257901
+ "learning_rate": 8.019935897435897e-06,
257902
+ "loss": 0.3135,
257903
+ "step": 124605
257904
+ },
257905
+ {
257906
+ "epoch": 1004.29,
257907
+ "learning_rate": 8.01985576923077e-06,
257908
+ "loss": 0.2687,
257909
+ "step": 124610
257910
+ },
257911
+ {
257912
+ "epoch": 1004.33,
257913
+ "learning_rate": 8.019775641025643e-06,
257914
+ "loss": 0.3204,
257915
+ "step": 124615
257916
+ },
257917
+ {
257918
+ "epoch": 1004.37,
257919
+ "learning_rate": 8.019695512820513e-06,
257920
+ "loss": 0.6159,
257921
+ "step": 124620
257922
+ },
257923
+ {
257924
+ "epoch": 1004.41,
257925
+ "learning_rate": 8.019615384615386e-06,
257926
+ "loss": 1.1154,
257927
+ "step": 124625
257928
+ },
257929
+ {
257930
+ "epoch": 1004.45,
257931
+ "learning_rate": 8.019535256410257e-06,
257932
+ "loss": 0.3203,
257933
+ "step": 124630
257934
+ },
257935
+ {
257936
+ "epoch": 1004.49,
257937
+ "learning_rate": 8.019455128205128e-06,
257938
+ "loss": 0.2448,
257939
+ "step": 124635
257940
+ },
257941
+ {
257942
+ "epoch": 1004.53,
257943
+ "learning_rate": 8.019375e-06,
257944
+ "loss": 0.3757,
257945
+ "step": 124640
257946
+ },
257947
+ {
257948
+ "epoch": 1004.57,
257949
+ "learning_rate": 8.019294871794873e-06,
257950
+ "loss": 0.5318,
257951
+ "step": 124645
257952
+ },
257953
+ {
257954
+ "epoch": 1004.61,
257955
+ "learning_rate": 8.019214743589744e-06,
257956
+ "loss": 1.0301,
257957
+ "step": 124650
257958
+ },
257959
+ {
257960
+ "epoch": 1004.65,
257961
+ "learning_rate": 8.019134615384616e-06,
257962
+ "loss": 0.3138,
257963
+ "step": 124655
257964
+ },
257965
+ {
257966
+ "epoch": 1004.69,
257967
+ "learning_rate": 8.019054487179487e-06,
257968
+ "loss": 0.2722,
257969
+ "step": 124660
257970
+ },
257971
+ {
257972
+ "epoch": 1004.73,
257973
+ "learning_rate": 8.01897435897436e-06,
257974
+ "loss": 0.3035,
257975
+ "step": 124665
257976
+ },
257977
+ {
257978
+ "epoch": 1004.77,
257979
+ "learning_rate": 8.018894230769231e-06,
257980
+ "loss": 0.4899,
257981
+ "step": 124670
257982
+ },
257983
+ {
257984
+ "epoch": 1004.81,
257985
+ "learning_rate": 8.018814102564103e-06,
257986
+ "loss": 1.1714,
257987
+ "step": 124675
257988
+ },
257989
+ {
257990
+ "epoch": 1004.85,
257991
+ "learning_rate": 8.018733974358976e-06,
257992
+ "loss": 0.2925,
257993
+ "step": 124680
257994
+ },
257995
+ {
257996
+ "epoch": 1004.9,
257997
+ "learning_rate": 8.018653846153847e-06,
257998
+ "loss": 0.2628,
257999
+ "step": 124685
258000
+ },
258001
+ {
258002
+ "epoch": 1004.94,
258003
+ "learning_rate": 8.018573717948718e-06,
258004
+ "loss": 0.3557,
258005
+ "step": 124690
258006
+ },
258007
+ {
258008
+ "epoch": 1004.98,
258009
+ "learning_rate": 8.01849358974359e-06,
258010
+ "loss": 0.6265,
258011
+ "step": 124695
258012
+ },
258013
+ {
258014
+ "epoch": 1005.0,
258015
+ "eval_loss": 0.33506569266319275,
258016
+ "eval_runtime": 36.6106,
258017
+ "eval_samples_per_second": 22.917,
258018
+ "eval_steps_per_second": 0.737,
258019
+ "eval_wer": 0.18211167363709738,
258020
+ "step": 124698
258021
+ },
258022
+ {
258023
+ "epoch": 997.02,
258024
+ "learning_rate": 8.018413461538463e-06,
258025
+ "loss": 0.394,
258026
+ "step": 124700
258027
+ },
258028
+ {
258029
+ "epoch": 997.06,
258030
+ "learning_rate": 8.018333333333334e-06,
258031
+ "loss": 0.3194,
258032
+ "step": 124705
258033
+ },
258034
+ {
258035
+ "epoch": 997.1,
258036
+ "learning_rate": 8.018253205128206e-06,
258037
+ "loss": 0.2794,
258038
+ "step": 124710
258039
+ },
258040
+ {
258041
+ "epoch": 997.14,
258042
+ "learning_rate": 8.018173076923079e-06,
258043
+ "loss": 0.3031,
258044
+ "step": 124715
258045
+ },
258046
+ {
258047
+ "epoch": 997.18,
258048
+ "learning_rate": 8.01809294871795e-06,
258049
+ "loss": 0.5738,
258050
+ "step": 124720
258051
+ },
258052
+ {
258053
+ "epoch": 997.22,
258054
+ "learning_rate": 8.018012820512821e-06,
258055
+ "loss": 0.9485,
258056
+ "step": 124725
258057
+ },
258058
+ {
258059
+ "epoch": 997.26,
258060
+ "learning_rate": 8.017932692307693e-06,
258061
+ "loss": 0.3145,
258062
+ "step": 124730
258063
+ },
258064
+ {
258065
+ "epoch": 997.3,
258066
+ "learning_rate": 8.017852564102566e-06,
258067
+ "loss": 0.2922,
258068
+ "step": 124735
258069
+ },
258070
+ {
258071
+ "epoch": 997.34,
258072
+ "learning_rate": 8.017772435897435e-06,
258073
+ "loss": 0.3783,
258074
+ "step": 124740
258075
+ },
258076
+ {
258077
+ "epoch": 997.38,
258078
+ "learning_rate": 8.017692307692308e-06,
258079
+ "loss": 0.6634,
258080
+ "step": 124745
258081
+ },
258082
+ {
258083
+ "epoch": 997.42,
258084
+ "learning_rate": 8.01761217948718e-06,
258085
+ "loss": 0.9275,
258086
+ "step": 124750
258087
+ },
258088
+ {
258089
+ "epoch": 997.46,
258090
+ "learning_rate": 8.017532051282053e-06,
258091
+ "loss": 0.3074,
258092
+ "step": 124755
258093
+ },
258094
+ {
258095
+ "epoch": 997.5,
258096
+ "learning_rate": 8.017451923076923e-06,
258097
+ "loss": 0.2433,
258098
+ "step": 124760
258099
+ },
258100
+ {
258101
+ "epoch": 997.54,
258102
+ "learning_rate": 8.017371794871796e-06,
258103
+ "loss": 0.3525,
258104
+ "step": 124765
258105
+ },
258106
+ {
258107
+ "epoch": 997.58,
258108
+ "learning_rate": 8.017291666666669e-06,
258109
+ "loss": 0.5824,
258110
+ "step": 124770
258111
+ },
258112
+ {
258113
+ "epoch": 997.62,
258114
+ "learning_rate": 8.017211538461538e-06,
258115
+ "loss": 0.9745,
258116
+ "step": 124775
258117
+ },
258118
+ {
258119
+ "epoch": 997.66,
258120
+ "learning_rate": 8.017131410256411e-06,
258121
+ "loss": 0.2424,
258122
+ "step": 124780
258123
+ },
258124
+ {
258125
+ "epoch": 997.7,
258126
+ "learning_rate": 8.017051282051283e-06,
258127
+ "loss": 0.3636,
258128
+ "step": 124785
258129
+ },
258130
+ {
258131
+ "epoch": 997.74,
258132
+ "learning_rate": 8.016971153846154e-06,
258133
+ "loss": 0.3245,
258134
+ "step": 124790
258135
+ },
258136
+ {
258137
+ "epoch": 997.78,
258138
+ "learning_rate": 8.016891025641025e-06,
258139
+ "loss": 0.6432,
258140
+ "step": 124795
258141
+ },
258142
+ {
258143
+ "epoch": 997.82,
258144
+ "learning_rate": 8.016810897435898e-06,
258145
+ "loss": 1.0522,
258146
+ "step": 124800
258147
+ },
258148
+ {
258149
+ "epoch": 997.86,
258150
+ "learning_rate": 8.01673076923077e-06,
258151
+ "loss": 0.3268,
258152
+ "step": 124805
258153
+ },
258154
+ {
258155
+ "epoch": 997.9,
258156
+ "learning_rate": 8.016650641025641e-06,
258157
+ "loss": 0.3003,
258158
+ "step": 124810
258159
+ },
258160
+ {
258161
+ "epoch": 997.94,
258162
+ "learning_rate": 8.016570512820514e-06,
258163
+ "loss": 0.3971,
258164
+ "step": 124815
258165
+ },
258166
+ {
258167
+ "epoch": 997.98,
258168
+ "learning_rate": 8.016490384615386e-06,
258169
+ "loss": 0.8373,
258170
+ "step": 124820
258171
+ },
258172
+ {
258173
+ "epoch": 998.0,
258174
+ "eval_loss": 0.33471718430519104,
258175
+ "eval_runtime": 36.9243,
258176
+ "eval_samples_per_second": 22.722,
258177
+ "eval_steps_per_second": 0.731,
258178
+ "eval_wer": 0.17628693821244465,
258179
+ "step": 124823
258180
+ },
258181
+ {
258182
+ "epoch": 1006.02,
258183
+ "learning_rate": 8.016410256410257e-06,
258184
+ "loss": 0.3007,
258185
+ "step": 124825
258186
+ },
258187
+ {
258188
+ "epoch": 1006.06,
258189
+ "learning_rate": 8.016330128205128e-06,
258190
+ "loss": 0.3029,
258191
+ "step": 124830
258192
+ },
258193
+ {
258194
+ "epoch": 1006.1,
258195
+ "learning_rate": 8.016250000000001e-06,
258196
+ "loss": 0.2823,
258197
+ "step": 124835
258198
+ },
258199
+ {
258200
+ "epoch": 1006.14,
258201
+ "learning_rate": 8.016169871794873e-06,
258202
+ "loss": 0.3448,
258203
+ "step": 124840
258204
+ },
258205
+ {
258206
+ "epoch": 1006.18,
258207
+ "learning_rate": 8.016089743589744e-06,
258208
+ "loss": 0.6987,
258209
+ "step": 124845
258210
+ },
258211
+ {
258212
+ "epoch": 1006.22,
258213
+ "learning_rate": 8.016009615384615e-06,
258214
+ "loss": 1.0145,
258215
+ "step": 124850
258216
+ },
258217
+ {
258218
+ "epoch": 1006.26,
258219
+ "learning_rate": 8.015929487179489e-06,
258220
+ "loss": 0.2459,
258221
+ "step": 124855
258222
+ },
258223
+ {
258224
+ "epoch": 1006.3,
258225
+ "learning_rate": 8.01584935897436e-06,
258226
+ "loss": 0.3105,
258227
+ "step": 124860
258228
+ },
258229
+ {
258230
+ "epoch": 1006.34,
258231
+ "learning_rate": 8.015769230769231e-06,
258232
+ "loss": 0.3416,
258233
+ "step": 124865
258234
+ },
258235
+ {
258236
+ "epoch": 1006.38,
258237
+ "learning_rate": 8.015689102564104e-06,
258238
+ "loss": 0.547,
258239
+ "step": 124870
258240
+ },
258241
+ {
258242
+ "epoch": 1006.42,
258243
+ "learning_rate": 8.015608974358976e-06,
258244
+ "loss": 0.9755,
258245
+ "step": 124875
258246
+ },
258247
+ {
258248
+ "epoch": 1006.46,
258249
+ "learning_rate": 8.015528846153847e-06,
258250
+ "loss": 0.2603,
258251
+ "step": 124880
258252
+ },
258253
+ {
258254
+ "epoch": 1006.5,
258255
+ "learning_rate": 8.015448717948718e-06,
258256
+ "loss": 0.3211,
258257
+ "step": 124885
258258
+ },
258259
+ {
258260
+ "epoch": 1006.54,
258261
+ "learning_rate": 8.015368589743591e-06,
258262
+ "loss": 0.2985,
258263
+ "step": 124890
258264
+ },
258265
+ {
258266
+ "epoch": 1006.58,
258267
+ "learning_rate": 8.015288461538461e-06,
258268
+ "loss": 0.577,
258269
+ "step": 124895
258270
+ },
258271
+ {
258272
+ "epoch": 1006.62,
258273
+ "learning_rate": 8.015208333333334e-06,
258274
+ "loss": 1.0838,
258275
+ "step": 124900
258276
+ },
258277
+ {
258278
+ "epoch": 1006.66,
258279
+ "learning_rate": 8.015128205128205e-06,
258280
+ "loss": 0.2679,
258281
+ "step": 124905
258282
+ },
258283
+ {
258284
+ "epoch": 1006.7,
258285
+ "learning_rate": 8.015048076923077e-06,
258286
+ "loss": 0.2726,
258287
+ "step": 124910
258288
+ },
258289
+ {
258290
+ "epoch": 1006.74,
258291
+ "learning_rate": 8.01496794871795e-06,
258292
+ "loss": 0.3246,
258293
+ "step": 124915
258294
+ },
258295
+ {
258296
+ "epoch": 1006.78,
258297
+ "learning_rate": 8.014887820512821e-06,
258298
+ "loss": 0.5986,
258299
+ "step": 124920
258300
+ },
258301
+ {
258302
+ "epoch": 1006.82,
258303
+ "learning_rate": 8.014807692307693e-06,
258304
+ "loss": 1.0309,
258305
+ "step": 124925
258306
+ },
258307
+ {
258308
+ "epoch": 1006.86,
258309
+ "learning_rate": 8.014727564102564e-06,
258310
+ "loss": 0.2936,
258311
+ "step": 124930
258312
+ },
258313
+ {
258314
+ "epoch": 1006.9,
258315
+ "learning_rate": 8.014647435897437e-06,
258316
+ "loss": 0.3178,
258317
+ "step": 124935
258318
+ },
258319
+ {
258320
+ "epoch": 1006.94,
258321
+ "learning_rate": 8.014567307692308e-06,
258322
+ "loss": 0.3584,
258323
+ "step": 124940
258324
+ },
258325
+ {
258326
+ "epoch": 1006.98,
258327
+ "learning_rate": 8.01448717948718e-06,
258328
+ "loss": 0.716,
258329
+ "step": 124945
258330
+ },
258331
+ {
258332
+ "epoch": 1007.0,
258333
+ "eval_loss": 0.3555839955806732,
258334
+ "eval_runtime": 36.5503,
258335
+ "eval_samples_per_second": 22.955,
258336
+ "eval_steps_per_second": 0.739,
258337
+ "eval_wer": 0.17162025681719809,
258338
+ "step": 124947
258339
+ },
258340
+ {
258341
+ "epoch": 999.02,
258342
+ "learning_rate": 8.014407051282051e-06,
258343
+ "loss": 0.3558,
258344
+ "step": 124950
258345
+ },
258346
+ {
258347
+ "epoch": 999.06,
258348
+ "learning_rate": 8.014326923076924e-06,
258349
+ "loss": 0.2697,
258350
+ "step": 124955
258351
+ },
258352
+ {
258353
+ "epoch": 999.1,
258354
+ "learning_rate": 8.014246794871796e-06,
258355
+ "loss": 0.3866,
258356
+ "step": 124960
258357
+ },
258358
+ {
258359
+ "epoch": 999.14,
258360
+ "learning_rate": 8.014166666666667e-06,
258361
+ "loss": 0.4439,
258362
+ "step": 124965
258363
+ },
258364
+ {
258365
+ "epoch": 999.18,
258366
+ "learning_rate": 8.01408653846154e-06,
258367
+ "loss": 0.7156,
258368
+ "step": 124970
258369
+ },
258370
+ {
258371
+ "epoch": 999.22,
258372
+ "learning_rate": 8.014006410256411e-06,
258373
+ "loss": 0.7368,
258374
+ "step": 124975
258375
+ },
258376
+ {
258377
+ "epoch": 999.26,
258378
+ "learning_rate": 8.013926282051283e-06,
258379
+ "loss": 0.2736,
258380
+ "step": 124980
258381
+ },
258382
+ {
258383
+ "epoch": 999.3,
258384
+ "learning_rate": 8.013846153846154e-06,
258385
+ "loss": 0.2855,
258386
+ "step": 124985
258387
+ },
258388
+ {
258389
+ "epoch": 999.34,
258390
+ "learning_rate": 8.013766025641027e-06,
258391
+ "loss": 0.3776,
258392
+ "step": 124990
258393
+ },
258394
+ {
258395
+ "epoch": 999.38,
258396
+ "learning_rate": 8.013685897435898e-06,
258397
+ "loss": 0.7372,
258398
+ "step": 124995
258399
+ },
258400
+ {
258401
+ "epoch": 999.42,
258402
+ "learning_rate": 8.01360576923077e-06,
258403
+ "loss": 0.8669,
258404
+ "step": 125000
258405
+ },
258406
+ {
258407
+ "epoch": 999.46,
258408
+ "learning_rate": 8.013525641025641e-06,
258409
+ "loss": 0.2687,
258410
+ "step": 125005
258411
+ },
258412
+ {
258413
+ "epoch": 999.5,
258414
+ "learning_rate": 8.013445512820514e-06,
258415
+ "loss": 0.2684,
258416
+ "step": 125010
258417
+ },
258418
+ {
258419
+ "epoch": 999.54,
258420
+ "learning_rate": 8.013365384615386e-06,
258421
+ "loss": 0.3246,
258422
+ "step": 125015
258423
+ },
258424
+ {
258425
+ "epoch": 999.58,
258426
+ "learning_rate": 8.013285256410257e-06,
258427
+ "loss": 0.6847,
258428
+ "step": 125020
258429
+ },
258430
+ {
258431
+ "epoch": 999.62,
258432
+ "learning_rate": 8.01320512820513e-06,
258433
+ "loss": 0.8567,
258434
+ "step": 125025
258435
+ },
258436
+ {
258437
+ "epoch": 999.66,
258438
+ "learning_rate": 8.013125000000001e-06,
258439
+ "loss": 0.2985,
258440
+ "step": 125030
258441
+ },
258442
+ {
258443
+ "epoch": 999.7,
258444
+ "learning_rate": 8.013044871794873e-06,
258445
+ "loss": 0.3044,
258446
+ "step": 125035
258447
+ },
258448
+ {
258449
+ "epoch": 999.74,
258450
+ "learning_rate": 8.012964743589744e-06,
258451
+ "loss": 0.3877,
258452
+ "step": 125040
258453
+ },
258454
+ {
258455
+ "epoch": 999.78,
258456
+ "learning_rate": 8.012884615384617e-06,
258457
+ "loss": 0.839,
258458
+ "step": 125045
258459
+ },
258460
+ {
258461
+ "epoch": 999.82,
258462
+ "learning_rate": 8.012804487179487e-06,
258463
+ "loss": 0.8549,
258464
+ "step": 125050
258465
+ },
258466
+ {
258467
+ "epoch": 999.86,
258468
+ "learning_rate": 8.01272435897436e-06,
258469
+ "loss": 0.2613,
258470
+ "step": 125055
258471
+ },
258472
+ {
258473
+ "epoch": 999.9,
258474
+ "learning_rate": 8.012644230769231e-06,
258475
+ "loss": 0.3373,
258476
+ "step": 125060
258477
+ },
258478
+ {
258479
+ "epoch": 999.94,
258480
+ "learning_rate": 8.012564102564103e-06,
258481
+ "loss": 0.5089,
258482
+ "step": 125065
258483
+ },
258484
+ {
258485
+ "epoch": 999.98,
258486
+ "learning_rate": 8.012483974358976e-06,
258487
+ "loss": 0.7532,
258488
+ "step": 125070
258489
+ },
258490
+ {
258491
+ "epoch": 1000.0,
258492
+ "eval_loss": 0.37426483631134033,
258493
+ "eval_runtime": 36.4808,
258494
+ "eval_samples_per_second": 22.998,
258495
+ "eval_steps_per_second": 0.74,
258496
+ "eval_wer": 0.17843594981032973,
258497
+ "step": 125072
258498
+ },
258499
+ {
258500
+ "epoch": 1000.02,
258501
+ "learning_rate": 8.012403846153847e-06,
258502
+ "loss": 0.311,
258503
+ "step": 125075
258504
+ },
258505
+ {
258506
+ "epoch": 1000.06,
258507
+ "learning_rate": 8.012323717948718e-06,
258508
+ "loss": 0.2755,
258509
+ "step": 125080
258510
+ },
258511
+ {
258512
+ "epoch": 1000.1,
258513
+ "learning_rate": 8.01224358974359e-06,
258514
+ "loss": 0.2835,
258515
+ "step": 125085
258516
+ },
258517
+ {
258518
+ "epoch": 1000.14,
258519
+ "learning_rate": 8.012163461538463e-06,
258520
+ "loss": 0.3655,
258521
+ "step": 125090
258522
+ },
258523
+ {
258524
+ "epoch": 1000.18,
258525
+ "learning_rate": 8.012083333333334e-06,
258526
+ "loss": 0.728,
258527
+ "step": 125095
258528
+ },
258529
+ {
258530
+ "epoch": 1000.22,
258531
+ "learning_rate": 8.012003205128205e-06,
258532
+ "loss": 0.8118,
258533
+ "step": 125100
258534
+ },
258535
+ {
258536
+ "epoch": 1000.26,
258537
+ "learning_rate": 8.011923076923077e-06,
258538
+ "loss": 0.3486,
258539
+ "step": 125105
258540
+ },
258541
+ {
258542
+ "epoch": 1000.3,
258543
+ "learning_rate": 8.01184294871795e-06,
258544
+ "loss": 0.2872,
258545
+ "step": 125110
258546
+ },
258547
+ {
258548
+ "epoch": 1000.34,
258549
+ "learning_rate": 8.011762820512821e-06,
258550
+ "loss": 0.3816,
258551
+ "step": 125115
258552
+ },
258553
+ {
258554
+ "epoch": 1000.38,
258555
+ "learning_rate": 8.011682692307693e-06,
258556
+ "loss": 0.736,
258557
+ "step": 125120
258558
+ },
258559
+ {
258560
+ "epoch": 1000.42,
258561
+ "learning_rate": 8.011602564102566e-06,
258562
+ "loss": 0.7688,
258563
+ "step": 125125
258564
+ },
258565
+ {
258566
+ "epoch": 1000.46,
258567
+ "learning_rate": 8.011522435897437e-06,
258568
+ "loss": 0.2619,
258569
+ "step": 125130
258570
+ },
258571
+ {
258572
+ "epoch": 1000.5,
258573
+ "learning_rate": 8.011442307692308e-06,
258574
+ "loss": 0.2839,
258575
+ "step": 125135
258576
+ },
258577
+ {
258578
+ "epoch": 1000.54,
258579
+ "learning_rate": 8.01136217948718e-06,
258580
+ "loss": 0.3585,
258581
+ "step": 125140
258582
+ },
258583
+ {
258584
+ "epoch": 1000.58,
258585
+ "learning_rate": 8.011282051282053e-06,
258586
+ "loss": 0.8002,
258587
+ "step": 125145
258588
+ },
258589
+ {
258590
+ "epoch": 1000.62,
258591
+ "learning_rate": 8.011201923076924e-06,
258592
+ "loss": 0.9264,
258593
+ "step": 125150
258594
+ },
258595
+ {
258596
+ "epoch": 1000.66,
258597
+ "learning_rate": 8.011121794871795e-06,
258598
+ "loss": 0.3188,
258599
+ "step": 125155
258600
+ },
258601
+ {
258602
+ "epoch": 1000.7,
258603
+ "learning_rate": 8.011041666666667e-06,
258604
+ "loss": 0.3042,
258605
+ "step": 125160
258606
+ },
258607
+ {
258608
+ "epoch": 1000.74,
258609
+ "learning_rate": 8.01096153846154e-06,
258610
+ "loss": 0.4207,
258611
+ "step": 125165
258612
+ },
258613
+ {
258614
+ "epoch": 1000.78,
258615
+ "learning_rate": 8.010881410256411e-06,
258616
+ "loss": 0.7485,
258617
+ "step": 125170
258618
+ },
258619
+ {
258620
+ "epoch": 1000.82,
258621
+ "learning_rate": 8.010801282051283e-06,
258622
+ "loss": 0.8247,
258623
+ "step": 125175
258624
+ },
258625
+ {
258626
+ "epoch": 1000.86,
258627
+ "learning_rate": 8.010721153846156e-06,
258628
+ "loss": 0.2523,
258629
+ "step": 125180
258630
+ },
258631
+ {
258632
+ "epoch": 1000.9,
258633
+ "learning_rate": 8.010641025641025e-06,
258634
+ "loss": 0.2924,
258635
+ "step": 125185
258636
+ },
258637
+ {
258638
+ "epoch": 1000.94,
258639
+ "learning_rate": 8.010560897435898e-06,
258640
+ "loss": 0.3141,
258641
+ "step": 125190
258642
+ },
258643
+ {
258644
+ "epoch": 1000.98,
258645
+ "learning_rate": 8.01048076923077e-06,
258646
+ "loss": 0.7975,
258647
+ "step": 125195
258648
+ },
258649
+ {
258650
+ "epoch": 1001.0,
258651
+ "eval_loss": 0.36214983463287354,
258652
+ "eval_runtime": 36.9288,
258653
+ "eval_samples_per_second": 22.746,
258654
+ "eval_steps_per_second": 0.731,
258655
+ "eval_wer": 0.17658487443333093,
258656
+ "step": 125197
258657
+ },
258658
+ {
258659
+ "epoch": 1001.02,
258660
+ "learning_rate": 8.010400641025641e-06,
258661
+ "loss": 0.3073,
258662
+ "step": 125200
258663
+ },
258664
+ {
258665
+ "epoch": 1001.06,
258666
+ "learning_rate": 8.010320512820512e-06,
258667
+ "loss": 0.2778,
258668
+ "step": 125205
258669
+ },
258670
+ {
258671
+ "epoch": 1001.1,
258672
+ "learning_rate": 8.010240384615385e-06,
258673
+ "loss": 0.2989,
258674
+ "step": 125210
258675
+ },
258676
+ {
258677
+ "epoch": 1001.14,
258678
+ "learning_rate": 8.010160256410257e-06,
258679
+ "loss": 0.3686,
258680
+ "step": 125215
258681
+ },
258682
+ {
258683
+ "epoch": 1001.18,
258684
+ "learning_rate": 8.010080128205128e-06,
258685
+ "loss": 0.9013,
258686
+ "step": 125220
258687
+ },
258688
+ {
258689
+ "epoch": 1001.22,
258690
+ "learning_rate": 8.010000000000001e-06,
258691
+ "loss": 0.9577,
258692
+ "step": 125225
258693
+ },
258694
+ {
258695
+ "epoch": 1001.26,
258696
+ "learning_rate": 8.009919871794873e-06,
258697
+ "loss": 0.2616,
258698
+ "step": 125230
258699
+ },
258700
+ {
258701
+ "epoch": 1001.3,
258702
+ "learning_rate": 8.009839743589744e-06,
258703
+ "loss": 0.2642,
258704
+ "step": 125235
258705
+ },
258706
+ {
258707
+ "epoch": 1001.34,
258708
+ "learning_rate": 8.009759615384615e-06,
258709
+ "loss": 0.4073,
258710
+ "step": 125240
258711
+ },
258712
+ {
258713
+ "epoch": 1001.38,
258714
+ "learning_rate": 8.009679487179488e-06,
258715
+ "loss": 0.6622,
258716
+ "step": 125245
258717
+ },
258718
+ {
258719
+ "epoch": 1001.42,
258720
+ "learning_rate": 8.00959935897436e-06,
258721
+ "loss": 0.7739,
258722
+ "step": 125250
258723
+ },
258724
+ {
258725
+ "epoch": 1001.46,
258726
+ "learning_rate": 8.009519230769231e-06,
258727
+ "loss": 0.3028,
258728
+ "step": 125255
258729
+ },
258730
+ {
258731
+ "epoch": 1001.5,
258732
+ "learning_rate": 8.009439102564102e-06,
258733
+ "loss": 0.3526,
258734
+ "step": 125260
258735
+ },
258736
+ {
258737
+ "epoch": 1001.54,
258738
+ "learning_rate": 8.009358974358975e-06,
258739
+ "loss": 0.3953,
258740
+ "step": 125265
258741
+ },
258742
+ {
258743
+ "epoch": 1001.58,
258744
+ "learning_rate": 8.009278846153847e-06,
258745
+ "loss": 0.7862,
258746
+ "step": 125270
258747
+ },
258748
+ {
258749
+ "epoch": 1001.62,
258750
+ "learning_rate": 8.009198717948718e-06,
258751
+ "loss": 0.7973,
258752
+ "step": 125275
258753
+ },
258754
+ {
258755
+ "epoch": 1001.66,
258756
+ "learning_rate": 8.009118589743591e-06,
258757
+ "loss": 0.3049,
258758
+ "step": 125280
258759
+ },
258760
+ {
258761
+ "epoch": 1001.7,
258762
+ "learning_rate": 8.009038461538463e-06,
258763
+ "loss": 0.2909,
258764
+ "step": 125285
258765
+ },
258766
+ {
258767
+ "epoch": 1001.74,
258768
+ "learning_rate": 8.008958333333334e-06,
258769
+ "loss": 0.3441,
258770
+ "step": 125290
258771
+ },
258772
+ {
258773
+ "epoch": 1001.78,
258774
+ "learning_rate": 8.008878205128205e-06,
258775
+ "loss": 0.7919,
258776
+ "step": 125295
258777
+ },
258778
+ {
258779
+ "epoch": 1001.82,
258780
+ "learning_rate": 8.008798076923078e-06,
258781
+ "loss": 0.8206,
258782
+ "step": 125300
258783
+ },
258784
+ {
258785
+ "epoch": 1001.86,
258786
+ "learning_rate": 8.00871794871795e-06,
258787
+ "loss": 0.2665,
258788
+ "step": 125305
258789
+ },
258790
+ {
258791
+ "epoch": 1001.9,
258792
+ "learning_rate": 8.008637820512821e-06,
258793
+ "loss": 0.2716,
258794
+ "step": 125310
258795
+ },
258796
+ {
258797
+ "epoch": 1001.94,
258798
+ "learning_rate": 8.008557692307692e-06,
258799
+ "loss": 0.3289,
258800
+ "step": 125315
258801
+ },
258802
+ {
258803
+ "epoch": 1001.98,
258804
+ "learning_rate": 8.008477564102566e-06,
258805
+ "loss": 0.67,
258806
+ "step": 125320
258807
+ },
258808
+ {
258809
+ "epoch": 1002.0,
258810
+ "eval_loss": 0.3591495454311371,
258811
+ "eval_runtime": 36.9591,
258812
+ "eval_samples_per_second": 22.728,
258813
+ "eval_steps_per_second": 0.731,
258814
+ "eval_wer": 0.18127389453942147,
258815
+ "step": 125322
258816
+ },
258817
+ {
258818
+ "epoch": 1010.02,
258819
+ "learning_rate": 8.008397435897437e-06,
258820
+ "loss": 0.4208,
258821
+ "step": 125325
258822
+ },
258823
+ {
258824
+ "epoch": 1010.06,
258825
+ "learning_rate": 8.008317307692308e-06,
258826
+ "loss": 0.2601,
258827
+ "step": 125330
258828
+ },
258829
+ {
258830
+ "epoch": 1010.1,
258831
+ "learning_rate": 8.008237179487181e-06,
258832
+ "loss": 0.2873,
258833
+ "step": 125335
258834
+ },
258835
+ {
258836
+ "epoch": 1010.14,
258837
+ "learning_rate": 8.008157051282051e-06,
258838
+ "loss": 0.3757,
258839
+ "step": 125340
258840
+ },
258841
+ {
258842
+ "epoch": 1010.18,
258843
+ "learning_rate": 8.008076923076924e-06,
258844
+ "loss": 0.62,
258845
+ "step": 125345
258846
+ },
258847
+ {
258848
+ "epoch": 1010.22,
258849
+ "learning_rate": 8.007996794871795e-06,
258850
+ "loss": 0.8827,
258851
+ "step": 125350
258852
+ },
258853
+ {
258854
+ "epoch": 1010.27,
258855
+ "learning_rate": 8.007916666666667e-06,
258856
+ "loss": 0.3183,
258857
+ "step": 125355
258858
+ },
258859
+ {
258860
+ "epoch": 1010.31,
258861
+ "learning_rate": 8.007836538461538e-06,
258862
+ "loss": 0.299,
258863
+ "step": 125360
258864
+ },
258865
+ {
258866
+ "epoch": 1010.35,
258867
+ "learning_rate": 8.007756410256411e-06,
258868
+ "loss": 0.3342,
258869
+ "step": 125365
258870
+ },
258871
+ {
258872
+ "epoch": 1010.39,
258873
+ "learning_rate": 8.007676282051282e-06,
258874
+ "loss": 0.7115,
258875
+ "step": 125370
258876
+ },
258877
+ {
258878
+ "epoch": 1010.43,
258879
+ "learning_rate": 8.007596153846154e-06,
258880
+ "loss": 0.9144,
258881
+ "step": 125375
258882
+ },
258883
+ {
258884
+ "epoch": 1010.47,
258885
+ "learning_rate": 8.007516025641027e-06,
258886
+ "loss": 0.2747,
258887
+ "step": 125380
258888
+ },
258889
+ {
258890
+ "epoch": 1010.51,
258891
+ "learning_rate": 8.007435897435898e-06,
258892
+ "loss": 0.2573,
258893
+ "step": 125385
258894
+ },
258895
+ {
258896
+ "epoch": 1010.55,
258897
+ "learning_rate": 8.00735576923077e-06,
258898
+ "loss": 0.4469,
258899
+ "step": 125390
258900
+ },
258901
+ {
258902
+ "epoch": 1010.59,
258903
+ "learning_rate": 8.007275641025641e-06,
258904
+ "loss": 0.6269,
258905
+ "step": 125395
258906
+ },
258907
+ {
258908
+ "epoch": 1010.63,
258909
+ "learning_rate": 8.007195512820514e-06,
258910
+ "loss": 0.8044,
258911
+ "step": 125400
258912
+ },
258913
+ {
258914
+ "epoch": 1010.67,
258915
+ "learning_rate": 8.007115384615385e-06,
258916
+ "loss": 0.3032,
258917
+ "step": 125405
258918
+ },
258919
+ {
258920
+ "epoch": 1010.71,
258921
+ "learning_rate": 8.007035256410257e-06,
258922
+ "loss": 0.292,
258923
+ "step": 125410
258924
+ },
258925
+ {
258926
+ "epoch": 1010.75,
258927
+ "learning_rate": 8.006955128205128e-06,
258928
+ "loss": 0.3459,
258929
+ "step": 125415
258930
+ },
258931
+ {
258932
+ "epoch": 1010.79,
258933
+ "learning_rate": 8.006875000000001e-06,
258934
+ "loss": 0.6893,
258935
+ "step": 125420
258936
+ },
258937
+ {
258938
+ "epoch": 1010.83,
258939
+ "learning_rate": 8.006794871794873e-06,
258940
+ "loss": 0.8442,
258941
+ "step": 125425
258942
+ },
258943
+ {
258944
+ "epoch": 1010.87,
258945
+ "learning_rate": 8.006714743589744e-06,
258946
+ "loss": 0.3027,
258947
+ "step": 125430
258948
+ },
258949
+ {
258950
+ "epoch": 1010.91,
258951
+ "learning_rate": 8.006634615384617e-06,
258952
+ "loss": 0.3003,
258953
+ "step": 125435
258954
+ },
258955
+ {
258956
+ "epoch": 1010.95,
258957
+ "learning_rate": 8.006554487179488e-06,
258958
+ "loss": 0.3103,
258959
+ "step": 125440
258960
+ },
258961
+ {
258962
+ "epoch": 1010.99,
258963
+ "learning_rate": 8.00647435897436e-06,
258964
+ "loss": 0.8081,
258965
+ "step": 125445
258966
+ },
258967
+ {
258968
+ "epoch": 1011.0,
258969
+ "eval_loss": 0.3846486508846283,
258970
+ "eval_runtime": 37.3078,
258971
+ "eval_samples_per_second": 22.515,
258972
+ "eval_steps_per_second": 0.724,
258973
+ "eval_wer": 0.1797744687003036,
258974
+ "step": 125446
258975
  }
258976
  ],
258977
+ "max_steps": 620000,
258978
  "num_train_epochs": 5000,
258979
+ "total_flos": 3.5301452563316874e+20,
258980
  "trial_name": null,
258981
  "trial_params": null
258982
  }
model-bin/finetune/base/{checkpoint-124823 β†’ checkpoint-125446}/training_args.bin RENAMED
File without changes
model-bin/finetune/base/log/1630144693.9967587/events.out.tfevents.1630144693.86bb0ddabf9b.4092.111 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e4aa38c42113fd64f680f49d3d38a69f958c998da8a2379f32ea43be116e69
3
+ size 4194
model-bin/finetune/base/log/1630145081.8079932/events.out.tfevents.1630145081.86bb0ddabf9b.4092.113 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93d2459978224989786560dad4d925f3353d96cbefc84810ec9e03b88b7c2b6
3
+ size 4194
model-bin/finetune/base/log/1630145470.2638097/events.out.tfevents.1630145470.86bb0ddabf9b.4092.115 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c0de4f047420469c3c680d81b6bd9d7160a0cfb47553e2374a39dfeb9e4840b
3
+ size 4194
model-bin/finetune/base/log/1630145861.462048/events.out.tfevents.1630145861.86bb0ddabf9b.4092.117 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c29717c65f0ed80fb0de899ed553da349b163774729899a672dd4e5161266f
3
+ size 4194
model-bin/finetune/base/log/1630146253.3092854/events.out.tfevents.1630146253.86bb0ddabf9b.4092.119 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c16b9fcc15c3e16fadb20f5b4b25ed34a1f8bfaea064cdd63f24e5e94a0b76c
3
+ size 4194
model-bin/finetune/base/log/events.out.tfevents.1630144693.86bb0ddabf9b.4092.110 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33135ecc17206d68f8caf17635e77b79a7965af925a2048b8eeb715863a47a7f
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1630145081.86bb0ddabf9b.4092.112 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa558d6000ce6407a045fddd092de97c3b1770c2767aa9fe989155470dab77a
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1630145470.86bb0ddabf9b.4092.114 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55057af16f44323a6f0f74328fbc8998f6ecf8d90c44836d65c02c5fe75488cf
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1630145861.86bb0ddabf9b.4092.116 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e660a82db2cba865c566bdb2288134eedda48c07ac6b5991f46c93cd71876690
3
+ size 8622
model-bin/finetune/base/log/events.out.tfevents.1630146253.86bb0ddabf9b.4092.118 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ccaa2dd27e51dbc2aba8eae62fd8872dd51c1f38964a76b62432787f4672350
3
+ size 8622