SamagraDataGov commited on
Commit
2fe25c9
·
verified ·
1 Parent(s): ab2e6e4

Training in progress, step 40

Browse files
Files changed (36) hide show
  1. README.md +16 -13
  2. config.json +105 -3
  3. model.safetensors +1 -1
  4. pytorch_model.bin +3 -0
  5. runs/Jun20_17-12-17_bharatsahaiyak-test/events.out.tfevents.1718903538.bharatsahaiyak-test.584004.17 +3 -0
  6. runs/Jun25_19-32-54_bharatsahaiyak-test/events.out.tfevents.1719343975.bharatsahaiyak-test.945044.0 +3 -0
  7. runs/Jun25_20-15-53_bharatsahaiyak-test/events.out.tfevents.1719346554.bharatsahaiyak-test.945044.1 +3 -0
  8. runs/Jun25_21-12-28_bharatsahaiyak-test/events.out.tfevents.1719349949.bharatsahaiyak-test.990312.0 +3 -0
  9. runs/Jun25_21-25-06_bharatsahaiyak-test/events.out.tfevents.1719350707.bharatsahaiyak-test.945044.2 +3 -0
  10. runs/Jun25_21-25-06_bharatsahaiyak-test/events.out.tfevents.1719351329.bharatsahaiyak-test.945044.3 +3 -0
  11. runs/Jun25_21-43-21_bharatsahaiyak-test/events.out.tfevents.1719351802.bharatsahaiyak-test.945044.4 +3 -0
  12. runs/Jun25_21-43-21_bharatsahaiyak-test/events.out.tfevents.1719352396.bharatsahaiyak-test.945044.5 +3 -0
  13. runs/Jun25_22-02-00_bharatsahaiyak-test/events.out.tfevents.1719352921.bharatsahaiyak-test.945044.6 +3 -0
  14. runs/Jun26_08-14-03_bharatsahaiyak-test/events.out.tfevents.1719389644.bharatsahaiyak-test.1215638.0 +3 -0
  15. runs/Jun26_08-55-05_bharatsahaiyak-test/events.out.tfevents.1719392105.bharatsahaiyak-test.1215638.1 +3 -0
  16. runs/Jun26_08-55-05_bharatsahaiyak-test/events.out.tfevents.1719392419.bharatsahaiyak-test.1215638.2 +3 -0
  17. runs/Jun26_09-01-10_bharatsahaiyak-test/events.out.tfevents.1719392471.bharatsahaiyak-test.1215638.3 +3 -0
  18. runs/Jun26_09-01-10_bharatsahaiyak-test/events.out.tfevents.1719392836.bharatsahaiyak-test.1215638.4 +3 -0
  19. runs/Jun28_19-45-26_bharatsahaiyak-test/events.out.tfevents.1719603928.bharatsahaiyak-test.1799391.0 +3 -0
  20. training_args.bin +1 -1
  21. whisper-tiny-hi-checkpoint-1/config.json +1 -1
  22. whisper-tiny-hi-checkpoint-1/model.safetensors +1 -1
  23. whisper-tiny-hi-checkpoint-1/pytorch_model.bin +3 -0
  24. whisper-tiny-hi-checkpoint-1/trainer_state.json +80 -80
  25. whisper-tiny-hi-checkpoint-1/training_args.bin +1 -1
  26. whisper-tiny-hi-checkpoint-2/config.json +1 -1
  27. whisper-tiny-hi-checkpoint-2/model.safetensors +1 -1
  28. whisper-tiny-hi-checkpoint-2/pytorch_model.bin +3 -0
  29. whisper-tiny-hi-checkpoint-2/trainer_state.json +94 -94
  30. whisper-tiny-hi-checkpoint-2/training_args.bin +1 -1
  31. whisper-tiny-hi-checkpoint-3/config.json +1 -1
  32. whisper-tiny-hi-checkpoint-3/model.safetensors +1 -1
  33. whisper-tiny-hi-checkpoint-3/pytorch_model.bin +3 -0
  34. whisper-tiny-hi-checkpoint-3/trainer_state.json +78 -78
  35. whisper-tiny-hi-checkpoint-3/training_args.bin +1 -1
  36. whisper-tiny-hi-checkpoint-9/trainer_state.json +301 -0
README.md CHANGED
@@ -15,8 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model was trained from scratch on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6505
19
- - Wer: 69.9890
20
 
21
  ## Model description
22
 
@@ -35,24 +35,27 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 1e-05
39
- - train_batch_size: 32
40
- - eval_batch_size: 1
41
  - seed: 42
 
 
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
- - lr_scheduler_type: linear
 
44
  - training_steps: 200
45
  - mixed_precision_training: Native AMP
46
 
47
  ### Training results
48
 
49
- | Training Loss | Epoch | Step | Validation Loss | Wer |
50
- |:-------------:|:-----:|:----:|:---------------:|:-------:|
51
- | 1.2169 | 1.25 | 40 | 1.0360 | 91.6081 |
52
- | 0.7302 | 2.5 | 80 | 0.7369 | 79.8035 |
53
- | 0.5917 | 3.75 | 120 | 0.6505 | 69.9890 |
54
- | 0.5156 | 5.0 | 160 | 0.6157 | 70.4293 |
55
- | 0.481 | 6.25 | 200 | 0.6051 | 70.0398 |
56
 
57
 
58
  ### Framework versions
 
15
 
16
  This model was trained from scratch on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.4940
19
+ - Wer: 59.7206
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 3.75e-05
39
+ - train_batch_size: 16
40
+ - eval_batch_size: 4
41
  - seed: 42
42
+ - gradient_accumulation_steps: 2
43
+ - total_train_batch_size: 32
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: constant
46
+ - lr_scheduler_warmup_steps: 50
47
  - training_steps: 200
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
53
+ |:-------------:|:------:|:----:|:---------------:|:-------:|
54
+ | 0.6766 | 1.2698 | 40 | 0.6154 | 81.4733 |
55
+ | 0.3599 | 2.5397 | 80 | 0.5078 | 67.0110 |
56
+ | 0.2297 | 3.8095 | 120 | 0.4940 | 59.7206 |
57
+ | 0.153 | 5.0794 | 160 | 0.5193 | 62.0745 |
58
+ | 0.0885 | 6.3492 | 200 | 0.5557 | 60.5843 |
59
 
60
 
61
  ### Framework versions
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./whisper-tiny-hi2_test/whisper-tiny-hi-checkpoint-8",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -25,7 +25,20 @@
25
  "encoder_layerdrop": 0.0,
26
  "encoder_layers": 4,
27
  "eos_token_id": 50257,
28
- "forced_decoder_ids": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "init_std": 0.02,
30
  "is_encoder_decoder": true,
31
  "mask_feature_length": 10,
@@ -43,7 +56,96 @@
43
  "num_mel_bins": 80,
44
  "pad_token_id": 50257,
45
  "scale_embedding": false,
46
- "suppress_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  "torch_dtype": "float32",
48
  "transformers_version": "4.41.1",
49
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "openai/whisper-tiny",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
25
  "encoder_layerdrop": 0.0,
26
  "encoder_layers": 4,
27
  "eos_token_id": 50257,
28
+ "forced_decoder_ids": [
29
+ [
30
+ 1,
31
+ 50259
32
+ ],
33
+ [
34
+ 2,
35
+ 50359
36
+ ],
37
+ [
38
+ 3,
39
+ 50363
40
+ ]
41
+ ],
42
  "init_std": 0.02,
43
  "is_encoder_decoder": true,
44
  "mask_feature_length": 10,
 
56
  "num_mel_bins": 80,
57
  "pad_token_id": 50257,
58
  "scale_embedding": false,
59
+ "suppress_tokens": [
60
+ 1,
61
+ 2,
62
+ 7,
63
+ 8,
64
+ 9,
65
+ 10,
66
+ 14,
67
+ 25,
68
+ 26,
69
+ 27,
70
+ 28,
71
+ 29,
72
+ 31,
73
+ 58,
74
+ 59,
75
+ 60,
76
+ 61,
77
+ 62,
78
+ 63,
79
+ 90,
80
+ 91,
81
+ 92,
82
+ 93,
83
+ 359,
84
+ 503,
85
+ 522,
86
+ 542,
87
+ 873,
88
+ 893,
89
+ 902,
90
+ 918,
91
+ 922,
92
+ 931,
93
+ 1350,
94
+ 1853,
95
+ 1982,
96
+ 2460,
97
+ 2627,
98
+ 3246,
99
+ 3253,
100
+ 3268,
101
+ 3536,
102
+ 3846,
103
+ 3961,
104
+ 4183,
105
+ 4667,
106
+ 6585,
107
+ 6647,
108
+ 7273,
109
+ 9061,
110
+ 9383,
111
+ 10428,
112
+ 10929,
113
+ 11938,
114
+ 12033,
115
+ 12331,
116
+ 12562,
117
+ 13793,
118
+ 14157,
119
+ 14635,
120
+ 15265,
121
+ 15618,
122
+ 16553,
123
+ 16604,
124
+ 18362,
125
+ 18956,
126
+ 20075,
127
+ 21675,
128
+ 22520,
129
+ 26130,
130
+ 26161,
131
+ 26435,
132
+ 28279,
133
+ 29464,
134
+ 31650,
135
+ 32302,
136
+ 32470,
137
+ 36865,
138
+ 42863,
139
+ 47425,
140
+ 49870,
141
+ 50254,
142
+ 50258,
143
+ 50358,
144
+ 50359,
145
+ 50360,
146
+ 50361,
147
+ 50362
148
+ ],
149
  "torch_dtype": "float32",
150
  "transformers_version": "4.41.1",
151
  "use_cache": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a59c8982089b3ffb0f35b2a8b93a329827d3603f741f4d47ddb7a17dc5a7e5e2
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8391a512f727d4bff056237acc3255c51e1c540d618e2a36bbcb5ff4e3d67b9e
3
  size 151061672
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3cef835a24908d9f4ca10612961248e57318d2137495d93cf150b4aada444c
3
+ size 151099494
runs/Jun20_17-12-17_bharatsahaiyak-test/events.out.tfevents.1718903538.bharatsahaiyak-test.584004.17 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc21cff19d8da62d157be35e39c6e543d5e06b24e395581958ad47a1b58a4f8f
3
+ size 6041
runs/Jun25_19-32-54_bharatsahaiyak-test/events.out.tfevents.1719343975.bharatsahaiyak-test.945044.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ea47ed7805b390543a0fdece61b93fac693e053e54e98f158c867a9dc4cab8
3
+ size 8257
runs/Jun25_20-15-53_bharatsahaiyak-test/events.out.tfevents.1719346554.bharatsahaiyak-test.945044.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c88a94e9915f794c941f692cb1e6d0f4d91cb35b92392892187f4ed9c5b453
3
+ size 9672
runs/Jun25_21-12-28_bharatsahaiyak-test/events.out.tfevents.1719349949.bharatsahaiyak-test.990312.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ed85b0e945d2e7d1ca43a423ce270bb6db762abc0a20cd57885f19d5738987
3
+ size 6796
runs/Jun25_21-25-06_bharatsahaiyak-test/events.out.tfevents.1719350707.bharatsahaiyak-test.945044.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf4c9bb256acdfef047d08be61d3bdf3f8e4a0dc2d4afed391ab2d8155506300
3
+ size 6047
runs/Jun25_21-25-06_bharatsahaiyak-test/events.out.tfevents.1719351329.bharatsahaiyak-test.945044.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97a19fd12d5b1ccb44d90206d795c9fbdd8fe053881c14c441bfb993249a25b
3
+ size 406
runs/Jun25_21-43-21_bharatsahaiyak-test/events.out.tfevents.1719351802.bharatsahaiyak-test.945044.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac54ea5634bcee3dbc86b92a1cccb1abccdbf2ed682afca1b1e20b7f0b79bac4
3
+ size 6047
runs/Jun25_21-43-21_bharatsahaiyak-test/events.out.tfevents.1719352396.bharatsahaiyak-test.945044.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8032919c2782097e631dc680b67b003a4061a58eee4f96c8016d2b0855c5ccf4
3
+ size 406
runs/Jun25_22-02-00_bharatsahaiyak-test/events.out.tfevents.1719352921.bharatsahaiyak-test.945044.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4309d2f1655bb13fd980419f849b27252aa5935a577da2655b8fe5708e5b13
3
+ size 6047
runs/Jun26_08-14-03_bharatsahaiyak-test/events.out.tfevents.1719389644.bharatsahaiyak-test.1215638.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d267c65def0ee6598d4887f45121902e836b16611747bf66db17b409357cedc
3
+ size 9673
runs/Jun26_08-55-05_bharatsahaiyak-test/events.out.tfevents.1719392105.bharatsahaiyak-test.1215638.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165d0582673ef188b9d52b9c14d1a45c51671773f3298a7eb630d6fb6f82e50a
3
+ size 6048
runs/Jun26_08-55-05_bharatsahaiyak-test/events.out.tfevents.1719392419.bharatsahaiyak-test.1215638.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778b4630a2197cb19c0539dab2ab6c6ebbfe08a5a766bb35019cf7a58d314172
3
+ size 406
runs/Jun26_09-01-10_bharatsahaiyak-test/events.out.tfevents.1719392471.bharatsahaiyak-test.1215638.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b3d3b1228637f0424cd544116d5762921a132e40112df77d5772202d481236
3
+ size 6048
runs/Jun26_09-01-10_bharatsahaiyak-test/events.out.tfevents.1719392836.bharatsahaiyak-test.1215638.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b513f668d1fb6272239f09cabc467830355aeb006b023b49b4c837f258e67c
3
+ size 406
runs/Jun28_19-45-26_bharatsahaiyak-test/events.out.tfevents.1719603928.bharatsahaiyak-test.1799391.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede0928aa24113fd4cb8c80d4d30ca006747b321ea6844fd562cde3f9521fc30
3
+ size 7347
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:491ea0b3fac64ea37df87b10285499fb08a8bcc03ba05abb9770d611ee81c6e0
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e2def70f73f2a79252976d7c743511eccda12a2480b3b51c124f1ebcb9504d7
3
  size 5240
whisper-tiny-hi-checkpoint-1/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "decoder_layerdrop": 0.0,
20
  "decoder_layers": 4,
21
  "decoder_start_token_id": 50258,
22
- "dropout": 0.0,
23
  "encoder_attention_heads": 6,
24
  "encoder_ffn_dim": 1536,
25
  "encoder_layerdrop": 0.0,
 
19
  "decoder_layerdrop": 0.0,
20
  "decoder_layers": 4,
21
  "decoder_start_token_id": 50258,
22
+ "dropout": 0.1,
23
  "encoder_attention_heads": 6,
24
  "encoder_ffn_dim": 1536,
25
  "encoder_layerdrop": 0.0,
whisper-tiny-hi-checkpoint-1/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a59c8982089b3ffb0f35b2a8b93a329827d3603f741f4d47ddb7a17dc5a7e5e2
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daeea7778cdeda935a2e90f442518eb60512721cd10bce0921a80c47aa8f2543
3
  size 151061672
whisper-tiny-hi-checkpoint-1/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802a25ee7db91faba0d8c4d8ea560053183efcadf455712cb855292076f680e4
3
+ size 151099494
whisper-tiny-hi-checkpoint-1/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 69.98899144720129,
3
  "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-120",
4
- "epoch": 6.25,
5
  "eval_steps": 40,
6
  "global_step": 200,
7
  "is_hyper_param_search": false,
@@ -9,128 +9,128 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.625,
13
- "grad_norm": 10.95101547241211,
14
- "learning_rate": 9.200000000000002e-06,
15
- "loss": 2.3747,
16
  "step": 20
17
  },
18
  {
19
- "epoch": 1.25,
20
- "grad_norm": 7.747081279754639,
21
- "learning_rate": 8.2e-06,
22
- "loss": 1.2169,
23
  "step": 40
24
  },
25
  {
26
- "epoch": 1.25,
27
- "eval_loss": 1.0360029935836792,
28
- "eval_runtime": 574.5339,
29
- "eval_samples_per_second": 1.741,
30
- "eval_steps_per_second": 1.741,
31
- "eval_wer": 91.60809552036582,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 1.875,
36
- "grad_norm": 7.32460355758667,
37
- "learning_rate": 7.2000000000000005e-06,
38
- "loss": 0.9058,
39
  "step": 60
40
  },
41
  {
42
- "epoch": 2.5,
43
- "grad_norm": 6.835532188415527,
44
- "learning_rate": 6.200000000000001e-06,
45
- "loss": 0.7302,
46
  "step": 80
47
  },
48
  {
49
- "epoch": 2.5,
50
- "eval_loss": 0.7368654608726501,
51
- "eval_runtime": 549.562,
52
- "eval_samples_per_second": 1.82,
53
- "eval_steps_per_second": 1.82,
54
- "eval_wer": 79.80353967313066,
55
  "step": 80
56
  },
57
  {
58
- "epoch": 3.125,
59
- "grad_norm": 6.408384323120117,
60
- "learning_rate": 5.2e-06,
61
- "loss": 0.6453,
62
  "step": 100
63
  },
64
  {
65
- "epoch": 3.75,
66
- "grad_norm": 5.809150695800781,
67
- "learning_rate": 4.2000000000000004e-06,
68
- "loss": 0.5917,
69
  "step": 120
70
  },
71
  {
72
- "epoch": 3.75,
73
- "eval_loss": 0.6504533886909485,
74
- "eval_runtime": 548.8688,
75
- "eval_samples_per_second": 1.822,
76
- "eval_steps_per_second": 1.822,
77
- "eval_wer": 69.98899144720129,
78
  "step": 120
79
  },
80
  {
81
- "epoch": 4.375,
82
- "grad_norm": 5.724637508392334,
83
- "learning_rate": 3.2000000000000003e-06,
84
- "loss": 0.5356,
85
  "step": 140
86
  },
87
  {
88
- "epoch": 5.0,
89
- "grad_norm": 10.124310493469238,
90
- "learning_rate": 2.2e-06,
91
- "loss": 0.5156,
92
  "step": 160
93
  },
94
  {
95
- "epoch": 5.0,
96
- "eval_loss": 0.6157090663909912,
97
- "eval_runtime": 549.7895,
98
- "eval_samples_per_second": 1.819,
99
- "eval_steps_per_second": 1.819,
100
- "eval_wer": 70.4293335591498,
101
  "step": 160
102
  },
103
  {
104
- "epoch": 5.625,
105
- "grad_norm": 4.947811126708984,
106
- "learning_rate": 1.2000000000000002e-06,
107
- "loss": 0.4986,
108
  "step": 180
109
  },
110
  {
111
- "epoch": 6.25,
112
- "grad_norm": 4.541861057281494,
113
- "learning_rate": 2.0000000000000002e-07,
114
- "loss": 0.481,
115
  "step": 200
116
  },
117
  {
118
- "epoch": 6.25,
119
- "eval_loss": 0.6050636768341064,
120
- "eval_runtime": 552.964,
121
- "eval_samples_per_second": 1.808,
122
- "eval_steps_per_second": 1.808,
123
- "eval_wer": 70.03980015242611,
124
  "step": 200
125
  },
126
  {
127
- "epoch": 6.25,
128
  "step": 200,
129
- "total_flos": 1.5401574531072e+17,
130
- "train_loss": 0.8495243072509766,
131
- "train_runtime": 3592.94,
132
- "train_samples_per_second": 1.781,
133
- "train_steps_per_second": 0.056
134
  }
135
  ],
136
  "logging_steps": 20,
@@ -150,8 +150,8 @@
150
  "attributes": {}
151
  }
152
  },
153
- "total_flos": 1.5401574531072e+17,
154
- "train_batch_size": 32,
155
  "trial_name": null,
156
  "trial_params": null
157
  }
 
1
  {
2
+ "best_metric": 59.720575783234544,
3
  "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-120",
4
+ "epoch": 6.349206349206349,
5
  "eval_steps": 40,
6
  "global_step": 200,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.6349206349206349,
13
+ "grad_norm": 8.253315925598145,
14
+ "learning_rate": 3.75e-05,
15
+ "loss": 1.7555,
16
  "step": 20
17
  },
18
  {
19
+ "epoch": 1.2698412698412698,
20
+ "grad_norm": 6.022846698760986,
21
+ "learning_rate": 3.75e-05,
22
+ "loss": 0.6766,
23
  "step": 40
24
  },
25
  {
26
+ "epoch": 1.2698412698412698,
27
+ "eval_loss": 0.6153799295425415,
28
+ "eval_runtime": 368.3001,
29
+ "eval_samples_per_second": 2.715,
30
+ "eval_steps_per_second": 0.679,
31
+ "eval_wer": 81.47332768839966,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 1.9047619047619047,
36
+ "grad_norm": 6.249065399169922,
37
+ "learning_rate": 3.75e-05,
38
+ "loss": 0.4907,
39
  "step": 60
40
  },
41
  {
42
+ "epoch": 2.5396825396825395,
43
+ "grad_norm": 5.881297588348389,
44
+ "learning_rate": 3.75e-05,
45
+ "loss": 0.3599,
46
  "step": 80
47
  },
48
  {
49
+ "epoch": 2.5396825396825395,
50
+ "eval_loss": 0.5078147649765015,
51
+ "eval_runtime": 314.278,
52
+ "eval_samples_per_second": 3.182,
53
+ "eval_steps_per_second": 0.795,
54
+ "eval_wer": 67.01100762066045,
55
  "step": 80
56
  },
57
  {
58
+ "epoch": 3.1746031746031744,
59
+ "grad_norm": 4.2090606689453125,
60
+ "learning_rate": 3.75e-05,
61
+ "loss": 0.301,
62
  "step": 100
63
  },
64
  {
65
+ "epoch": 3.8095238095238093,
66
+ "grad_norm": 4.868574142456055,
67
+ "learning_rate": 3.75e-05,
68
+ "loss": 0.2297,
69
  "step": 120
70
  },
71
  {
72
+ "epoch": 3.8095238095238093,
73
+ "eval_loss": 0.49397116899490356,
74
+ "eval_runtime": 304.836,
75
+ "eval_samples_per_second": 3.28,
76
+ "eval_steps_per_second": 0.82,
77
+ "eval_wer": 59.720575783234544,
78
  "step": 120
79
  },
80
  {
81
+ "epoch": 4.444444444444445,
82
+ "grad_norm": 4.324865341186523,
83
+ "learning_rate": 3.75e-05,
84
+ "loss": 0.1754,
85
  "step": 140
86
  },
87
  {
88
+ "epoch": 5.079365079365079,
89
+ "grad_norm": 3.0074007511138916,
90
+ "learning_rate": 3.75e-05,
91
+ "loss": 0.153,
92
  "step": 160
93
  },
94
  {
95
+ "epoch": 5.079365079365079,
96
+ "eval_loss": 0.5192885994911194,
97
+ "eval_runtime": 309.7886,
98
+ "eval_samples_per_second": 3.228,
99
+ "eval_steps_per_second": 0.807,
100
+ "eval_wer": 62.07451312447078,
101
  "step": 160
102
  },
103
  {
104
+ "epoch": 5.714285714285714,
105
+ "grad_norm": 2.9954638481140137,
106
+ "learning_rate": 3.75e-05,
107
+ "loss": 0.1049,
108
  "step": 180
109
  },
110
  {
111
+ "epoch": 6.349206349206349,
112
+ "grad_norm": 3.4090383052825928,
113
+ "learning_rate": 3.75e-05,
114
+ "loss": 0.0885,
115
  "step": 200
116
  },
117
  {
118
+ "epoch": 6.349206349206349,
119
+ "eval_loss": 0.5557394027709961,
120
+ "eval_runtime": 308.2851,
121
+ "eval_samples_per_second": 3.244,
122
+ "eval_steps_per_second": 0.811,
123
+ "eval_wer": 60.584250635055035,
124
  "step": 200
125
  },
126
  {
127
+ "epoch": 6.349206349206349,
128
  "step": 200,
129
+ "total_flos": 1.5637915828224e+17,
130
+ "train_loss": 0.4335097998380661,
131
+ "train_runtime": 2402.3095,
132
+ "train_samples_per_second": 2.664,
133
+ "train_steps_per_second": 0.083
134
  }
135
  ],
136
  "logging_steps": 20,
 
150
  "attributes": {}
151
  }
152
  },
153
+ "total_flos": 1.5637915828224e+17,
154
+ "train_batch_size": 16,
155
  "trial_name": null,
156
  "trial_params": null
157
  }
whisper-tiny-hi-checkpoint-1/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c982fa9327e2e49f4765e9f4a5be54ca2c672121610ce55b8168505a2beb3b7
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a2d63b5e8da641a253ed23f58db5d3da9941bebd2a70aef95105c81a4fa484
3
  size 5240
whisper-tiny-hi-checkpoint-2/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "decoder_layerdrop": 0.0,
20
  "decoder_layers": 4,
21
  "decoder_start_token_id": 50258,
22
- "dropout": 0.0,
23
  "encoder_attention_heads": 6,
24
  "encoder_ffn_dim": 1536,
25
  "encoder_layerdrop": 0.0,
 
19
  "decoder_layerdrop": 0.0,
20
  "decoder_layers": 4,
21
  "decoder_start_token_id": 50258,
22
+ "dropout": 0.1,
23
  "encoder_attention_heads": 6,
24
  "encoder_ffn_dim": 1536,
25
  "encoder_layerdrop": 0.0,
whisper-tiny-hi-checkpoint-2/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a59c8982089b3ffb0f35b2a8b93a329827d3603f741f4d47ddb7a17dc5a7e5e2
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daeea7778cdeda935a2e90f442518eb60512721cd10bce0921a80c47aa8f2543
3
  size 151061672
whisper-tiny-hi-checkpoint-2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802a25ee7db91faba0d8c4d8ea560053183efcadf455712cb855292076f680e4
3
+ size 151099494
whisper-tiny-hi-checkpoint-2/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 69.98899144720129,
3
  "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-120",
4
- "epoch": 6.28125,
5
  "eval_steps": 40,
6
  "global_step": 201,
7
  "is_hyper_param_search": false,
@@ -9,145 +9,145 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.625,
13
- "grad_norm": 10.95101547241211,
14
- "learning_rate": 9.200000000000002e-06,
15
- "loss": 2.3747,
16
  "step": 20
17
  },
18
  {
19
- "epoch": 1.25,
20
- "grad_norm": 7.747081279754639,
21
- "learning_rate": 8.2e-06,
22
- "loss": 1.2169,
23
  "step": 40
24
  },
25
  {
26
- "epoch": 1.25,
27
- "eval_loss": 1.0360029935836792,
28
- "eval_runtime": 574.5339,
29
- "eval_samples_per_second": 1.741,
30
- "eval_steps_per_second": 1.741,
31
- "eval_wer": 91.60809552036582,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 1.875,
36
- "grad_norm": 7.32460355758667,
37
- "learning_rate": 7.2000000000000005e-06,
38
- "loss": 0.9058,
39
  "step": 60
40
  },
41
  {
42
- "epoch": 2.5,
43
- "grad_norm": 6.835532188415527,
44
- "learning_rate": 6.200000000000001e-06,
45
- "loss": 0.7302,
46
  "step": 80
47
  },
48
  {
49
- "epoch": 2.5,
50
- "eval_loss": 0.7368654608726501,
51
- "eval_runtime": 549.562,
52
- "eval_samples_per_second": 1.82,
53
- "eval_steps_per_second": 1.82,
54
- "eval_wer": 79.80353967313066,
55
  "step": 80
56
  },
57
  {
58
- "epoch": 3.125,
59
- "grad_norm": 6.408384323120117,
60
- "learning_rate": 5.2e-06,
61
- "loss": 0.6453,
62
  "step": 100
63
  },
64
  {
65
- "epoch": 3.75,
66
- "grad_norm": 5.809150695800781,
67
- "learning_rate": 4.2000000000000004e-06,
68
- "loss": 0.5917,
69
  "step": 120
70
  },
71
  {
72
- "epoch": 3.75,
73
- "eval_loss": 0.6504533886909485,
74
- "eval_runtime": 548.8688,
75
- "eval_samples_per_second": 1.822,
76
- "eval_steps_per_second": 1.822,
77
- "eval_wer": 69.98899144720129,
78
  "step": 120
79
  },
80
  {
81
- "epoch": 4.375,
82
- "grad_norm": 5.724637508392334,
83
- "learning_rate": 3.2000000000000003e-06,
84
- "loss": 0.5356,
85
  "step": 140
86
  },
87
  {
88
- "epoch": 5.0,
89
- "grad_norm": 10.124310493469238,
90
- "learning_rate": 2.2e-06,
91
- "loss": 0.5156,
92
  "step": 160
93
  },
94
  {
95
- "epoch": 5.0,
96
- "eval_loss": 0.6157090663909912,
97
- "eval_runtime": 549.7895,
98
- "eval_samples_per_second": 1.819,
99
- "eval_steps_per_second": 1.819,
100
- "eval_wer": 70.4293335591498,
101
  "step": 160
102
  },
103
  {
104
- "epoch": 5.625,
105
- "grad_norm": 4.947811126708984,
106
- "learning_rate": 1.2000000000000002e-06,
107
- "loss": 0.4986,
108
  "step": 180
109
  },
110
  {
111
- "epoch": 6.25,
112
- "grad_norm": 4.541861057281494,
113
- "learning_rate": 2.0000000000000002e-07,
114
- "loss": 0.481,
115
  "step": 200
116
  },
117
  {
118
- "epoch": 6.25,
119
- "eval_loss": 0.6050636768341064,
120
- "eval_runtime": 552.964,
121
- "eval_samples_per_second": 1.808,
122
- "eval_steps_per_second": 1.808,
123
- "eval_wer": 70.03980015242611,
124
  "step": 200
125
  },
126
  {
127
- "epoch": 6.25,
128
  "step": 200,
129
- "total_flos": 1.5401574531072e+17,
130
- "train_loss": 0.8495243072509766,
131
- "train_runtime": 3592.94,
132
- "train_samples_per_second": 1.781,
133
- "train_steps_per_second": 0.056
134
  },
135
  {
136
- "epoch": 6.28125,
137
  "step": 201,
138
- "total_flos": 1.5480354963456e+17,
139
- "train_loss": 0.003230639654605543,
140
- "train_runtime": 7.5043,
141
- "train_samples_per_second": 852.844,
142
- "train_steps_per_second": 26.651
143
- },
144
- {
145
- "epoch": 6.28125,
146
- "eval_loss": 0.6504533886909485,
147
- "eval_runtime": 548.5863,
148
- "eval_samples_per_second": 1.823,
149
- "eval_steps_per_second": 1.823,
150
- "eval_wer": 69.98899144720129,
151
  "step": 201
152
  }
153
  ],
@@ -168,8 +168,8 @@
168
  "attributes": {}
169
  }
170
  },
171
- "total_flos": 1.5480354963456e+17,
172
- "train_batch_size": 32,
173
  "trial_name": null,
174
  "trial_params": null
175
  }
 
1
  {
2
+ "best_metric": 59.720575783234544,
3
  "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-120",
4
+ "epoch": 6.476190476190476,
5
  "eval_steps": 40,
6
  "global_step": 201,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.6349206349206349,
13
+ "grad_norm": 8.253315925598145,
14
+ "learning_rate": 3.75e-05,
15
+ "loss": 1.7555,
16
  "step": 20
17
  },
18
  {
19
+ "epoch": 1.2698412698412698,
20
+ "grad_norm": 6.022846698760986,
21
+ "learning_rate": 3.75e-05,
22
+ "loss": 0.6766,
23
  "step": 40
24
  },
25
  {
26
+ "epoch": 1.2698412698412698,
27
+ "eval_loss": 0.6153799295425415,
28
+ "eval_runtime": 368.3001,
29
+ "eval_samples_per_second": 2.715,
30
+ "eval_steps_per_second": 0.679,
31
+ "eval_wer": 81.47332768839966,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 1.9047619047619047,
36
+ "grad_norm": 6.249065399169922,
37
+ "learning_rate": 3.75e-05,
38
+ "loss": 0.4907,
39
  "step": 60
40
  },
41
  {
42
+ "epoch": 2.5396825396825395,
43
+ "grad_norm": 5.881297588348389,
44
+ "learning_rate": 3.75e-05,
45
+ "loss": 0.3599,
46
  "step": 80
47
  },
48
  {
49
+ "epoch": 2.5396825396825395,
50
+ "eval_loss": 0.5078147649765015,
51
+ "eval_runtime": 314.278,
52
+ "eval_samples_per_second": 3.182,
53
+ "eval_steps_per_second": 0.795,
54
+ "eval_wer": 67.01100762066045,
55
  "step": 80
56
  },
57
  {
58
+ "epoch": 3.1746031746031744,
59
+ "grad_norm": 4.2090606689453125,
60
+ "learning_rate": 3.75e-05,
61
+ "loss": 0.301,
62
  "step": 100
63
  },
64
  {
65
+ "epoch": 3.8095238095238093,
66
+ "grad_norm": 4.868574142456055,
67
+ "learning_rate": 3.75e-05,
68
+ "loss": 0.2297,
69
  "step": 120
70
  },
71
  {
72
+ "epoch": 3.8095238095238093,
73
+ "eval_loss": 0.49397116899490356,
74
+ "eval_runtime": 304.836,
75
+ "eval_samples_per_second": 3.28,
76
+ "eval_steps_per_second": 0.82,
77
+ "eval_wer": 59.720575783234544,
78
  "step": 120
79
  },
80
  {
81
+ "epoch": 4.444444444444445,
82
+ "grad_norm": 4.324865341186523,
83
+ "learning_rate": 3.75e-05,
84
+ "loss": 0.1754,
85
  "step": 140
86
  },
87
  {
88
+ "epoch": 5.079365079365079,
89
+ "grad_norm": 3.0074007511138916,
90
+ "learning_rate": 3.75e-05,
91
+ "loss": 0.153,
92
  "step": 160
93
  },
94
  {
95
+ "epoch": 5.079365079365079,
96
+ "eval_loss": 0.5192885994911194,
97
+ "eval_runtime": 309.7886,
98
+ "eval_samples_per_second": 3.228,
99
+ "eval_steps_per_second": 0.807,
100
+ "eval_wer": 62.07451312447078,
101
  "step": 160
102
  },
103
  {
104
+ "epoch": 5.714285714285714,
105
+ "grad_norm": 2.9954638481140137,
106
+ "learning_rate": 3.75e-05,
107
+ "loss": 0.1049,
108
  "step": 180
109
  },
110
  {
111
+ "epoch": 6.349206349206349,
112
+ "grad_norm": 3.4090383052825928,
113
+ "learning_rate": 3.75e-05,
114
+ "loss": 0.0885,
115
  "step": 200
116
  },
117
  {
118
+ "epoch": 6.349206349206349,
119
+ "eval_loss": 0.5557394027709961,
120
+ "eval_runtime": 308.2851,
121
+ "eval_samples_per_second": 3.244,
122
+ "eval_steps_per_second": 0.811,
123
+ "eval_wer": 60.584250635055035,
124
  "step": 200
125
  },
126
  {
127
+ "epoch": 6.349206349206349,
128
  "step": 200,
129
+ "total_flos": 1.5637915828224e+17,
130
+ "train_loss": 0.4335097998380661,
131
+ "train_runtime": 2402.3095,
132
+ "train_samples_per_second": 2.664,
133
+ "train_steps_per_second": 0.083
134
  },
135
  {
136
+ "epoch": 6.476190476190476,
137
  "step": 201,
138
+ "total_flos": 1.5716696260608e+17,
139
+ "train_loss": 0.043295191295111356,
140
+ "train_runtime": 5.6453,
141
+ "train_samples_per_second": 1133.694,
142
+ "train_steps_per_second": 35.428
143
+ },
144
+ {
145
+ "epoch": 6.476190476190476,
146
+ "eval_loss": 0.49397116899490356,
147
+ "eval_runtime": 307.542,
148
+ "eval_samples_per_second": 3.252,
149
+ "eval_steps_per_second": 0.813,
150
+ "eval_wer": 59.720575783234544,
151
  "step": 201
152
  }
153
  ],
 
168
  "attributes": {}
169
  }
170
  },
171
+ "total_flos": 1.5716696260608e+17,
172
+ "train_batch_size": 16,
173
  "trial_name": null,
174
  "trial_params": null
175
  }
whisper-tiny-hi-checkpoint-2/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57320ccedcf473157fcb44c0152851471c74170811def3aa0279b4a3552d63db
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9287bb38beb16bf58664c3ed10f735c1bd05dd62bb178cf34a56cf97c52b277
3
  size 5240
whisper-tiny-hi-checkpoint-3/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "decoder_layerdrop": 0.0,
20
  "decoder_layers": 4,
21
  "decoder_start_token_id": 50258,
22
- "dropout": 0.0,
23
  "encoder_attention_heads": 6,
24
  "encoder_ffn_dim": 1536,
25
  "encoder_layerdrop": 0.0,
 
19
  "decoder_layerdrop": 0.0,
20
  "decoder_layers": 4,
21
  "decoder_start_token_id": 50258,
22
+ "dropout": 0.1,
23
  "encoder_attention_heads": 6,
24
  "encoder_ffn_dim": 1536,
25
  "encoder_layerdrop": 0.0,
whisper-tiny-hi-checkpoint-3/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a59c8982089b3ffb0f35b2a8b93a329827d3603f741f4d47ddb7a17dc5a7e5e2
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daeea7778cdeda935a2e90f442518eb60512721cd10bce0921a80c47aa8f2543
3
  size 151061672
whisper-tiny-hi-checkpoint-3/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802a25ee7db91faba0d8c4d8ea560053183efcadf455712cb855292076f680e4
3
+ size 151099494
whisper-tiny-hi-checkpoint-3/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 69.98899144720129,
3
- "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-120",
4
  "epoch": 6.3125,
5
  "eval_steps": 40,
6
  "global_step": 202,
@@ -10,162 +10,162 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.625,
13
- "grad_norm": 10.95101547241211,
14
- "learning_rate": 9.200000000000002e-06,
15
- "loss": 2.3747,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 1.25,
20
- "grad_norm": 7.747081279754639,
21
- "learning_rate": 8.2e-06,
22
- "loss": 1.2169,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 1.25,
27
- "eval_loss": 1.0360029935836792,
28
- "eval_runtime": 574.5339,
29
- "eval_samples_per_second": 1.741,
30
- "eval_steps_per_second": 1.741,
31
- "eval_wer": 91.60809552036582,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.875,
36
- "grad_norm": 7.32460355758667,
37
- "learning_rate": 7.2000000000000005e-06,
38
- "loss": 0.9058,
39
  "step": 60
40
  },
41
  {
42
  "epoch": 2.5,
43
- "grad_norm": 6.835532188415527,
44
- "learning_rate": 6.200000000000001e-06,
45
- "loss": 0.7302,
46
  "step": 80
47
  },
48
  {
49
  "epoch": 2.5,
50
- "eval_loss": 0.7368654608726501,
51
- "eval_runtime": 549.562,
52
- "eval_samples_per_second": 1.82,
53
- "eval_steps_per_second": 1.82,
54
- "eval_wer": 79.80353967313066,
55
  "step": 80
56
  },
57
  {
58
  "epoch": 3.125,
59
- "grad_norm": 6.408384323120117,
60
- "learning_rate": 5.2e-06,
61
- "loss": 0.6453,
62
  "step": 100
63
  },
64
  {
65
  "epoch": 3.75,
66
- "grad_norm": 5.809150695800781,
67
- "learning_rate": 4.2000000000000004e-06,
68
- "loss": 0.5917,
69
  "step": 120
70
  },
71
  {
72
  "epoch": 3.75,
73
- "eval_loss": 0.6504533886909485,
74
- "eval_runtime": 548.8688,
75
- "eval_samples_per_second": 1.822,
76
- "eval_steps_per_second": 1.822,
77
- "eval_wer": 69.98899144720129,
78
  "step": 120
79
  },
80
  {
81
  "epoch": 4.375,
82
- "grad_norm": 5.724637508392334,
83
- "learning_rate": 3.2000000000000003e-06,
84
- "loss": 0.5356,
85
  "step": 140
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 10.124310493469238,
90
- "learning_rate": 2.2e-06,
91
- "loss": 0.5156,
92
  "step": 160
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_loss": 0.6157090663909912,
97
- "eval_runtime": 549.7895,
98
- "eval_samples_per_second": 1.819,
99
- "eval_steps_per_second": 1.819,
100
- "eval_wer": 70.4293335591498,
101
  "step": 160
102
  },
103
  {
104
  "epoch": 5.625,
105
- "grad_norm": 4.947811126708984,
106
- "learning_rate": 1.2000000000000002e-06,
107
- "loss": 0.4986,
108
  "step": 180
109
  },
110
  {
111
  "epoch": 6.25,
112
- "grad_norm": 4.541861057281494,
113
- "learning_rate": 2.0000000000000002e-07,
114
- "loss": 0.481,
115
  "step": 200
116
  },
117
  {
118
  "epoch": 6.25,
119
- "eval_loss": 0.6050636768341064,
120
- "eval_runtime": 552.964,
121
- "eval_samples_per_second": 1.808,
122
- "eval_steps_per_second": 1.808,
123
- "eval_wer": 70.03980015242611,
124
  "step": 200
125
  },
126
  {
127
  "epoch": 6.25,
128
  "step": 200,
129
  "total_flos": 1.5401574531072e+17,
130
- "train_loss": 0.8495243072509766,
131
- "train_runtime": 3592.94,
132
- "train_samples_per_second": 1.781,
133
  "train_steps_per_second": 0.056
134
  },
135
  {
136
  "epoch": 6.28125,
137
  "step": 201,
138
  "total_flos": 1.5480354963456e+17,
139
- "train_loss": 0.003230639654605543,
140
- "train_runtime": 7.5043,
141
- "train_samples_per_second": 852.844,
142
- "train_steps_per_second": 26.651
143
  },
144
  {
145
  "epoch": 6.28125,
146
- "eval_loss": 0.6504533886909485,
147
- "eval_runtime": 548.5863,
148
- "eval_samples_per_second": 1.823,
149
- "eval_steps_per_second": 1.823,
150
- "eval_wer": 69.98899144720129,
151
  "step": 201
152
  },
153
  {
154
  "epoch": 6.3125,
155
  "step": 202,
156
  "total_flos": 1.555913539584e+17,
157
- "train_loss": 0.0035205459830784562,
158
- "train_runtime": 7.6486,
159
- "train_samples_per_second": 836.753,
160
- "train_steps_per_second": 26.149
161
  },
162
  {
163
  "epoch": 6.3125,
164
- "eval_loss": 0.6504533886909485,
165
- "eval_runtime": 548.2984,
166
- "eval_samples_per_second": 1.824,
167
- "eval_steps_per_second": 1.824,
168
- "eval_wer": 69.98899144720129,
169
  "step": 202
170
  }
171
  ],
 
1
  {
2
+ "best_metric": 58.31498729889923,
3
+ "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-80",
4
  "epoch": 6.3125,
5
  "eval_steps": 40,
6
  "global_step": 202,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.625,
13
+ "grad_norm": 8.544885635375977,
14
+ "learning_rate": 3.75e-05,
15
+ "loss": 1.8228,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 1.25,
20
+ "grad_norm": 6.32118558883667,
21
+ "learning_rate": 3.75e-05,
22
+ "loss": 0.6803,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 1.25,
27
+ "eval_loss": 0.6102083325386047,
28
+ "eval_runtime": 542.5987,
29
+ "eval_samples_per_second": 1.843,
30
+ "eval_steps_per_second": 1.843,
31
+ "eval_wer": 65.94411515664692,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.875,
36
+ "grad_norm": 6.4600510597229,
37
+ "learning_rate": 3.75e-05,
38
+ "loss": 0.4966,
39
  "step": 60
40
  },
41
  {
42
  "epoch": 2.5,
43
+ "grad_norm": 5.194849491119385,
44
+ "learning_rate": 3.75e-05,
45
+ "loss": 0.3649,
46
  "step": 80
47
  },
48
  {
49
  "epoch": 2.5,
50
+ "eval_loss": 0.5036975741386414,
51
+ "eval_runtime": 540.6408,
52
+ "eval_samples_per_second": 1.85,
53
+ "eval_steps_per_second": 1.85,
54
+ "eval_wer": 58.31498729889923,
55
  "step": 80
56
  },
57
  {
58
  "epoch": 3.125,
59
+ "grad_norm": 4.111001014709473,
60
+ "learning_rate": 3.75e-05,
61
+ "loss": 0.3075,
62
  "step": 100
63
  },
64
  {
65
  "epoch": 3.75,
66
+ "grad_norm": 4.358007431030273,
67
+ "learning_rate": 3.75e-05,
68
+ "loss": 0.2296,
69
  "step": 120
70
  },
71
  {
72
  "epoch": 3.75,
73
+ "eval_loss": 0.48966631293296814,
74
+ "eval_runtime": 538.04,
75
+ "eval_samples_per_second": 1.859,
76
+ "eval_steps_per_second": 1.859,
77
+ "eval_wer": 66.71464860287891,
78
  "step": 120
79
  },
80
  {
81
  "epoch": 4.375,
82
+ "grad_norm": 4.111566066741943,
83
+ "learning_rate": 3.75e-05,
84
+ "loss": 0.1811,
85
  "step": 140
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 6.702174186706543,
90
+ "learning_rate": 3.75e-05,
91
+ "loss": 0.1564,
92
  "step": 160
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_loss": 0.5130774974822998,
97
+ "eval_runtime": 529.7879,
98
+ "eval_samples_per_second": 1.888,
99
+ "eval_steps_per_second": 1.888,
100
+ "eval_wer": 65.34292972057578,
101
  "step": 160
102
  },
103
  {
104
  "epoch": 5.625,
105
+ "grad_norm": 3.0882856845855713,
106
+ "learning_rate": 3.75e-05,
107
+ "loss": 0.101,
108
  "step": 180
109
  },
110
  {
111
  "epoch": 6.25,
112
+ "grad_norm": 4.524318695068359,
113
+ "learning_rate": 3.75e-05,
114
+ "loss": 0.0905,
115
  "step": 200
116
  },
117
  {
118
  "epoch": 6.25,
119
+ "eval_loss": 0.5503659248352051,
120
+ "eval_runtime": 587.5545,
121
+ "eval_samples_per_second": 1.702,
122
+ "eval_steps_per_second": 1.702,
123
+ "eval_wer": 63.903471634208294,
124
  "step": 200
125
  },
126
  {
127
  "epoch": 6.25,
128
  "step": 200,
129
  "total_flos": 1.5401574531072e+17,
130
+ "train_loss": 0.44307032763957976,
131
+ "train_runtime": 3561.8571,
132
+ "train_samples_per_second": 1.797,
133
  "train_steps_per_second": 0.056
134
  },
135
  {
136
  "epoch": 6.28125,
137
  "step": 201,
138
  "total_flos": 1.5480354963456e+17,
139
+ "train_loss": 0.04041733433358112,
140
+ "train_runtime": 8.5506,
141
+ "train_samples_per_second": 748.49,
142
+ "train_steps_per_second": 23.39
143
  },
144
  {
145
  "epoch": 6.28125,
146
+ "eval_loss": 0.5036975741386414,
147
+ "eval_runtime": 613.1737,
148
+ "eval_samples_per_second": 1.631,
149
+ "eval_steps_per_second": 1.631,
150
+ "eval_wer": 58.31498729889923,
151
  "step": 201
152
  },
153
  {
154
  "epoch": 6.3125,
155
  "step": 202,
156
  "total_flos": 1.555913539584e+17,
157
+ "train_loss": 0.04091336939594533,
158
+ "train_runtime": 7.7232,
159
+ "train_samples_per_second": 828.669,
160
+ "train_steps_per_second": 25.896
161
  },
162
  {
163
  "epoch": 6.3125,
164
+ "eval_loss": 0.5036975741386414,
165
+ "eval_runtime": 586.1638,
166
+ "eval_samples_per_second": 1.706,
167
+ "eval_steps_per_second": 1.706,
168
+ "eval_wer": 58.31498729889923,
169
  "step": 202
170
  }
171
  ],
whisper-tiny-hi-checkpoint-3/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf420e0b38b82d46a55cd7ad07aeb13abc779974b6f703a2306e4deb9f513f5b
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76e24b44f2bb0092eb34ce034df2972a05583438e44c5c896bc9865acf1d9d2
3
  size 5240
whisper-tiny-hi-checkpoint-9/trainer_state.json ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 69.98899144720129,
3
+ "best_model_checkpoint": "./whisper-tiny-hi2_test/checkpoint-120",
4
+ "epoch": 6.5,
5
+ "eval_steps": 40,
6
+ "global_step": 208,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.625,
13
+ "grad_norm": 10.95101547241211,
14
+ "learning_rate": 9.200000000000002e-06,
15
+ "loss": 2.3747,
16
+ "step": 20
17
+ },
18
+ {
19
+ "epoch": 1.25,
20
+ "grad_norm": 7.747081279754639,
21
+ "learning_rate": 8.2e-06,
22
+ "loss": 1.2169,
23
+ "step": 40
24
+ },
25
+ {
26
+ "epoch": 1.25,
27
+ "eval_loss": 1.0360029935836792,
28
+ "eval_runtime": 574.5339,
29
+ "eval_samples_per_second": 1.741,
30
+ "eval_steps_per_second": 1.741,
31
+ "eval_wer": 91.60809552036582,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 1.875,
36
+ "grad_norm": 7.32460355758667,
37
+ "learning_rate": 7.2000000000000005e-06,
38
+ "loss": 0.9058,
39
+ "step": 60
40
+ },
41
+ {
42
+ "epoch": 2.5,
43
+ "grad_norm": 6.835532188415527,
44
+ "learning_rate": 6.200000000000001e-06,
45
+ "loss": 0.7302,
46
+ "step": 80
47
+ },
48
+ {
49
+ "epoch": 2.5,
50
+ "eval_loss": 0.7368654608726501,
51
+ "eval_runtime": 549.562,
52
+ "eval_samples_per_second": 1.82,
53
+ "eval_steps_per_second": 1.82,
54
+ "eval_wer": 79.80353967313066,
55
+ "step": 80
56
+ },
57
+ {
58
+ "epoch": 3.125,
59
+ "grad_norm": 6.408384323120117,
60
+ "learning_rate": 5.2e-06,
61
+ "loss": 0.6453,
62
+ "step": 100
63
+ },
64
+ {
65
+ "epoch": 3.75,
66
+ "grad_norm": 5.809150695800781,
67
+ "learning_rate": 4.2000000000000004e-06,
68
+ "loss": 0.5917,
69
+ "step": 120
70
+ },
71
+ {
72
+ "epoch": 3.75,
73
+ "eval_loss": 0.6504533886909485,
74
+ "eval_runtime": 548.8688,
75
+ "eval_samples_per_second": 1.822,
76
+ "eval_steps_per_second": 1.822,
77
+ "eval_wer": 69.98899144720129,
78
+ "step": 120
79
+ },
80
+ {
81
+ "epoch": 4.375,
82
+ "grad_norm": 5.724637508392334,
83
+ "learning_rate": 3.2000000000000003e-06,
84
+ "loss": 0.5356,
85
+ "step": 140
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 10.124310493469238,
90
+ "learning_rate": 2.2e-06,
91
+ "loss": 0.5156,
92
+ "step": 160
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_loss": 0.6157090663909912,
97
+ "eval_runtime": 549.7895,
98
+ "eval_samples_per_second": 1.819,
99
+ "eval_steps_per_second": 1.819,
100
+ "eval_wer": 70.4293335591498,
101
+ "step": 160
102
+ },
103
+ {
104
+ "epoch": 5.625,
105
+ "grad_norm": 4.947811126708984,
106
+ "learning_rate": 1.2000000000000002e-06,
107
+ "loss": 0.4986,
108
+ "step": 180
109
+ },
110
+ {
111
+ "epoch": 6.25,
112
+ "grad_norm": 4.541861057281494,
113
+ "learning_rate": 2.0000000000000002e-07,
114
+ "loss": 0.481,
115
+ "step": 200
116
+ },
117
+ {
118
+ "epoch": 6.25,
119
+ "eval_loss": 0.6050636768341064,
120
+ "eval_runtime": 552.964,
121
+ "eval_samples_per_second": 1.808,
122
+ "eval_steps_per_second": 1.808,
123
+ "eval_wer": 70.03980015242611,
124
+ "step": 200
125
+ },
126
+ {
127
+ "epoch": 6.25,
128
+ "step": 200,
129
+ "total_flos": 1.5401574531072e+17,
130
+ "train_loss": 0.8495243072509766,
131
+ "train_runtime": 3592.94,
132
+ "train_samples_per_second": 1.781,
133
+ "train_steps_per_second": 0.056
134
+ },
135
+ {
136
+ "epoch": 6.28125,
137
+ "step": 201,
138
+ "total_flos": 1.5480354963456e+17,
139
+ "train_loss": 0.003230639654605543,
140
+ "train_runtime": 7.5043,
141
+ "train_samples_per_second": 852.844,
142
+ "train_steps_per_second": 26.651
143
+ },
144
+ {
145
+ "epoch": 6.28125,
146
+ "eval_loss": 0.6504533886909485,
147
+ "eval_runtime": 548.5863,
148
+ "eval_samples_per_second": 1.823,
149
+ "eval_steps_per_second": 1.823,
150
+ "eval_wer": 69.98899144720129,
151
+ "step": 201
152
+ },
153
+ {
154
+ "epoch": 6.3125,
155
+ "step": 202,
156
+ "total_flos": 1.555913539584e+17,
157
+ "train_loss": 0.0035205459830784562,
158
+ "train_runtime": 7.6486,
159
+ "train_samples_per_second": 836.753,
160
+ "train_steps_per_second": 26.149
161
+ },
162
+ {
163
+ "epoch": 6.3125,
164
+ "eval_loss": 0.6504533886909485,
165
+ "eval_runtime": 548.2984,
166
+ "eval_samples_per_second": 1.824,
167
+ "eval_steps_per_second": 1.824,
168
+ "eval_wer": 69.98899144720129,
169
+ "step": 202
170
+ },
171
+ {
172
+ "epoch": 6.34375,
173
+ "step": 203,
174
+ "total_flos": 1.5637915828224e+17,
175
+ "train_loss": 0.002875891812329222,
176
+ "train_runtime": 7.5227,
177
+ "train_samples_per_second": 850.756,
178
+ "train_steps_per_second": 26.586
179
+ },
180
+ {
181
+ "epoch": 6.34375,
182
+ "eval_loss": 0.6504533886909485,
183
+ "eval_runtime": 547.5722,
184
+ "eval_samples_per_second": 1.826,
185
+ "eval_steps_per_second": 1.826,
186
+ "eval_wer": 69.98899144720129,
187
+ "step": 203
188
+ },
189
+ {
190
+ "epoch": 6.375,
191
+ "step": 204,
192
+ "total_flos": 1.5716696260608e+17,
193
+ "train_loss": 0.003042735305486941,
194
+ "train_runtime": 7.3464,
195
+ "train_samples_per_second": 871.18,
196
+ "train_steps_per_second": 27.224
197
+ },
198
+ {
199
+ "epoch": 6.375,
200
+ "eval_loss": 0.6504533886909485,
201
+ "eval_runtime": 547.015,
202
+ "eval_samples_per_second": 1.828,
203
+ "eval_steps_per_second": 1.828,
204
+ "eval_wer": 69.98899144720129,
205
+ "step": 204
206
+ },
207
+ {
208
+ "epoch": 6.40625,
209
+ "step": 205,
210
+ "total_flos": 1.5795476692992e+17,
211
+ "train_loss": 0.0033008325390699433,
212
+ "train_runtime": 7.3265,
213
+ "train_samples_per_second": 873.547,
214
+ "train_steps_per_second": 27.298
215
+ },
216
+ {
217
+ "epoch": 6.40625,
218
+ "eval_loss": 0.6504533886909485,
219
+ "eval_runtime": 545.7731,
220
+ "eval_samples_per_second": 1.832,
221
+ "eval_steps_per_second": 1.832,
222
+ "eval_wer": 69.98899144720129,
223
+ "step": 205
224
+ },
225
+ {
226
+ "epoch": 6.4375,
227
+ "step": 206,
228
+ "total_flos": 1.5874257125376e+17,
229
+ "train_loss": 0.0030977731769524734,
230
+ "train_runtime": 7.5608,
231
+ "train_samples_per_second": 846.467,
232
+ "train_steps_per_second": 26.452
233
+ },
234
+ {
235
+ "epoch": 6.4375,
236
+ "eval_loss": 0.6504533886909485,
237
+ "eval_runtime": 546.3323,
238
+ "eval_samples_per_second": 1.83,
239
+ "eval_steps_per_second": 1.83,
240
+ "eval_wer": 69.98899144720129,
241
+ "step": 206
242
+ },
243
+ {
244
+ "epoch": 6.46875,
245
+ "step": 207,
246
+ "total_flos": 1.595303755776e+17,
247
+ "train_loss": 0.0028965755361289795,
248
+ "train_runtime": 7.5526,
249
+ "train_samples_per_second": 847.394,
250
+ "train_steps_per_second": 26.481
251
+ },
252
+ {
253
+ "epoch": 6.46875,
254
+ "eval_loss": 0.6504533886909485,
255
+ "eval_runtime": 546.1485,
256
+ "eval_samples_per_second": 1.831,
257
+ "eval_steps_per_second": 1.831,
258
+ "eval_wer": 69.98899144720129,
259
+ "step": 207
260
+ },
261
+ {
262
+ "epoch": 6.5,
263
+ "step": 208,
264
+ "total_flos": 1.6031817990144e+17,
265
+ "train_loss": 0.003312833607196808,
266
+ "train_runtime": 7.6251,
267
+ "train_samples_per_second": 839.332,
268
+ "train_steps_per_second": 26.229
269
+ },
270
+ {
271
+ "epoch": 6.5,
272
+ "eval_loss": 0.6504533886909485,
273
+ "eval_runtime": 547.4069,
274
+ "eval_samples_per_second": 1.827,
275
+ "eval_steps_per_second": 1.827,
276
+ "eval_wer": 69.98899144720129,
277
+ "step": 208
278
+ }
279
+ ],
280
+ "logging_steps": 20,
281
+ "max_steps": 200,
282
+ "num_input_tokens_seen": 0,
283
+ "num_train_epochs": 7,
284
+ "save_steps": 40,
285
+ "stateful_callbacks": {
286
+ "TrainerControl": {
287
+ "args": {
288
+ "should_epoch_stop": false,
289
+ "should_evaluate": false,
290
+ "should_log": false,
291
+ "should_save": true,
292
+ "should_training_stop": true
293
+ },
294
+ "attributes": {}
295
+ }
296
+ },
297
+ "total_flos": 1.6031817990144e+17,
298
+ "train_batch_size": 32,
299
+ "trial_name": null,
300
+ "trial_params": null
301
+ }