nrshoudi committed on
Commit
016bb93
·
1 Parent(s): 6d2698a

End of training

Browse files
Files changed (5) hide show
  1. README.md +27 -17
  2. config.json +1 -1
  3. model.safetensors +1 -1
  4. trainer_state.json +254 -94
  5. training_args.bin +1 -1
README.md CHANGED
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [facebook/hubert-large-ls960-ft](https://huggingface.co/facebook/hubert-large-ls960-ft) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.5404
21
- - Wer: 0.0859
22
- - Per: 0.0671
23
 
24
  ## Model description
25
 
@@ -45,27 +45,37 @@ The following hyperparameters were used during training:
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.1
48
- - num_epochs: 10
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Wer | Per |
53
  |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
54
- | 6.3943 | 1.0 | 1637 | 1.2722 | 0.4693 | 0.4456 |
55
- | 0.7962 | 2.0 | 3274 | 0.5990 | 0.1377 | 0.1185 |
56
- | 0.4245 | 3.0 | 4911 | 0.6075 | 0.0899 | 0.0674 |
57
- | 0.2898 | 4.0 | 6548 | 0.5285 | 0.0979 | 0.0738 |
58
- | 0.2262 | 5.0 | 8185 | 0.5600 | 0.0977 | 0.0758 |
59
- | 0.1803 | 6.0 | 9822 | 0.5504 | 0.0808 | 0.0603 |
60
- | 0.1488 | 7.0 | 11459 | 0.5854 | 0.0898 | 0.0700 |
61
- | 0.1267 | 8.0 | 13096 | 0.5438 | 0.0914 | 0.0722 |
62
- | 0.1156 | 9.0 | 14733 | 0.5395 | 0.0866 | 0.0671 |
63
- | 0.0993 | 10.0 | 16370 | 0.5404 | 0.0859 | 0.0671 |
 
 
 
 
 
 
 
 
 
 
64
 
65
 
66
  ### Framework versions
67
 
68
- - Transformers 4.35.0
69
  - Pytorch 2.1.0+cu118
70
- - Datasets 2.14.6
71
- - Tokenizers 0.14.1
 
17
 
18
  This model is a fine-tuned version of [facebook/hubert-large-ls960-ft](https://huggingface.co/facebook/hubert-large-ls960-ft) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3991
21
+ - Wer: 0.0466
22
+ - Per: 0.0363
23
 
24
  ## Model description
25
 
 
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.1
48
+ - num_epochs: 20
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Wer | Per |
53
  |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
54
+ | 5.6351 | 1.0 | 1637 | 0.7704 | 0.3277 | 0.3110 |
55
+ | 0.3646 | 2.0 | 3274 | 0.4333 | 0.0673 | 0.0540 |
56
+ | 0.1587 | 3.0 | 4911 | 0.3876 | 0.0710 | 0.0580 |
57
+ | 0.1105 | 4.0 | 6548 | 0.3608 | 0.0670 | 0.0558 |
58
+ | 0.0878 | 5.0 | 8185 | 0.3808 | 0.0678 | 0.0562 |
59
+ | 0.0738 | 6.0 | 9822 | 0.3576 | 0.0639 | 0.0518 |
60
+ | 0.0594 | 7.0 | 11459 | 0.3309 | 0.0617 | 0.0508 |
61
+ | 0.056 | 8.0 | 13096 | 0.3570 | 0.0587 | 0.0467 |
62
+ | 0.0478 | 9.0 | 14733 | 0.3450 | 0.0583 | 0.0473 |
63
+ | 0.0388 | 10.0 | 16370 | 0.3993 | 0.0591 | 0.0480 |
64
+ | 0.0378 | 11.0 | 18007 | 0.4172 | 0.0550 | 0.0437 |
65
+ | 0.0319 | 12.0 | 19644 | 0.3625 | 0.0518 | 0.0413 |
66
+ | 0.0309 | 13.0 | 21281 | 0.3794 | 0.0505 | 0.0392 |
67
+ | 0.0257 | 14.0 | 22918 | 0.3897 | 0.0534 | 0.0426 |
68
+ | 0.0202 | 15.0 | 24555 | 0.4077 | 0.0512 | 0.0404 |
69
+ | 0.0197 | 16.0 | 26192 | 0.3846 | 0.0508 | 0.0411 |
70
+ | 0.016 | 17.0 | 27829 | 0.3808 | 0.0491 | 0.0391 |
71
+ | 0.0157 | 18.0 | 29466 | 0.4060 | 0.0471 | 0.0373 |
72
+ | 0.0131 | 19.0 | 31103 | 0.3925 | 0.0465 | 0.0361 |
73
+ | 0.0115 | 20.0 | 32740 | 0.3991 | 0.0466 | 0.0363 |
74
 
75
 
76
  ### Framework versions
77
 
78
+ - Transformers 4.35.2
79
  - Pytorch 2.1.0+cu118
80
+ - Datasets 2.15.0
81
+ - Tokenizers 0.15.0
config.json CHANGED
@@ -70,7 +70,7 @@
70
  "num_hidden_layers": 24,
71
  "pad_token_id": 37,
72
  "torch_dtype": "float32",
73
- "transformers_version": "4.35.0",
74
  "use_weighted_layer_sum": false,
75
  "vocab_size": 40
76
  }
 
70
  "num_hidden_layers": 24,
71
  "pad_token_id": 37,
72
  "torch_dtype": "float32",
73
+ "transformers_version": "4.35.2",
74
  "use_weighted_layer_sum": false,
75
  "vocab_size": 40
76
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7e1feb28658dec91aefb393dfcfcb493935e45bb90b786f5599d7b3a49cecc0
3
  size 1261970648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2969a7623859b9a904b126801a107448526dca244893252958260ce5f33819ac
3
  size 1261970648
trainer_state.json CHANGED
@@ -1,188 +1,348 @@
1
  {
2
- "best_metric": 0.5284518003463745,
3
- "best_model_checkpoint": "nrshoudi/hubert_arabic_mdd/checkpoint-6548",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 16370,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "learning_rate": 0.0001,
14
- "loss": 6.3943,
15
  "step": 1637
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_loss": 1.2721797227859497,
20
- "eval_per": 0.4456091585791011,
21
- "eval_runtime": 117.745,
22
- "eval_samples_per_second": 6.905,
23
- "eval_steps_per_second": 3.457,
24
- "eval_wer": 0.4692734823386683,
25
  "step": 1637
26
  },
27
  {
28
  "epoch": 2.0,
29
- "learning_rate": 8.888888888888889e-05,
30
- "loss": 0.7962,
31
  "step": 3274
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 0.5990138053894043,
36
- "eval_per": 0.11846320550268538,
37
- "eval_runtime": 118.7347,
38
- "eval_samples_per_second": 6.847,
39
- "eval_steps_per_second": 3.428,
40
- "eval_wer": 0.13774438896925015,
41
  "step": 3274
42
  },
43
  {
44
  "epoch": 3.0,
45
- "learning_rate": 7.777777777777778e-05,
46
- "loss": 0.4245,
47
  "step": 4911
48
  },
49
  {
50
  "epoch": 3.0,
51
- "eval_loss": 0.6075053811073303,
52
- "eval_per": 0.06744087439932159,
53
- "eval_runtime": 118.3123,
54
- "eval_samples_per_second": 6.872,
55
- "eval_steps_per_second": 3.44,
56
- "eval_wer": 0.08991647613270495,
57
  "step": 4911
58
  },
59
  {
60
  "epoch": 4.0,
61
- "learning_rate": 6.666666666666667e-05,
62
- "loss": 0.2898,
63
  "step": 6548
64
  },
65
  {
66
  "epoch": 4.0,
67
- "eval_loss": 0.5284518003463745,
68
- "eval_per": 0.07384811080750024,
69
- "eval_runtime": 121.2465,
70
- "eval_samples_per_second": 6.705,
71
- "eval_steps_per_second": 3.357,
72
- "eval_wer": 0.09789557183519201,
73
  "step": 6548
74
  },
75
  {
76
  "epoch": 5.0,
77
- "learning_rate": 5.555555555555556e-05,
78
- "loss": 0.2262,
79
  "step": 8185
80
  },
81
  {
82
  "epoch": 5.0,
83
- "eval_loss": 0.5600156784057617,
84
- "eval_per": 0.07577970413643645,
85
- "eval_runtime": 119.4374,
86
- "eval_samples_per_second": 6.807,
87
- "eval_steps_per_second": 3.408,
88
- "eval_wer": 0.09770892632168354,
89
  "step": 8185
90
  },
91
  {
92
  "epoch": 6.0,
93
- "learning_rate": 4.4444444444444447e-05,
94
- "loss": 0.1803,
95
  "step": 9822
96
  },
97
  {
98
  "epoch": 6.0,
99
- "eval_loss": 0.5504231452941895,
100
- "eval_per": 0.060279845472533684,
101
- "eval_runtime": 119.1759,
102
- "eval_samples_per_second": 6.822,
103
- "eval_steps_per_second": 3.415,
104
- "eval_wer": 0.08077084597078998,
105
  "step": 9822
106
  },
107
  {
108
  "epoch": 7.0,
109
- "learning_rate": 3.3333333333333335e-05,
110
- "loss": 0.1488,
111
  "step": 11459
112
  },
113
  {
114
  "epoch": 7.0,
115
- "eval_loss": 0.5854084491729736,
116
- "eval_per": 0.06998492414962781,
117
- "eval_runtime": 119.0832,
118
- "eval_samples_per_second": 6.827,
119
- "eval_steps_per_second": 3.418,
120
- "eval_wer": 0.08982315337595073,
121
  "step": 11459
122
  },
123
  {
124
  "epoch": 8.0,
125
- "learning_rate": 2.2222222222222223e-05,
126
- "loss": 0.1267,
127
  "step": 13096
128
  },
129
  {
130
  "epoch": 8.0,
131
- "eval_loss": 0.5438163876533508,
132
- "eval_per": 0.07222274568924904,
133
- "eval_runtime": 119.6311,
134
- "eval_samples_per_second": 6.796,
135
- "eval_steps_per_second": 3.402,
136
- "eval_wer": 0.09140964024077271,
137
  "step": 13096
138
  },
139
  {
140
  "epoch": 9.0,
141
- "learning_rate": 1.1111111111111112e-05,
142
- "loss": 0.1156,
143
  "step": 14733
144
  },
145
  {
146
  "epoch": 9.0,
147
- "eval_loss": 0.5394747257232666,
148
- "eval_per": 0.06713464618863657,
149
- "eval_runtime": 119.6655,
150
- "eval_samples_per_second": 6.794,
151
- "eval_steps_per_second": 3.401,
152
- "eval_wer": 0.08660351826792964,
153
  "step": 14733
154
  },
155
  {
156
  "epoch": 10.0,
157
- "learning_rate": 0.0,
158
- "loss": 0.0993,
159
  "step": 16370
160
  },
161
  {
162
  "epoch": 10.0,
163
- "eval_loss": 0.5404430627822876,
164
- "eval_per": 0.06713464618863657,
165
- "eval_runtime": 120.5761,
166
- "eval_samples_per_second": 6.743,
167
- "eval_steps_per_second": 3.375,
168
- "eval_wer": 0.08590359759227288,
169
  "step": 16370
170
  },
171
  {
172
- "epoch": 10.0,
173
- "step": 16370,
174
- "total_flos": 5.23722142955399e+18,
175
- "train_loss": 0.8801708410189657,
176
- "train_runtime": 9837.0244,
177
- "train_samples_per_second": 3.327,
178
- "train_steps_per_second": 1.664
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  }
180
  ],
181
  "logging_steps": 500,
182
- "max_steps": 16370,
183
- "num_train_epochs": 10,
184
  "save_steps": 500,
185
- "total_flos": 5.23722142955399e+18,
186
  "trial_name": null,
187
  "trial_params": null
188
  }
 
1
  {
2
+ "best_metric": 0.3309297263622284,
3
+ "best_model_checkpoint": "nrshoudi/hubert_arabic_mdd/checkpoint-11459",
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 32740,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "learning_rate": 5e-05,
14
+ "loss": 5.6351,
15
  "step": 1637
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_loss": 0.7704485058784485,
20
+ "eval_per": 0.3110336379911429,
21
+ "eval_runtime": 117.9251,
22
+ "eval_samples_per_second": 6.894,
23
+ "eval_steps_per_second": 3.451,
24
+ "eval_wer": 0.3277028603424945,
25
  "step": 1637
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "learning_rate": 0.0001,
30
+ "loss": 0.3646,
31
  "step": 3274
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 0.433285653591156,
36
+ "eval_per": 0.05403750117780081,
37
+ "eval_runtime": 117.5906,
38
+ "eval_samples_per_second": 6.914,
39
+ "eval_steps_per_second": 3.461,
40
+ "eval_wer": 0.06728570761980308,
41
  "step": 3274
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "learning_rate": 9.444444444444444e-05,
46
+ "loss": 0.1587,
47
  "step": 4911
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "eval_loss": 0.3875606060028076,
52
+ "eval_per": 0.05804202393291247,
53
+ "eval_runtime": 117.765,
54
+ "eval_samples_per_second": 6.904,
55
+ "eval_steps_per_second": 3.456,
56
+ "eval_wer": 0.07101861788997246,
57
  "step": 4911
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "learning_rate": 8.888888888888889e-05,
62
+ "loss": 0.1105,
63
  "step": 6548
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "eval_loss": 0.3607988655567169,
68
+ "eval_per": 0.05578064637708471,
69
+ "eval_runtime": 117.9127,
70
+ "eval_samples_per_second": 6.895,
71
+ "eval_steps_per_second": 3.452,
72
+ "eval_wer": 0.06695907797116327,
73
  "step": 6548
74
  },
75
  {
76
  "epoch": 5.0,
77
+ "learning_rate": 8.333333333333334e-05,
78
+ "loss": 0.0878,
79
  "step": 8185
80
  },
81
  {
82
  "epoch": 5.0,
83
+ "eval_loss": 0.3808358907699585,
84
+ "eval_per": 0.05618109865259587,
85
+ "eval_runtime": 117.5542,
86
+ "eval_samples_per_second": 6.916,
87
+ "eval_steps_per_second": 3.462,
88
+ "eval_wer": 0.06775232140357426,
89
  "step": 8185
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "learning_rate": 7.777777777777778e-05,
94
+ "loss": 0.0738,
95
  "step": 9822
96
  },
97
  {
98
  "epoch": 6.0,
99
+ "eval_loss": 0.3576147258281708,
100
+ "eval_per": 0.05177612362197305,
101
+ "eval_runtime": 118.5636,
102
+ "eval_samples_per_second": 6.857,
103
+ "eval_steps_per_second": 3.433,
104
+ "eval_wer": 0.06392608837665065,
105
  "step": 9822
106
  },
107
  {
108
  "epoch": 7.0,
109
+ "learning_rate": 7.222222222222222e-05,
110
+ "loss": 0.0594,
111
  "step": 11459
112
  },
113
  {
114
  "epoch": 7.0,
115
+ "eval_loss": 0.3309297263622284,
116
+ "eval_per": 0.050810326957504946,
117
+ "eval_runtime": 117.7279,
118
+ "eval_samples_per_second": 6.906,
119
+ "eval_steps_per_second": 3.457,
120
+ "eval_wer": 0.06173300359292613,
121
  "step": 11459
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "learning_rate": 6.666666666666667e-05,
126
+ "loss": 0.056,
127
  "step": 13096
128
  },
129
  {
130
  "epoch": 8.0,
131
+ "eval_loss": 0.3569846451282501,
132
+ "eval_per": 0.046711580137567134,
133
+ "eval_runtime": 118.1175,
134
+ "eval_samples_per_second": 6.883,
135
+ "eval_steps_per_second": 3.446,
136
+ "eval_wer": 0.058700013998413514,
137
  "step": 13096
138
  },
139
  {
140
  "epoch": 9.0,
141
+ "learning_rate": 6.111111111111112e-05,
142
+ "loss": 0.0478,
143
  "step": 14733
144
  },
145
  {
146
  "epoch": 9.0,
147
+ "eval_loss": 0.34502002596855164,
148
+ "eval_per": 0.04727692452652407,
149
+ "eval_runtime": 117.9103,
150
+ "eval_samples_per_second": 6.895,
151
+ "eval_steps_per_second": 3.452,
152
+ "eval_wer": 0.05832672297139657,
153
  "step": 14733
154
  },
155
  {
156
  "epoch": 10.0,
157
+ "learning_rate": 5.555555555555556e-05,
158
+ "loss": 0.0388,
159
  "step": 16370
160
  },
161
  {
162
  "epoch": 10.0,
163
+ "eval_loss": 0.3993436396121979,
164
+ "eval_per": 0.04803071704513333,
165
+ "eval_runtime": 117.7116,
166
+ "eval_samples_per_second": 6.907,
167
+ "eval_steps_per_second": 3.458,
168
+ "eval_wer": 0.05907330502543045,
169
  "step": 16370
170
  },
171
  {
172
+ "epoch": 11.0,
173
+ "learning_rate": 5e-05,
174
+ "loss": 0.0378,
175
+ "step": 18007
176
+ },
177
+ {
178
+ "epoch": 11.0,
179
+ "eval_loss": 0.4171537160873413,
180
+ "eval_per": 0.0437435220955432,
181
+ "eval_runtime": 118.1365,
182
+ "eval_samples_per_second": 6.882,
183
+ "eval_steps_per_second": 3.445,
184
+ "eval_wer": 0.05501376510662125,
185
+ "step": 18007
186
+ },
187
+ {
188
+ "epoch": 12.0,
189
+ "learning_rate": 4.4444444444444447e-05,
190
+ "loss": 0.0319,
191
+ "step": 19644
192
+ },
193
+ {
194
+ "epoch": 12.0,
195
+ "eval_loss": 0.3625492751598358,
196
+ "eval_per": 0.04131725242626967,
197
+ "eval_runtime": 118.0928,
198
+ "eval_samples_per_second": 6.884,
199
+ "eval_steps_per_second": 3.446,
200
+ "eval_wer": 0.051794129998600155,
201
+ "step": 19644
202
+ },
203
+ {
204
+ "epoch": 13.0,
205
+ "learning_rate": 3.888888888888889e-05,
206
+ "loss": 0.0309,
207
+ "step": 21281
208
+ },
209
+ {
210
+ "epoch": 13.0,
211
+ "eval_loss": 0.3794006109237671,
212
+ "eval_per": 0.03919721096768115,
213
+ "eval_runtime": 118.3172,
214
+ "eval_samples_per_second": 6.871,
215
+ "eval_steps_per_second": 3.44,
216
+ "eval_wer": 0.05053427278241799,
217
+ "step": 21281
218
+ },
219
+ {
220
+ "epoch": 14.0,
221
+ "learning_rate": 3.3333333333333335e-05,
222
+ "loss": 0.0257,
223
+ "step": 22918
224
+ },
225
+ {
226
+ "epoch": 14.0,
227
+ "eval_loss": 0.3896752893924713,
228
+ "eval_per": 0.04263638933383586,
229
+ "eval_runtime": 118.7663,
230
+ "eval_samples_per_second": 6.845,
231
+ "eval_steps_per_second": 3.427,
232
+ "eval_wer": 0.05342727824179926,
233
+ "step": 22918
234
+ },
235
+ {
236
+ "epoch": 15.0,
237
+ "learning_rate": 2.777777777777778e-05,
238
+ "loss": 0.0202,
239
+ "step": 24555
240
+ },
241
+ {
242
+ "epoch": 15.0,
243
+ "eval_loss": 0.40769127011299133,
244
+ "eval_per": 0.04042212381042118,
245
+ "eval_runtime": 118.2316,
246
+ "eval_samples_per_second": 6.876,
247
+ "eval_steps_per_second": 3.442,
248
+ "eval_wer": 0.05123419345807475,
249
+ "step": 24555
250
+ },
251
+ {
252
+ "epoch": 16.0,
253
+ "learning_rate": 2.2222222222222223e-05,
254
+ "loss": 0.0197,
255
+ "step": 26192
256
+ },
257
+ {
258
+ "epoch": 16.0,
259
+ "eval_loss": 0.38464316725730896,
260
+ "eval_per": 0.041128804296617354,
261
+ "eval_runtime": 118.9714,
262
+ "eval_samples_per_second": 6.834,
263
+ "eval_steps_per_second": 3.421,
264
+ "eval_wer": 0.05076757967430358,
265
+ "step": 26192
266
+ },
267
+ {
268
+ "epoch": 17.0,
269
+ "learning_rate": 1.6666666666666667e-05,
270
+ "loss": 0.016,
271
+ "step": 27829
272
+ },
273
+ {
274
+ "epoch": 17.0,
275
+ "eval_loss": 0.3807629942893982,
276
+ "eval_per": 0.03910298690285499,
277
+ "eval_runtime": 118.4593,
278
+ "eval_samples_per_second": 6.863,
279
+ "eval_steps_per_second": 3.436,
280
+ "eval_wer": 0.04913443143110448,
281
+ "step": 27829
282
+ },
283
+ {
284
+ "epoch": 18.0,
285
+ "learning_rate": 1.1111111111111112e-05,
286
+ "loss": 0.0157,
287
+ "step": 29466
288
+ },
289
+ {
290
+ "epoch": 18.0,
291
+ "eval_loss": 0.4060150980949402,
292
+ "eval_per": 0.03726561763874493,
293
+ "eval_runtime": 118.2175,
294
+ "eval_samples_per_second": 6.877,
295
+ "eval_steps_per_second": 3.443,
296
+ "eval_wer": 0.047127992160888435,
297
+ "step": 29466
298
+ },
299
+ {
300
+ "epoch": 19.0,
301
+ "learning_rate": 5.555555555555556e-06,
302
+ "loss": 0.0131,
303
+ "step": 31103
304
+ },
305
+ {
306
+ "epoch": 19.0,
307
+ "eval_loss": 0.3925394117832184,
308
+ "eval_per": 0.03611137284462452,
309
+ "eval_runtime": 118.443,
310
+ "eval_samples_per_second": 6.864,
311
+ "eval_steps_per_second": 3.436,
312
+ "eval_wer": 0.04652139424198591,
313
+ "step": 31103
314
+ },
315
+ {
316
+ "epoch": 20.0,
317
+ "learning_rate": 0.0,
318
+ "loss": 0.0115,
319
+ "step": 32740
320
+ },
321
+ {
322
+ "epoch": 20.0,
323
+ "eval_loss": 0.39911746978759766,
324
+ "eval_per": 0.036252708941863755,
325
+ "eval_runtime": 118.88,
326
+ "eval_samples_per_second": 6.839,
327
+ "eval_steps_per_second": 3.424,
328
+ "eval_wer": 0.04656805562036303,
329
+ "step": 32740
330
+ },
331
+ {
332
+ "epoch": 20.0,
333
+ "step": 32740,
334
+ "total_flos": 1.0473975577487264e+19,
335
+ "train_loss": 0.3427419745201153,
336
+ "train_runtime": 19585.0411,
337
+ "train_samples_per_second": 3.342,
338
+ "train_steps_per_second": 1.672
339
  }
340
  ],
341
  "logging_steps": 500,
342
+ "max_steps": 32740,
343
+ "num_train_epochs": 20,
344
  "save_steps": 500,
345
+ "total_flos": 1.0473975577487264e+19,
346
  "trial_name": null,
347
  "trial_params": null
348
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df396eb2305401892bce240be3383c2e51e3c16ab0698dab7a388cc695d3d602
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a1aeff00b8bc3228970214b85879c5999771d34eb110131836d18ee1b0b128
3
  size 4600