marofmar committed on
Commit
1fd7d20
·
1 Parent(s): 2f3abc7

another trial

Browse files
config.json CHANGED
@@ -70,7 +70,7 @@
70
  "num_conv_pos_embeddings": 128,
71
  "num_feat_extract_layers": 7,
72
  "num_hidden_layers": 24,
73
- "pad_token_id": 259,
74
  "transformers_version": "4.6.0.dev0",
75
- "vocab_size": 260
76
  }
 
70
  "num_conv_pos_embeddings": 128,
71
  "num_feat_extract_layers": 7,
72
  "num_hidden_layers": 24,
73
+ "pad_token_id": 45,
74
  "transformers_version": "4.6.0.dev0",
75
+ "vocab_size": 46
76
  }
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bfc6508f4174c9f6359349b9687c28ddab0e09aa92126ab0fa9bcf89e6bb4fb
3
- size 2492208775
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a8acfe830487f7ddb00a2c4c6db079bdc50bf9d4ba7f6fd18f42934341b452
3
- size 1262999831
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63fff73f92863b73d9fa36f173b126d733cdff623e49de63158bfb66b1dc8d4
3
+ size 1262122455
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:057999ec38e996dec146de5e28f3aaf30dfbc86f0c54eee12d611f5e3b7423e8
3
- size 623
 
 
 
 
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
trainer_state.json DELETED
@@ -1,310 +0,0 @@
1
- {
2
- "best_metric": 4.255084991455078,
3
- "best_model_checkpoint": "./xlsr-demo/checkpoint-80",
4
- "epoch": 28.0,
5
- "global_step": 84,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 1.33,
12
- "learning_rate": 5.9999999999999995e-05,
13
- "loss": 45.1762,
14
- "step": 4
15
- },
16
- {
17
- "epoch": 1.33,
18
- "eval_loss": 32.95661544799805,
19
- "eval_runtime": 0.6944,
20
- "eval_samples_per_second": 14.402,
21
- "eval_wer": 1.0,
22
- "step": 4
23
- },
24
- {
25
- "epoch": 2.67,
26
- "learning_rate": 0.00023999999999999998,
27
- "loss": 48.4094,
28
- "step": 8
29
- },
30
- {
31
- "epoch": 2.67,
32
- "eval_loss": 32.567665100097656,
33
- "eval_runtime": 0.5168,
34
- "eval_samples_per_second": 19.351,
35
- "eval_wer": 1.0,
36
- "step": 8
37
- },
38
- {
39
- "epoch": 4.0,
40
- "learning_rate": 0.00028941176470588233,
41
- "loss": 41.452,
42
- "step": 12
43
- },
44
- {
45
- "epoch": 4.0,
46
- "eval_loss": 31.335033416748047,
47
- "eval_runtime": 0.5187,
48
- "eval_samples_per_second": 19.279,
49
- "eval_wer": 1.0,
50
- "step": 12
51
- },
52
- {
53
- "epoch": 5.33,
54
- "learning_rate": 0.0002788235294117647,
55
- "loss": 43.7158,
56
- "step": 16
57
- },
58
- {
59
- "epoch": 5.33,
60
- "eval_loss": 29.559152603149414,
61
- "eval_runtime": 0.5095,
62
- "eval_samples_per_second": 19.627,
63
- "eval_wer": 1.0,
64
- "step": 16
65
- },
66
- {
67
- "epoch": 6.67,
68
- "learning_rate": 0.00026470588235294115,
69
- "loss": 30.5266,
70
- "step": 20
71
- },
72
- {
73
- "epoch": 6.67,
74
- "eval_loss": 18.15114974975586,
75
- "eval_runtime": 0.5292,
76
- "eval_samples_per_second": 18.896,
77
- "eval_wer": 1.0,
78
- "step": 20
79
- },
80
- {
81
- "epoch": 8.0,
82
- "learning_rate": 0.0002505882352941176,
83
- "loss": 27.451,
84
- "step": 24
85
- },
86
- {
87
- "epoch": 8.0,
88
- "eval_loss": 13.7584810256958,
89
- "eval_runtime": 0.5073,
90
- "eval_samples_per_second": 19.713,
91
- "eval_wer": 1.0,
92
- "step": 24
93
- },
94
- {
95
- "epoch": 9.33,
96
- "learning_rate": 0.0002364705882352941,
97
- "loss": 19.4205,
98
- "step": 28
99
- },
100
- {
101
- "epoch": 9.33,
102
- "eval_loss": 10.164986610412598,
103
- "eval_runtime": 0.5127,
104
- "eval_samples_per_second": 19.503,
105
- "eval_wer": 1.0,
106
- "step": 28
107
- },
108
- {
109
- "epoch": 10.67,
110
- "learning_rate": 0.00022235294117647057,
111
- "loss": 13.2132,
112
- "step": 32
113
- },
114
- {
115
- "epoch": 10.67,
116
- "eval_loss": 7.898101806640625,
117
- "eval_runtime": 0.5031,
118
- "eval_samples_per_second": 19.877,
119
- "eval_wer": 1.0,
120
- "step": 32
121
- },
122
- {
123
- "epoch": 12.0,
124
- "learning_rate": 0.00020823529411764704,
125
- "loss": 9.3548,
126
- "step": 36
127
- },
128
- {
129
- "epoch": 12.0,
130
- "eval_loss": 6.538975715637207,
131
- "eval_runtime": 0.5312,
132
- "eval_samples_per_second": 18.825,
133
- "eval_wer": 1.0,
134
- "step": 36
135
- },
136
- {
137
- "epoch": 13.33,
138
- "learning_rate": 0.0001941176470588235,
139
- "loss": 8.7253,
140
- "step": 40
141
- },
142
- {
143
- "epoch": 13.33,
144
- "eval_loss": 5.714229583740234,
145
- "eval_runtime": 0.4983,
146
- "eval_samples_per_second": 20.07,
147
- "eval_wer": 1.0,
148
- "step": 40
149
- },
150
- {
151
- "epoch": 14.67,
152
- "learning_rate": 0.00017999999999999998,
153
- "loss": 6.8295,
154
- "step": 44
155
- },
156
- {
157
- "epoch": 14.67,
158
- "eval_loss": 5.214608192443848,
159
- "eval_runtime": 0.5112,
160
- "eval_samples_per_second": 19.561,
161
- "eval_wer": 1.0,
162
- "step": 44
163
- },
164
- {
165
- "epoch": 16.0,
166
- "learning_rate": 0.00016588235294117646,
167
- "loss": 5.9458,
168
- "step": 48
169
- },
170
- {
171
- "epoch": 16.0,
172
- "eval_loss": 4.887683868408203,
173
- "eval_runtime": 0.5156,
174
- "eval_samples_per_second": 19.394,
175
- "eval_wer": 1.0,
176
- "step": 48
177
- },
178
- {
179
- "epoch": 17.33,
180
- "learning_rate": 0.00015176470588235293,
181
- "loss": 5.4214,
182
- "step": 52
183
- },
184
- {
185
- "epoch": 17.33,
186
- "eval_loss": 4.679065704345703,
187
- "eval_runtime": 1.0634,
188
- "eval_samples_per_second": 9.403,
189
- "eval_wer": 1.0,
190
- "step": 52
191
- },
192
- {
193
- "epoch": 18.67,
194
- "learning_rate": 0.0001376470588235294,
195
- "loss": 5.8716,
196
- "step": 56
197
- },
198
- {
199
- "epoch": 18.67,
200
- "eval_loss": 4.548101425170898,
201
- "eval_runtime": 0.4987,
202
- "eval_samples_per_second": 20.051,
203
- "eval_wer": 1.0,
204
- "step": 56
205
- },
206
- {
207
- "epoch": 20.0,
208
- "learning_rate": 0.00012352941176470587,
209
- "loss": 4.69,
210
- "step": 60
211
- },
212
- {
213
- "epoch": 20.0,
214
- "eval_loss": 4.435084342956543,
215
- "eval_runtime": 0.6371,
216
- "eval_samples_per_second": 15.695,
217
- "eval_wer": 1.0,
218
- "step": 60
219
- },
220
- {
221
- "epoch": 21.33,
222
- "learning_rate": 0.00010941176470588234,
223
- "loss": 4.6092,
224
- "step": 64
225
- },
226
- {
227
- "epoch": 21.33,
228
- "eval_loss": 4.350079536437988,
229
- "eval_runtime": 0.5165,
230
- "eval_samples_per_second": 19.361,
231
- "eval_wer": 1.0,
232
- "step": 64
233
- },
234
- {
235
- "epoch": 22.67,
236
- "learning_rate": 9.529411764705882e-05,
237
- "loss": 4.9462,
238
- "step": 68
239
- },
240
- {
241
- "epoch": 22.67,
242
- "eval_loss": 4.301529884338379,
243
- "eval_runtime": 0.4931,
244
- "eval_samples_per_second": 20.278,
245
- "eval_wer": 1.0,
246
- "step": 68
247
- },
248
- {
249
- "epoch": 24.0,
250
- "learning_rate": 8.117647058823529e-05,
251
- "loss": 4.7406,
252
- "step": 72
253
- },
254
- {
255
- "epoch": 24.0,
256
- "eval_loss": 4.267996311187744,
257
- "eval_runtime": 0.5187,
258
- "eval_samples_per_second": 19.279,
259
- "eval_wer": 1.0,
260
- "step": 72
261
- },
262
- {
263
- "epoch": 25.33,
264
- "learning_rate": 6.705882352941176e-05,
265
- "loss": 4.5484,
266
- "step": 76
267
- },
268
- {
269
- "epoch": 25.33,
270
- "eval_loss": 4.258701801300049,
271
- "eval_runtime": 0.5059,
272
- "eval_samples_per_second": 19.768,
273
- "eval_wer": 1.0,
274
- "step": 76
275
- },
276
- {
277
- "epoch": 26.67,
278
- "learning_rate": 5.294117647058824e-05,
279
- "loss": 4.2841,
280
- "step": 80
281
- },
282
- {
283
- "epoch": 26.67,
284
- "eval_loss": 4.255084991455078,
285
- "eval_runtime": 0.493,
286
- "eval_samples_per_second": 20.286,
287
- "eval_wer": 1.0,
288
- "step": 80
289
- },
290
- {
291
- "epoch": 28.0,
292
- "learning_rate": 3.882352941176471e-05,
293
- "loss": 4.4733,
294
- "step": 84
295
- },
296
- {
297
- "epoch": 28.0,
298
- "eval_loss": 4.255660533905029,
299
- "eval_runtime": 0.5041,
300
- "eval_samples_per_second": 19.839,
301
- "eval_wer": 1.0,
302
- "step": 84
303
- }
304
- ],
305
- "max_steps": 90,
306
- "num_train_epochs": 30,
307
- "total_flos": 3.564185651712e+17,
308
- "trial_name": null,
309
- "trial_params": null
310
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c2220a89d8cda2c9e9ddfe7fd922e39268d2ceebcd96397807b4516c8d2e156
3
- size 2351
 
 
 
 
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ㅝ": 0, "ㄱ": 1, "ㅂ": 2, "γ…‘": 3, "ㅜ": 4, ",": 5, "ㅄ": 6, "ㄴ": 7, "ㅕ": 8, "ㅟ": 9, "ㅐ": 10, "ㅆ": 11, "1": 12, "ㅊ": 13, "ㅖ": 14, "ㅈ": 15, "ㅉ": 16, "ㅙ": 17, "ㅛ": 18, ".": 19, "ㅌ": 20, "ㅇ": 21, "ㅚ": 23, "ㄶ": 24, "ㅣ": 25, "ㄸ": 26, "ㅍ": 27, "ㄹ": 28, "ㅔ": 29, "γ…’": 30, "γ…’": 31, "ㅏ": 32, "ㄷ": 33, "ㅅ": 34, "ㅠ": 35, "ㅓ": 36, "ㅘ": 37, "ㅋ": 38, "ㅁ": 39, "ㅎ": 40, "ㄲ": 41, "ㅗ": 42, "γ…‘": 43, "|": 22, "[UNK]": 44, "[PAD]": 45}