dq158 commited on
Commit
c5e1326
·
1 Parent(s): 16fa8f0
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "dq158/morbius",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "dq158/coqui",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "dq158/morbius",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "dq158/coqui",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33cd6ceaf3e5c0a15820c9055970f910107122ac5f2692ee038a552928978e93
3
  size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fdc10781ae17b5c33e74f66d11972140998a4ca7e36ed46fa1901c77bf1d4f0
3
  size 1980860410
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2efc889d52b2d6c0784b72b631f0fe873c965bb89fe3a1d443214cc2dcc088
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf0c8114e0d17b01c08b60e2655b7f3912c168b4c21d9948ca81961e68b3817
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3af69a82a1b91d6e5092406535f43bc1f304899b494dcf1053de5c4be7029152
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615a7c672f674cfa19b84fc4db0609e68907e0961c9b9105c4a59f95df807653
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d45ce75f87aaa5b92cdc5c7abdc86c8b62d1ae1a014d3240669d9630def6c5a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532e8f8b3e9fe98e9f61e70c68df3749ad95f0b1e1d90c26907050b4cdc46c03
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,184 +1,361 @@
1
  {
2
- "best_metric": 2.3189847469329834,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-9489",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 9489,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.16,
13
- "learning_rate": 7e-06,
14
- "loss": 2.4609,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.32,
19
- "learning_rate": 6.998903801353583e-06,
20
- "loss": 2.4409,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.47,
25
- "learning_rate": 6.9956158920723155e-06,
26
- "loss": 2.4156,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.63,
31
- "learning_rate": 6.9901383317000274e-06,
32
- "loss": 2.4131,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 0.79,
37
- "learning_rate": 6.982474551376301e-06,
38
- "loss": 2.425,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  "step": 2500
40
  },
41
  {
42
- "epoch": 0.95,
43
- "learning_rate": 6.972629351687204e-06,
44
- "loss": 2.4587,
45
  "step": 3000
46
  },
47
  {
48
- "epoch": 1.0,
49
  "eval_bleu": 1.0,
50
  "eval_brevity_penalty": 1.0,
51
  "eval_length_ratio": 1.0,
52
- "eval_loss": 2.3418445587158203,
53
  "eval_precisions": [
54
  1.0,
55
  1.0,
56
  1.0,
57
  1.0
58
  ],
59
- "eval_reference_length": 53280,
60
- "eval_runtime": 565.3469,
61
- "eval_samples_per_second": 4.974,
62
- "eval_steps_per_second": 0.623,
63
- "eval_translation_length": 53280,
64
- "step": 3163
65
  },
66
  {
67
- "epoch": 1.11,
68
- "learning_rate": 6.960608899658203e-06,
69
- "loss": 2.4199,
70
  "step": 3500
71
  },
72
  {
73
- "epoch": 1.26,
74
- "learning_rate": 6.946420724891154e-06,
75
- "loss": 2.3982,
76
  "step": 4000
77
  },
78
  {
79
- "epoch": 1.42,
80
- "learning_rate": 6.930073714847756e-06,
81
- "loss": 2.4308,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  "step": 4500
83
  },
84
  {
85
- "epoch": 1.58,
86
- "learning_rate": 6.911578109282458e-06,
87
- "loss": 2.4074,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  "step": 5000
89
  },
90
  {
91
- "epoch": 1.74,
92
- "learning_rate": 6.890945493828278e-06,
93
- "loss": 2.3945,
94
  "step": 5500
95
  },
96
  {
97
- "epoch": 1.9,
98
- "learning_rate": 6.868188792739579e-06,
99
- "loss": 2.4344,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  "step": 6000
101
  },
102
  {
103
- "epoch": 2.0,
 
 
 
 
 
 
104
  "eval_bleu": 1.0,
105
  "eval_brevity_penalty": 1.0,
106
  "eval_length_ratio": 1.0,
107
- "eval_loss": 2.3292136192321777,
108
  "eval_precisions": [
109
  1.0,
110
  1.0,
111
  1.0,
112
  1.0
113
  ],
114
- "eval_reference_length": 53147,
115
- "eval_runtime": 562.613,
116
- "eval_samples_per_second": 4.998,
117
- "eval_steps_per_second": 0.626,
118
- "eval_translation_length": 53147,
119
- "step": 6326
120
  },
121
  {
122
- "epoch": 2.06,
123
- "learning_rate": 6.8433222607963194e-06,
124
- "loss": 2.4242,
125
- "step": 6500
126
  },
127
  {
128
- "epoch": 2.21,
129
- "learning_rate": 6.816361474374877e-06,
130
- "loss": 2.4037,
131
- "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  },
133
  {
134
- "epoch": 2.37,
135
- "learning_rate": 6.787323321691012e-06,
136
- "loss": 2.3952,
137
  "step": 7500
138
  },
139
  {
140
- "epoch": 2.53,
141
- "learning_rate": 6.7562259922211045e-06,
142
- "loss": 2.4106,
143
  "step": 8000
144
  },
145
  {
146
- "epoch": 2.69,
147
- "learning_rate": 6.723088965308281e-06,
148
- "loss": 2.4145,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "step": 8500
150
  },
151
  {
152
- "epoch": 2.85,
153
- "learning_rate": 6.68793299796057e-06,
154
- "loss": 2.356,
155
  "step": 9000
156
  },
157
  {
158
- "epoch": 3.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  "eval_bleu": 1.0,
160
  "eval_brevity_penalty": 1.0,
161
  "eval_length_ratio": 1.0,
162
- "eval_loss": 2.3189847469329834,
163
  "eval_precisions": [
164
  1.0,
165
  1.0,
166
  1.0,
167
  1.0
168
  ],
169
- "eval_reference_length": 53040,
170
- "eval_runtime": 561.5646,
171
- "eval_samples_per_second": 5.007,
172
- "eval_steps_per_second": 0.627,
173
- "eval_translation_length": 53040,
174
- "step": 9489
175
  }
176
  ],
177
  "logging_steps": 500,
178
- "max_steps": 63260,
179
  "num_train_epochs": 20,
180
  "save_steps": 500,
181
- "total_flos": 5.197924586697523e+16,
182
  "trial_name": null,
183
  "trial_params": null
184
  }
 
1
  {
2
+ "best_metric": 2.047217607498169,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-9939",
4
+ "epoch": 12.0,
5
  "eval_steps": 500,
6
+ "global_step": 9939,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.6,
13
+ "learning_rate": 4e-05,
14
+ "loss": 2.3843,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 1.0,
19
+ "eval_bleu": 1.0,
20
+ "eval_brevity_penalty": 1.0,
21
+ "eval_length_ratio": 1.0,
22
+ "eval_loss": 2.1837692260742188,
23
+ "eval_precisions": [
24
+ 1.0,
25
+ 1.0,
26
+ 1.0,
27
+ 1.0
28
+ ],
29
+ "eval_reference_length": 54613,
30
+ "eval_runtime": 607.6631,
31
+ "eval_samples_per_second": 4.846,
32
+ "eval_steps_per_second": 0.607,
33
+ "eval_translation_length": 54613,
34
+ "step": 828
35
+ },
36
+ {
37
+ "epoch": 1.21,
38
+ "learning_rate": 3.9904412209094755e-05,
39
+ "loss": 2.3038,
40
  "step": 1000
41
  },
42
  {
43
+ "epoch": 1.81,
44
+ "learning_rate": 3.961856253895603e-05,
45
+ "loss": 2.2743,
46
  "step": 1500
47
  },
48
  {
49
+ "epoch": 2.0,
50
+ "eval_bleu": 1.0,
51
+ "eval_brevity_penalty": 1.0,
52
+ "eval_length_ratio": 1.0,
53
+ "eval_loss": 2.139969825744629,
54
+ "eval_precisions": [
55
+ 1.0,
56
+ 1.0,
57
+ 1.0,
58
+ 1.0
59
+ ],
60
+ "eval_reference_length": 53569,
61
+ "eval_runtime": 602.4016,
62
+ "eval_samples_per_second": 4.889,
63
+ "eval_steps_per_second": 0.613,
64
+ "eval_translation_length": 53569,
65
+ "step": 1656
66
+ },
67
+ {
68
+ "epoch": 2.41,
69
+ "learning_rate": 3.9145183363433777e-05,
70
+ "loss": 2.2346,
71
  "step": 2000
72
  },
73
  {
74
+ "epoch": 3.0,
75
+ "eval_bleu": 1.0,
76
+ "eval_brevity_penalty": 1.0,
77
+ "eval_length_ratio": 1.0,
78
+ "eval_loss": 2.113609552383423,
79
+ "eval_precisions": [
80
+ 1.0,
81
+ 1.0,
82
+ 1.0,
83
+ 1.0
84
+ ],
85
+ "eval_reference_length": 53706,
86
+ "eval_runtime": 601.3356,
87
+ "eval_samples_per_second": 4.897,
88
+ "eval_steps_per_second": 0.614,
89
+ "eval_translation_length": 53706,
90
+ "step": 2484
91
+ },
92
+ {
93
+ "epoch": 3.02,
94
+ "learning_rate": 3.848879960949287e-05,
95
+ "loss": 2.219,
96
  "step": 2500
97
  },
98
  {
99
+ "epoch": 3.62,
100
+ "learning_rate": 3.765568550443583e-05,
101
+ "loss": 2.1894,
102
  "step": 3000
103
  },
104
  {
105
+ "epoch": 4.0,
106
  "eval_bleu": 1.0,
107
  "eval_brevity_penalty": 1.0,
108
  "eval_length_ratio": 1.0,
109
+ "eval_loss": 2.09635066986084,
110
  "eval_precisions": [
111
  1.0,
112
  1.0,
113
  1.0,
114
  1.0
115
  ],
116
+ "eval_reference_length": 53658,
117
+ "eval_runtime": 602.1777,
118
+ "eval_samples_per_second": 4.891,
119
+ "eval_steps_per_second": 0.613,
120
+ "eval_translation_length": 53658,
121
+ "step": 3313
122
  },
123
  {
124
+ "epoch": 4.23,
125
+ "learning_rate": 3.6653804601950126e-05,
126
+ "loss": 2.1772,
127
  "step": 3500
128
  },
129
  {
130
+ "epoch": 4.83,
131
+ "learning_rate": 3.5492733660257605e-05,
132
+ "loss": 2.1547,
133
  "step": 4000
134
  },
135
  {
136
+ "epoch": 5.0,
137
+ "eval_bleu": 1.0,
138
+ "eval_brevity_penalty": 1.0,
139
+ "eval_length_ratio": 1.0,
140
+ "eval_loss": 2.083543539047241,
141
+ "eval_precisions": [
142
+ 1.0,
143
+ 1.0,
144
+ 1.0,
145
+ 1.0
146
+ ],
147
+ "eval_reference_length": 53886,
148
+ "eval_runtime": 602.7422,
149
+ "eval_samples_per_second": 4.886,
150
+ "eval_steps_per_second": 0.612,
151
+ "eval_translation_length": 53886,
152
+ "step": 4141
153
+ },
154
+ {
155
+ "epoch": 5.43,
156
+ "learning_rate": 3.4183571099998355e-05,
157
+ "loss": 2.1303,
158
  "step": 4500
159
  },
160
  {
161
+ "epoch": 6.0,
162
+ "eval_bleu": 1.0,
163
+ "eval_brevity_penalty": 1.0,
164
+ "eval_length_ratio": 1.0,
165
+ "eval_loss": 2.0750300884246826,
166
+ "eval_precisions": [
167
+ 1.0,
168
+ 1.0,
169
+ 1.0,
170
+ 1.0
171
+ ],
172
+ "eval_reference_length": 53636,
173
+ "eval_runtime": 602.3864,
174
+ "eval_samples_per_second": 4.889,
175
+ "eval_steps_per_second": 0.613,
176
+ "eval_translation_length": 53636,
177
+ "step": 4969
178
+ },
179
+ {
180
+ "epoch": 6.04,
181
+ "learning_rate": 3.273883091687946e-05,
182
+ "loss": 2.1421,
183
  "step": 5000
184
  },
185
  {
186
+ "epoch": 6.64,
187
+ "learning_rate": 3.117232306315456e-05,
188
+ "loss": 2.1231,
189
  "step": 5500
190
  },
191
  {
192
+ "epoch": 7.0,
193
+ "eval_bleu": 1.0,
194
+ "eval_brevity_penalty": 1.0,
195
+ "eval_length_ratio": 1.0,
196
+ "eval_loss": 2.0663375854492188,
197
+ "eval_precisions": [
198
+ 1.0,
199
+ 1.0,
200
+ 1.0,
201
+ 1.0
202
+ ],
203
+ "eval_reference_length": 53739,
204
+ "eval_runtime": 603.5685,
205
+ "eval_samples_per_second": 4.879,
206
+ "eval_steps_per_second": 0.611,
207
+ "eval_translation_length": 53739,
208
+ "step": 5797
209
+ },
210
+ {
211
+ "epoch": 7.24,
212
+ "learning_rate": 2.9499021441341012e-05,
213
+ "loss": 2.1103,
214
  "step": 6000
215
  },
216
  {
217
+ "epoch": 7.85,
218
+ "learning_rate": 2.773492077199351e-05,
219
+ "loss": 2.0866,
220
+ "step": 6500
221
+ },
222
+ {
223
+ "epoch": 8.0,
224
  "eval_bleu": 1.0,
225
  "eval_brevity_penalty": 1.0,
226
  "eval_length_ratio": 1.0,
227
+ "eval_loss": 2.060825824737549,
228
  "eval_precisions": [
229
  1.0,
230
  1.0,
231
  1.0,
232
  1.0
233
  ],
234
+ "eval_reference_length": 53979,
235
+ "eval_runtime": 603.0031,
236
+ "eval_samples_per_second": 4.884,
237
+ "eval_steps_per_second": 0.612,
238
+ "eval_translation_length": 53979,
239
+ "step": 6626
240
  },
241
  {
242
+ "epoch": 8.45,
243
+ "learning_rate": 2.589688370370382e-05,
244
+ "loss": 2.0876,
245
+ "step": 7000
246
  },
247
  {
248
+ "epoch": 9.0,
249
+ "eval_bleu": 1.0,
250
+ "eval_brevity_penalty": 1.0,
251
+ "eval_length_ratio": 1.0,
252
+ "eval_loss": 2.0561046600341797,
253
+ "eval_precisions": [
254
+ 1.0,
255
+ 1.0,
256
+ 1.0,
257
+ 1.0
258
+ ],
259
+ "eval_reference_length": 53596,
260
+ "eval_runtime": 603.5437,
261
+ "eval_samples_per_second": 4.88,
262
+ "eval_steps_per_second": 0.611,
263
+ "eval_translation_length": 53596,
264
+ "step": 7454
265
  },
266
  {
267
+ "epoch": 9.06,
268
+ "learning_rate": 2.4002479626767903e-05,
269
+ "loss": 2.0782,
270
  "step": 7500
271
  },
272
  {
273
+ "epoch": 9.66,
274
+ "learning_rate": 2.206981673126539e-05,
275
+ "loss": 2.0736,
276
  "step": 8000
277
  },
278
  {
279
+ "epoch": 10.0,
280
+ "eval_bleu": 1.0,
281
+ "eval_brevity_penalty": 1.0,
282
+ "eval_length_ratio": 1.0,
283
+ "eval_loss": 2.0527448654174805,
284
+ "eval_precisions": [
285
+ 1.0,
286
+ 1.0,
287
+ 1.0,
288
+ 1.0
289
+ ],
290
+ "eval_reference_length": 53846,
291
+ "eval_runtime": 603.4511,
292
+ "eval_samples_per_second": 4.88,
293
+ "eval_steps_per_second": 0.611,
294
+ "eval_translation_length": 53846,
295
+ "step": 8282
296
+ },
297
+ {
298
+ "epoch": 10.26,
299
+ "learning_rate": 2.0117368914870838e-05,
300
+ "loss": 2.0662,
301
  "step": 8500
302
  },
303
  {
304
+ "epoch": 10.87,
305
+ "learning_rate": 1.8163799194946938e-05,
306
+ "loss": 2.0613,
307
  "step": 9000
308
  },
309
  {
310
+ "epoch": 11.0,
311
+ "eval_bleu": 1.0,
312
+ "eval_brevity_penalty": 1.0,
313
+ "eval_length_ratio": 1.0,
314
+ "eval_loss": 2.050435781478882,
315
+ "eval_precisions": [
316
+ 1.0,
317
+ 1.0,
318
+ 1.0,
319
+ 1.0
320
+ ],
321
+ "eval_reference_length": 53805,
322
+ "eval_runtime": 604.156,
323
+ "eval_samples_per_second": 4.875,
324
+ "eval_steps_per_second": 0.611,
325
+ "eval_translation_length": 53805,
326
+ "step": 9110
327
+ },
328
+ {
329
+ "epoch": 11.47,
330
+ "learning_rate": 1.6227781312884388e-05,
331
+ "loss": 2.049,
332
+ "step": 9500
333
+ },
334
+ {
335
+ "epoch": 12.0,
336
  "eval_bleu": 1.0,
337
  "eval_brevity_penalty": 1.0,
338
  "eval_length_ratio": 1.0,
339
+ "eval_loss": 2.047217607498169,
340
  "eval_precisions": [
341
  1.0,
342
  1.0,
343
  1.0,
344
  1.0
345
  ],
346
+ "eval_reference_length": 53919,
347
+ "eval_runtime": 603.5433,
348
+ "eval_samples_per_second": 4.88,
349
+ "eval_steps_per_second": 0.611,
350
+ "eval_translation_length": 53919,
351
+ "step": 9939
352
  }
353
  ],
354
  "logging_steps": 500,
355
+ "max_steps": 16560,
356
  "num_train_epochs": 20,
357
  "save_steps": 500,
358
+ "total_flos": 2.1776105516669338e+17,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7074403f91b5effcdf3dccd08c11bb9b624bb4f590a50184209e49e2983b54b
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b06978cc3d00aca95594f59ec7e9b9fa96aa51c26c2bd29cd053e001acfe17
3
  size 4664
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2efc889d52b2d6c0784b72b631f0fe873c965bb89fe3a1d443214cc2dcc088
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0881e5cfadc862f8b984701ed3e924d87d92fd6ae4b951ff136e435ed6f478fd
3
  size 990409330
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7074403f91b5effcdf3dccd08c11bb9b624bb4f590a50184209e49e2983b54b
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b06978cc3d00aca95594f59ec7e9b9fa96aa51c26c2bd29cd053e001acfe17
3
  size 4664