dq158 commited on
Commit
4f34cac
·
1 Parent(s): 0d22e77

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "dq158/coqui",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "dq158/morbius",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fdc10781ae17b5c33e74f66d11972140998a4ca7e36ed46fa1901c77bf1d4f0
3
  size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:573d5fb0779ea054213c4ae2227960ee315633ca0772c084f2002fed6ff6ec29
3
  size 1980860410
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faf0c8114e0d17b01c08b60e2655b7f3912c168b4c21d9948ca81961e68b3817
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2d97b736486f914e0d33e37b554901f3dfb9b817c3d0af663642879027b1aa
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:615a7c672f674cfa19b84fc4db0609e68907e0961c9b9105c4a59f95df807653
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae00bd8b8e83d970fa0946a2f0647cb96a40344898cf84cce24800471e2a072a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:532e8f8b3e9fe98e9f61e70c68df3749ad95f0b1e1d90c26907050b4cdc46c03
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa1eb1c061b915ee84a31a8f051d08956c7267b1cce00afc65fec7ef070c9ad6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,361 +1,56 @@
1
  {
2
- "best_metric": 2.047217607498169,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-9939",
4
- "epoch": 12.0,
5
  "eval_steps": 500,
6
- "global_step": 9939,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.6,
13
- "learning_rate": 4e-05,
14
- "loss": 2.3843,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 1.0,
19
- "eval_bleu": 1.0,
20
- "eval_brevity_penalty": 1.0,
21
- "eval_length_ratio": 1.0,
22
- "eval_loss": 2.1837692260742188,
23
- "eval_precisions": [
24
- 1.0,
25
- 1.0,
26
- 1.0,
27
- 1.0
28
- ],
29
- "eval_reference_length": 54613,
30
- "eval_runtime": 607.6631,
31
- "eval_samples_per_second": 4.846,
32
- "eval_steps_per_second": 0.607,
33
- "eval_translation_length": 54613,
34
- "step": 828
35
- },
36
- {
37
- "epoch": 1.21,
38
- "learning_rate": 3.9904412209094755e-05,
39
- "loss": 2.3038,
40
  "step": 1000
41
  },
42
  {
43
- "epoch": 1.81,
44
- "learning_rate": 3.961856253895603e-05,
45
- "loss": 2.2743,
46
  "step": 1500
47
  },
48
  {
49
- "epoch": 2.0,
50
- "eval_bleu": 1.0,
51
- "eval_brevity_penalty": 1.0,
52
- "eval_length_ratio": 1.0,
53
- "eval_loss": 2.139969825744629,
54
- "eval_precisions": [
55
- 1.0,
56
- 1.0,
57
- 1.0,
58
- 1.0
59
- ],
60
- "eval_reference_length": 53569,
61
- "eval_runtime": 602.4016,
62
- "eval_samples_per_second": 4.889,
63
- "eval_steps_per_second": 0.613,
64
- "eval_translation_length": 53569,
65
- "step": 1656
66
- },
67
- {
68
- "epoch": 2.41,
69
- "learning_rate": 3.9145183363433777e-05,
70
- "loss": 2.2346,
71
- "step": 2000
72
- },
73
- {
74
- "epoch": 3.0,
75
- "eval_bleu": 1.0,
76
- "eval_brevity_penalty": 1.0,
77
- "eval_length_ratio": 1.0,
78
- "eval_loss": 2.113609552383423,
79
- "eval_precisions": [
80
- 1.0,
81
- 1.0,
82
- 1.0,
83
- 1.0
84
- ],
85
- "eval_reference_length": 53706,
86
- "eval_runtime": 601.3356,
87
- "eval_samples_per_second": 4.897,
88
- "eval_steps_per_second": 0.614,
89
- "eval_translation_length": 53706,
90
- "step": 2484
91
- },
92
- {
93
- "epoch": 3.02,
94
- "learning_rate": 3.848879960949287e-05,
95
- "loss": 2.219,
96
- "step": 2500
97
- },
98
- {
99
- "epoch": 3.62,
100
- "learning_rate": 3.765568550443583e-05,
101
- "loss": 2.1894,
102
- "step": 3000
103
- },
104
- {
105
- "epoch": 4.0,
106
- "eval_bleu": 1.0,
107
- "eval_brevity_penalty": 1.0,
108
- "eval_length_ratio": 1.0,
109
- "eval_loss": 2.09635066986084,
110
- "eval_precisions": [
111
- 1.0,
112
- 1.0,
113
- 1.0,
114
- 1.0
115
- ],
116
- "eval_reference_length": 53658,
117
- "eval_runtime": 602.1777,
118
- "eval_samples_per_second": 4.891,
119
- "eval_steps_per_second": 0.613,
120
- "eval_translation_length": 53658,
121
- "step": 3313
122
- },
123
- {
124
- "epoch": 4.23,
125
- "learning_rate": 3.6653804601950126e-05,
126
- "loss": 2.1772,
127
- "step": 3500
128
- },
129
- {
130
- "epoch": 4.83,
131
- "learning_rate": 3.5492733660257605e-05,
132
- "loss": 2.1547,
133
- "step": 4000
134
- },
135
- {
136
- "epoch": 5.0,
137
- "eval_bleu": 1.0,
138
- "eval_brevity_penalty": 1.0,
139
- "eval_length_ratio": 1.0,
140
- "eval_loss": 2.083543539047241,
141
- "eval_precisions": [
142
- 1.0,
143
- 1.0,
144
- 1.0,
145
- 1.0
146
- ],
147
- "eval_reference_length": 53886,
148
- "eval_runtime": 602.7422,
149
- "eval_samples_per_second": 4.886,
150
- "eval_steps_per_second": 0.612,
151
- "eval_translation_length": 53886,
152
- "step": 4141
153
- },
154
- {
155
- "epoch": 5.43,
156
- "learning_rate": 3.4183571099998355e-05,
157
- "loss": 2.1303,
158
- "step": 4500
159
- },
160
- {
161
- "epoch": 6.0,
162
- "eval_bleu": 1.0,
163
- "eval_brevity_penalty": 1.0,
164
- "eval_length_ratio": 1.0,
165
- "eval_loss": 2.0750300884246826,
166
- "eval_precisions": [
167
- 1.0,
168
- 1.0,
169
- 1.0,
170
- 1.0
171
- ],
172
- "eval_reference_length": 53636,
173
- "eval_runtime": 602.3864,
174
- "eval_samples_per_second": 4.889,
175
- "eval_steps_per_second": 0.613,
176
- "eval_translation_length": 53636,
177
- "step": 4969
178
- },
179
- {
180
- "epoch": 6.04,
181
- "learning_rate": 3.273883091687946e-05,
182
- "loss": 2.1421,
183
- "step": 5000
184
- },
185
- {
186
- "epoch": 6.64,
187
- "learning_rate": 3.117232306315456e-05,
188
- "loss": 2.1231,
189
- "step": 5500
190
- },
191
- {
192
- "epoch": 7.0,
193
- "eval_bleu": 1.0,
194
- "eval_brevity_penalty": 1.0,
195
- "eval_length_ratio": 1.0,
196
- "eval_loss": 2.0663375854492188,
197
- "eval_precisions": [
198
- 1.0,
199
- 1.0,
200
- 1.0,
201
- 1.0
202
- ],
203
- "eval_reference_length": 53739,
204
- "eval_runtime": 603.5685,
205
- "eval_samples_per_second": 4.879,
206
- "eval_steps_per_second": 0.611,
207
- "eval_translation_length": 53739,
208
- "step": 5797
209
- },
210
- {
211
- "epoch": 7.24,
212
- "learning_rate": 2.9499021441341012e-05,
213
- "loss": 2.1103,
214
- "step": 6000
215
- },
216
- {
217
- "epoch": 7.85,
218
- "learning_rate": 2.773492077199351e-05,
219
- "loss": 2.0866,
220
- "step": 6500
221
- },
222
- {
223
- "epoch": 8.0,
224
- "eval_bleu": 1.0,
225
- "eval_brevity_penalty": 1.0,
226
- "eval_length_ratio": 1.0,
227
- "eval_loss": 2.060825824737549,
228
- "eval_precisions": [
229
- 1.0,
230
- 1.0,
231
- 1.0,
232
- 1.0
233
- ],
234
- "eval_reference_length": 53979,
235
- "eval_runtime": 603.0031,
236
- "eval_samples_per_second": 4.884,
237
- "eval_steps_per_second": 0.612,
238
- "eval_translation_length": 53979,
239
- "step": 6626
240
- },
241
- {
242
- "epoch": 8.45,
243
- "learning_rate": 2.589688370370382e-05,
244
- "loss": 2.0876,
245
- "step": 7000
246
- },
247
- {
248
- "epoch": 9.0,
249
- "eval_bleu": 1.0,
250
- "eval_brevity_penalty": 1.0,
251
- "eval_length_ratio": 1.0,
252
- "eval_loss": 2.0561046600341797,
253
- "eval_precisions": [
254
- 1.0,
255
- 1.0,
256
- 1.0,
257
- 1.0
258
- ],
259
- "eval_reference_length": 53596,
260
- "eval_runtime": 603.5437,
261
- "eval_samples_per_second": 4.88,
262
- "eval_steps_per_second": 0.611,
263
- "eval_translation_length": 53596,
264
- "step": 7454
265
- },
266
- {
267
- "epoch": 9.06,
268
- "learning_rate": 2.4002479626767903e-05,
269
- "loss": 2.0782,
270
- "step": 7500
271
- },
272
- {
273
- "epoch": 9.66,
274
- "learning_rate": 2.206981673126539e-05,
275
- "loss": 2.0736,
276
- "step": 8000
277
- },
278
- {
279
- "epoch": 10.0,
280
- "eval_bleu": 1.0,
281
- "eval_brevity_penalty": 1.0,
282
- "eval_length_ratio": 1.0,
283
- "eval_loss": 2.0527448654174805,
284
- "eval_precisions": [
285
- 1.0,
286
- 1.0,
287
- 1.0,
288
- 1.0
289
- ],
290
- "eval_reference_length": 53846,
291
- "eval_runtime": 603.4511,
292
- "eval_samples_per_second": 4.88,
293
- "eval_steps_per_second": 0.611,
294
- "eval_translation_length": 53846,
295
- "step": 8282
296
- },
297
- {
298
- "epoch": 10.26,
299
- "learning_rate": 2.0117368914870838e-05,
300
- "loss": 2.0662,
301
- "step": 8500
302
- },
303
- {
304
- "epoch": 10.87,
305
- "learning_rate": 1.8163799194946938e-05,
306
- "loss": 2.0613,
307
- "step": 9000
308
- },
309
- {
310
- "epoch": 11.0,
311
- "eval_bleu": 1.0,
312
- "eval_brevity_penalty": 1.0,
313
- "eval_length_ratio": 1.0,
314
- "eval_loss": 2.050435781478882,
315
- "eval_precisions": [
316
- 1.0,
317
- 1.0,
318
- 1.0,
319
- 1.0
320
- ],
321
- "eval_reference_length": 53805,
322
- "eval_runtime": 604.156,
323
- "eval_samples_per_second": 4.875,
324
- "eval_steps_per_second": 0.611,
325
- "eval_translation_length": 53805,
326
- "step": 9110
327
- },
328
- {
329
- "epoch": 11.47,
330
- "learning_rate": 1.6227781312884388e-05,
331
- "loss": 2.049,
332
- "step": 9500
333
- },
334
- {
335
- "epoch": 12.0,
336
  "eval_bleu": 1.0,
337
  "eval_brevity_penalty": 1.0,
338
  "eval_length_ratio": 1.0,
339
- "eval_loss": 2.047217607498169,
340
  "eval_precisions": [
341
  1.0,
342
  1.0,
343
  1.0,
344
  1.0
345
  ],
346
- "eval_reference_length": 53919,
347
- "eval_runtime": 603.5433,
348
- "eval_samples_per_second": 4.88,
349
- "eval_steps_per_second": 0.611,
350
- "eval_translation_length": 53919,
351
- "step": 9939
352
  }
353
  ],
354
  "logging_steps": 500,
355
- "max_steps": 16560,
356
  "num_train_epochs": 20,
357
  "save_steps": 500,
358
- "total_flos": 2.1776105516669338e+17,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
 
1
  {
2
+ "best_metric": 2.055070638656616,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-1581",
4
+ "epoch": 0.9996838444514701,
5
  "eval_steps": 500,
6
+ "global_step": 1581,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.32,
13
+ "learning_rate": 7e-06,
14
+ "loss": 2.2293,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.63,
19
+ "learning_rate": 6.995542345369891e-06,
20
+ "loss": 2.2394,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.95,
25
+ "learning_rate": 6.982180736156593e-06,
26
+ "loss": 2.1909,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "eval_bleu": 1.0,
32
  "eval_brevity_penalty": 1.0,
33
  "eval_length_ratio": 1.0,
34
+ "eval_loss": 2.055070638656616,
35
  "eval_precisions": [
36
  1.0,
37
  1.0,
38
  1.0,
39
  1.0
40
  ],
41
+ "eval_reference_length": 52600,
42
+ "eval_runtime": 564.5086,
43
+ "eval_samples_per_second": 4.981,
44
+ "eval_steps_per_second": 0.624,
45
+ "eval_translation_length": 52600,
46
+ "step": 1581
47
  }
48
  ],
49
  "logging_steps": 500,
50
+ "max_steps": 31620,
51
  "num_train_epochs": 20,
52
  "save_steps": 500,
53
+ "total_flos": 1.7326415288991744e+16,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b06978cc3d00aca95594f59ec7e9b9fa96aa51c26c2bd29cd053e001acfe17
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6df79ed7ed734cb8185c41dd11c4885025a9e70f202b7c2ebbef7d4692999c6
3
  size 4664