Ethan Sim committed on
Commit
95bec45
·
1 Parent(s): 7872ded

update model with 1.0 safeguard

Browse files
checkpoint-80000/trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  "eval_bleu": 38.1466,
19
  "eval_gen_len": 37.2387,
20
  "eval_loss": 0.1091395914554596,
21
- "eval_runtime": 171.3952,
22
- "eval_samples_per_second": 6.085,
23
- "eval_steps_per_second": 0.193,
24
  "step": 4000
25
  },
26
  {
@@ -34,9 +34,9 @@
34
  "eval_bleu": 40.8678,
35
  "eval_gen_len": 34.2282,
36
  "eval_loss": 0.1069067046046257,
37
- "eval_runtime": 198.8833,
38
- "eval_samples_per_second": 5.244,
39
- "eval_steps_per_second": 0.166,
40
  "step": 8000
41
  },
42
  {
@@ -50,9 +50,9 @@
50
  "eval_bleu": 41.2373,
51
  "eval_gen_len": 34.722,
52
  "eval_loss": 0.10556207597255707,
53
- "eval_runtime": 189.0135,
54
- "eval_samples_per_second": 5.518,
55
- "eval_steps_per_second": 0.175,
56
  "step": 12000
57
  },
58
  {
@@ -66,9 +66,9 @@
66
  "eval_bleu": 41.2236,
67
  "eval_gen_len": 35.4794,
68
  "eval_loss": 0.10456866770982742,
69
- "eval_runtime": 136.4617,
70
- "eval_samples_per_second": 7.643,
71
- "eval_steps_per_second": 0.242,
72
  "step": 16000
73
  },
74
  {
@@ -82,9 +82,9 @@
82
  "eval_bleu": 41.6373,
83
  "eval_gen_len": 35.6874,
84
  "eval_loss": 0.10391870141029358,
85
- "eval_runtime": 153.8741,
86
- "eval_samples_per_second": 6.778,
87
- "eval_steps_per_second": 0.214,
88
  "step": 20000
89
  },
90
  {
@@ -98,9 +98,9 @@
98
  "eval_bleu": 41.2704,
99
  "eval_gen_len": 35.7728,
100
  "eval_loss": 0.10389306396245956,
101
- "eval_runtime": 144.2866,
102
- "eval_samples_per_second": 7.229,
103
- "eval_steps_per_second": 0.229,
104
  "step": 24000
105
  },
106
  {
@@ -114,9 +114,9 @@
114
  "eval_bleu": 41.6866,
115
  "eval_gen_len": 35.348,
116
  "eval_loss": 0.10308653861284256,
117
- "eval_runtime": 156.8578,
118
- "eval_samples_per_second": 6.649,
119
- "eval_steps_per_second": 0.21,
120
  "step": 28000
121
  },
122
  {
@@ -130,9 +130,9 @@
130
  "eval_bleu": 41.3994,
131
  "eval_gen_len": 35.7287,
132
  "eval_loss": 0.10239578038454056,
133
- "eval_runtime": 126.4909,
134
- "eval_samples_per_second": 8.246,
135
- "eval_steps_per_second": 0.261,
136
  "step": 32000
137
  },
138
  {
@@ -146,9 +146,9 @@
146
  "eval_bleu": 39.5845,
147
  "eval_gen_len": 39.464,
148
  "eval_loss": 0.102129265666008,
149
- "eval_runtime": 129.762,
150
- "eval_samples_per_second": 8.038,
151
- "eval_steps_per_second": 0.254,
152
  "step": 36000
153
  },
154
  {
@@ -162,9 +162,9 @@
162
  "eval_bleu": 42.2251,
163
  "eval_gen_len": 34.3845,
164
  "eval_loss": 0.10222680866718292,
165
- "eval_runtime": 106.5884,
166
- "eval_samples_per_second": 9.785,
167
- "eval_steps_per_second": 0.31,
168
  "step": 40000
169
  },
170
  {
@@ -178,9 +178,9 @@
178
  "eval_bleu": 41.9432,
179
  "eval_gen_len": 35.0316,
180
  "eval_loss": 0.10267173498868942,
181
- "eval_runtime": 129.8219,
182
- "eval_samples_per_second": 8.034,
183
- "eval_steps_per_second": 0.254,
184
  "step": 44000
185
  },
186
  {
@@ -194,9 +194,9 @@
194
  "eval_bleu": 42.028,
195
  "eval_gen_len": 35.8677,
196
  "eval_loss": 0.102423757314682,
197
- "eval_runtime": 130.0386,
198
- "eval_samples_per_second": 8.021,
199
- "eval_steps_per_second": 0.254,
200
  "step": 48000
201
  },
202
  {
@@ -210,9 +210,9 @@
210
  "eval_bleu": 41.9107,
211
  "eval_gen_len": 35.6884,
212
  "eval_loss": 0.1020052582025528,
213
- "eval_runtime": 105.0208,
214
- "eval_samples_per_second": 9.931,
215
- "eval_steps_per_second": 0.314,
216
  "step": 52000
217
  },
218
  {
@@ -226,9 +226,9 @@
226
  "eval_bleu": 41.898,
227
  "eval_gen_len": 35.7948,
228
  "eval_loss": 0.10190638899803162,
229
- "eval_runtime": 154.2922,
230
- "eval_samples_per_second": 6.76,
231
- "eval_steps_per_second": 0.214,
232
  "step": 56000
233
  },
234
  {
@@ -242,9 +242,9 @@
242
  "eval_bleu": 42.4405,
243
  "eval_gen_len": 34.8159,
244
  "eval_loss": 0.10122686624526978,
245
- "eval_runtime": 118.3439,
246
- "eval_samples_per_second": 8.813,
247
- "eval_steps_per_second": 0.279,
248
  "step": 60000
249
  },
250
  {
@@ -258,9 +258,9 @@
258
  "eval_bleu": 42.1656,
259
  "eval_gen_len": 35.1611,
260
  "eval_loss": 0.1020784005522728,
261
- "eval_runtime": 134.6489,
262
- "eval_samples_per_second": 7.746,
263
- "eval_steps_per_second": 0.245,
264
  "step": 64000
265
  },
266
  {
@@ -274,9 +274,9 @@
274
  "eval_bleu": 42.2236,
275
  "eval_gen_len": 34.6663,
276
  "eval_loss": 0.10225772112607956,
277
- "eval_runtime": 127.5545,
278
- "eval_samples_per_second": 8.177,
279
- "eval_steps_per_second": 0.259,
280
  "step": 68000
281
  },
282
  {
@@ -290,9 +290,9 @@
290
  "eval_bleu": 42.1593,
291
  "eval_gen_len": 34.5043,
292
  "eval_loss": 0.10208923369646072,
293
- "eval_runtime": 88.1144,
294
- "eval_samples_per_second": 11.837,
295
- "eval_steps_per_second": 0.375,
296
  "step": 72000
297
  },
298
  {
@@ -306,9 +306,9 @@
306
  "eval_bleu": 42.0667,
307
  "eval_gen_len": 35.1371,
308
  "eval_loss": 0.10195324569940567,
309
- "eval_runtime": 120.6638,
310
- "eval_samples_per_second": 8.644,
311
- "eval_steps_per_second": 0.273,
312
  "step": 76000
313
  },
314
  {
@@ -322,9 +322,9 @@
322
  "eval_bleu": 42.7449,
323
  "eval_gen_len": 35.2848,
324
  "eval_loss": 0.10178153961896896,
325
- "eval_runtime": 116.145,
326
- "eval_samples_per_second": 8.98,
327
- "eval_steps_per_second": 0.284,
328
  "step": 80000
329
  }
330
  ],
 
18
  "eval_bleu": 38.1466,
19
  "eval_gen_len": 37.2387,
20
  "eval_loss": 0.1091395914554596,
21
+ "eval_runtime": 169.6196,
22
+ "eval_samples_per_second": 6.149,
23
+ "eval_steps_per_second": 0.195,
24
  "step": 4000
25
  },
26
  {
 
34
  "eval_bleu": 40.8678,
35
  "eval_gen_len": 34.2282,
36
  "eval_loss": 0.1069067046046257,
37
+ "eval_runtime": 187.3165,
38
+ "eval_samples_per_second": 5.568,
39
+ "eval_steps_per_second": 0.176,
40
  "step": 8000
41
  },
42
  {
 
50
  "eval_bleu": 41.2373,
51
  "eval_gen_len": 34.722,
52
  "eval_loss": 0.10556207597255707,
53
+ "eval_runtime": 176.6792,
54
+ "eval_samples_per_second": 5.903,
55
+ "eval_steps_per_second": 0.187,
56
  "step": 12000
57
  },
58
  {
 
66
  "eval_bleu": 41.2236,
67
  "eval_gen_len": 35.4794,
68
  "eval_loss": 0.10456866770982742,
69
+ "eval_runtime": 126.4686,
70
+ "eval_samples_per_second": 8.247,
71
+ "eval_steps_per_second": 0.261,
72
  "step": 16000
73
  },
74
  {
 
82
  "eval_bleu": 41.6373,
83
  "eval_gen_len": 35.6874,
84
  "eval_loss": 0.10391870141029358,
85
+ "eval_runtime": 137.3482,
86
+ "eval_samples_per_second": 7.594,
87
+ "eval_steps_per_second": 0.24,
88
  "step": 20000
89
  },
90
  {
 
98
  "eval_bleu": 41.2704,
99
  "eval_gen_len": 35.7728,
100
  "eval_loss": 0.10389306396245956,
101
+ "eval_runtime": 129.1131,
102
+ "eval_samples_per_second": 8.078,
103
+ "eval_steps_per_second": 0.256,
104
  "step": 24000
105
  },
106
  {
 
114
  "eval_bleu": 41.6866,
115
  "eval_gen_len": 35.348,
116
  "eval_loss": 0.10308653861284256,
117
+ "eval_runtime": 149.5149,
118
+ "eval_samples_per_second": 6.976,
119
+ "eval_steps_per_second": 0.221,
120
  "step": 28000
121
  },
122
  {
 
130
  "eval_bleu": 41.3994,
131
  "eval_gen_len": 35.7287,
132
  "eval_loss": 0.10239578038454056,
133
+ "eval_runtime": 125.8581,
134
+ "eval_samples_per_second": 8.287,
135
+ "eval_steps_per_second": 0.262,
136
  "step": 32000
137
  },
138
  {
 
146
  "eval_bleu": 39.5845,
147
  "eval_gen_len": 39.464,
148
  "eval_loss": 0.102129265666008,
149
+ "eval_runtime": 123.946,
150
+ "eval_samples_per_second": 8.415,
151
+ "eval_steps_per_second": 0.266,
152
  "step": 36000
153
  },
154
  {
 
162
  "eval_bleu": 42.2251,
163
  "eval_gen_len": 34.3845,
164
  "eval_loss": 0.10222680866718292,
165
+ "eval_runtime": 101.5149,
166
+ "eval_samples_per_second": 10.274,
167
+ "eval_steps_per_second": 0.325,
168
  "step": 40000
169
  },
170
  {
 
178
  "eval_bleu": 41.9432,
179
  "eval_gen_len": 35.0316,
180
  "eval_loss": 0.10267173498868942,
181
+ "eval_runtime": 129.0759,
182
+ "eval_samples_per_second": 8.081,
183
+ "eval_steps_per_second": 0.256,
184
  "step": 44000
185
  },
186
  {
 
194
  "eval_bleu": 42.028,
195
  "eval_gen_len": 35.8677,
196
  "eval_loss": 0.102423757314682,
197
+ "eval_runtime": 128.7526,
198
+ "eval_samples_per_second": 8.101,
199
+ "eval_steps_per_second": 0.256,
200
  "step": 48000
201
  },
202
  {
 
210
  "eval_bleu": 41.9107,
211
  "eval_gen_len": 35.6884,
212
  "eval_loss": 0.1020052582025528,
213
+ "eval_runtime": 98.4633,
214
+ "eval_samples_per_second": 10.593,
215
+ "eval_steps_per_second": 0.335,
216
  "step": 52000
217
  },
218
  {
 
226
  "eval_bleu": 41.898,
227
  "eval_gen_len": 35.7948,
228
  "eval_loss": 0.10190638899803162,
229
+ "eval_runtime": 155.172,
230
+ "eval_samples_per_second": 6.722,
231
+ "eval_steps_per_second": 0.213,
232
  "step": 56000
233
  },
234
  {
 
242
  "eval_bleu": 42.4405,
243
  "eval_gen_len": 34.8159,
244
  "eval_loss": 0.10122686624526978,
245
+ "eval_runtime": 109.5432,
246
+ "eval_samples_per_second": 9.521,
247
+ "eval_steps_per_second": 0.301,
248
  "step": 60000
249
  },
250
  {
 
258
  "eval_bleu": 42.1656,
259
  "eval_gen_len": 35.1611,
260
  "eval_loss": 0.1020784005522728,
261
+ "eval_runtime": 127.0613,
262
+ "eval_samples_per_second": 8.209,
263
+ "eval_steps_per_second": 0.26,
264
  "step": 64000
265
  },
266
  {
 
274
  "eval_bleu": 42.2236,
275
  "eval_gen_len": 34.6663,
276
  "eval_loss": 0.10225772112607956,
277
+ "eval_runtime": 118.3874,
278
+ "eval_samples_per_second": 8.81,
279
+ "eval_steps_per_second": 0.279,
280
  "step": 68000
281
  },
282
  {
 
290
  "eval_bleu": 42.1593,
291
  "eval_gen_len": 34.5043,
292
  "eval_loss": 0.10208923369646072,
293
+ "eval_runtime": 84.9237,
294
+ "eval_samples_per_second": 12.282,
295
+ "eval_steps_per_second": 0.389,
296
  "step": 72000
297
  },
298
  {
 
306
  "eval_bleu": 42.0667,
307
  "eval_gen_len": 35.1371,
308
  "eval_loss": 0.10195324569940567,
309
+ "eval_runtime": 115.0559,
310
+ "eval_samples_per_second": 9.065,
311
+ "eval_steps_per_second": 0.287,
312
  "step": 76000
313
  },
314
  {
 
322
  "eval_bleu": 42.7449,
323
  "eval_gen_len": 35.2848,
324
  "eval_loss": 0.10178153961896896,
325
+ "eval_runtime": 108.8465,
326
+ "eval_samples_per_second": 9.582,
327
+ "eval_steps_per_second": 0.303,
328
  "step": 80000
329
  }
330
  ],
checkpoint-80000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:038425ce924f4f9135467ce2b852d43fabd43f14b098635a051c844c20458e63
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffc182f2e76a1a072a43bb175e1b6d8176501f76e795c82d01e5ab1d2f0a83c
3
  size 3771
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:038425ce924f4f9135467ce2b852d43fabd43f14b098635a051c844c20458e63
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffc182f2e76a1a072a43bb175e1b6d8176501f76e795c82d01e5ab1d2f0a83c
3
  size 3771