Ethan Sim commited on
Commit
16f1384
·
1 Parent(s): 4843bf3

update model with 1.0 safeguard

Browse files
checkpoint-80000/trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  "eval_bleu": 38.0264,
19
  "eval_gen_len": 37.3058,
20
  "eval_loss": 0.10926879942417145,
21
- "eval_runtime": 144.5824,
22
- "eval_samples_per_second": 7.214,
23
- "eval_steps_per_second": 0.228,
24
  "step": 4000
25
  },
26
  {
@@ -34,9 +34,9 @@
34
  "eval_bleu": 37.1545,
35
  "eval_gen_len": 38.1074,
36
  "eval_loss": 0.10711430013179779,
37
- "eval_runtime": 126.6602,
38
- "eval_samples_per_second": 8.235,
39
- "eval_steps_per_second": 0.261,
40
  "step": 8000
41
  },
42
  {
@@ -50,9 +50,9 @@
50
  "eval_bleu": 40.7576,
51
  "eval_gen_len": 35.7632,
52
  "eval_loss": 0.10568097978830338,
53
- "eval_runtime": 140.0097,
54
- "eval_samples_per_second": 7.449,
55
- "eval_steps_per_second": 0.236,
56
  "step": 12000
57
  },
58
  {
@@ -66,9 +66,9 @@
66
  "eval_bleu": 41.1354,
67
  "eval_gen_len": 36.836,
68
  "eval_loss": 0.10463867336511612,
69
- "eval_runtime": 115.2796,
70
- "eval_samples_per_second": 9.048,
71
- "eval_steps_per_second": 0.286,
72
  "step": 16000
73
  },
74
  {
@@ -82,9 +82,9 @@
82
  "eval_bleu": 41.1989,
83
  "eval_gen_len": 36.8955,
84
  "eval_loss": 0.10405203700065613,
85
- "eval_runtime": 112.8843,
86
- "eval_samples_per_second": 9.24,
87
- "eval_steps_per_second": 0.292,
88
  "step": 20000
89
  },
90
  {
@@ -98,9 +98,9 @@
98
  "eval_bleu": 41.8132,
99
  "eval_gen_len": 34.7565,
100
  "eval_loss": 0.10406998544931412,
101
- "eval_runtime": 80.9256,
102
- "eval_samples_per_second": 12.888,
103
- "eval_steps_per_second": 0.408,
104
  "step": 24000
105
  },
106
  {
@@ -114,9 +114,9 @@
114
  "eval_bleu": 41.8593,
115
  "eval_gen_len": 35.1361,
116
  "eval_loss": 0.10320573300123215,
117
- "eval_runtime": 123.7907,
118
- "eval_samples_per_second": 8.426,
119
- "eval_steps_per_second": 0.267,
120
  "step": 28000
121
  },
122
  {
@@ -130,9 +130,9 @@
130
  "eval_bleu": 41.806,
131
  "eval_gen_len": 35.8591,
132
  "eval_loss": 0.10261942446231842,
133
- "eval_runtime": 119.4076,
134
- "eval_samples_per_second": 8.735,
135
- "eval_steps_per_second": 0.276,
136
  "step": 32000
137
  },
138
  {
@@ -146,9 +146,9 @@
146
  "eval_bleu": 41.7632,
147
  "eval_gen_len": 36.094,
148
  "eval_loss": 0.10236303508281708,
149
- "eval_runtime": 114.5766,
150
- "eval_samples_per_second": 9.103,
151
- "eval_steps_per_second": 0.288,
152
  "step": 36000
153
  },
154
  {
@@ -162,9 +162,9 @@
162
  "eval_bleu": 41.9832,
163
  "eval_gen_len": 35.2454,
164
  "eval_loss": 0.10227732360363007,
165
- "eval_runtime": 85.7342,
166
- "eval_samples_per_second": 12.166,
167
- "eval_steps_per_second": 0.385,
168
  "step": 40000
169
  },
170
  {
@@ -178,9 +178,9 @@
178
  "eval_bleu": 41.4408,
179
  "eval_gen_len": 36.2848,
180
  "eval_loss": 0.10282173752784729,
181
- "eval_runtime": 110.3637,
182
- "eval_samples_per_second": 9.451,
183
- "eval_steps_per_second": 0.299,
184
  "step": 44000
185
  },
186
  {
@@ -194,9 +194,9 @@
194
  "eval_bleu": 41.1816,
195
  "eval_gen_len": 36.6194,
196
  "eval_loss": 0.10262637585401535,
197
- "eval_runtime": 120.1566,
198
- "eval_samples_per_second": 8.68,
199
- "eval_steps_per_second": 0.275,
200
  "step": 48000
201
  },
202
  {
@@ -210,9 +210,9 @@
210
  "eval_bleu": 41.7264,
211
  "eval_gen_len": 35.7603,
212
  "eval_loss": 0.10216823220252991,
213
- "eval_runtime": 89.1094,
214
- "eval_samples_per_second": 11.705,
215
- "eval_steps_per_second": 0.37,
216
  "step": 52000
217
  },
218
  {
@@ -226,9 +226,9 @@
226
  "eval_bleu": 40.7372,
227
  "eval_gen_len": 37.8715,
228
  "eval_loss": 0.10200918465852737,
229
- "eval_runtime": 134.6413,
230
- "eval_samples_per_second": 7.747,
231
- "eval_steps_per_second": 0.245,
232
  "step": 56000
233
  },
234
  {
@@ -242,9 +242,9 @@
242
  "eval_bleu": 42.6732,
243
  "eval_gen_len": 34.7517,
244
  "eval_loss": 0.10124800354242325,
245
- "eval_runtime": 111.3971,
246
- "eval_samples_per_second": 9.363,
247
- "eval_steps_per_second": 0.296,
248
  "step": 60000
249
  },
250
  {
@@ -258,9 +258,9 @@
258
  "eval_bleu": 42.345,
259
  "eval_gen_len": 34.5609,
260
  "eval_loss": 0.10216101258993149,
261
- "eval_runtime": 107.5188,
262
- "eval_samples_per_second": 9.701,
263
- "eval_steps_per_second": 0.307,
264
  "step": 64000
265
  },
266
  {
@@ -274,9 +274,9 @@
274
  "eval_bleu": 42.6795,
275
  "eval_gen_len": 34.442,
276
  "eval_loss": 0.10226498544216156,
277
- "eval_runtime": 101.1175,
278
- "eval_samples_per_second": 10.315,
279
- "eval_steps_per_second": 0.326,
280
  "step": 68000
281
  },
282
  {
@@ -290,9 +290,9 @@
290
  "eval_bleu": 42.1965,
291
  "eval_gen_len": 34.9348,
292
  "eval_loss": 0.10218390822410583,
293
- "eval_runtime": 80.2313,
294
- "eval_samples_per_second": 13.0,
295
- "eval_steps_per_second": 0.411,
296
  "step": 72000
297
  },
298
  {
@@ -306,9 +306,9 @@
306
  "eval_bleu": 40.6959,
307
  "eval_gen_len": 38.1342,
308
  "eval_loss": 0.10209660232067108,
309
- "eval_runtime": 181.0599,
310
- "eval_samples_per_second": 5.761,
311
- "eval_steps_per_second": 0.182,
312
  "step": 76000
313
  },
314
  {
@@ -322,9 +322,9 @@
322
  "eval_bleu": 42.729,
323
  "eval_gen_len": 35.0566,
324
  "eval_loss": 0.10193432867527008,
325
- "eval_runtime": 83.5606,
326
- "eval_samples_per_second": 12.482,
327
- "eval_steps_per_second": 0.395,
328
  "step": 80000
329
  }
330
  ],
 
18
  "eval_bleu": 38.0264,
19
  "eval_gen_len": 37.3058,
20
  "eval_loss": 0.10926879942417145,
21
+ "eval_runtime": 185.5179,
22
+ "eval_samples_per_second": 5.622,
23
+ "eval_steps_per_second": 0.178,
24
  "step": 4000
25
  },
26
  {
 
34
  "eval_bleu": 37.1545,
35
  "eval_gen_len": 38.1074,
36
  "eval_loss": 0.10711430013179779,
37
+ "eval_runtime": 160.2258,
38
+ "eval_samples_per_second": 6.51,
39
+ "eval_steps_per_second": 0.206,
40
  "step": 8000
41
  },
42
  {
 
50
  "eval_bleu": 40.7576,
51
  "eval_gen_len": 35.7632,
52
  "eval_loss": 0.10568097978830338,
53
+ "eval_runtime": 172.6094,
54
+ "eval_samples_per_second": 6.043,
55
+ "eval_steps_per_second": 0.191,
56
  "step": 12000
57
  },
58
  {
 
66
  "eval_bleu": 41.1354,
67
  "eval_gen_len": 36.836,
68
  "eval_loss": 0.10463867336511612,
69
+ "eval_runtime": 149.0366,
70
+ "eval_samples_per_second": 6.998,
71
+ "eval_steps_per_second": 0.221,
72
  "step": 16000
73
  },
74
  {
 
82
  "eval_bleu": 41.1989,
83
  "eval_gen_len": 36.8955,
84
  "eval_loss": 0.10405203700065613,
85
+ "eval_runtime": 142.4153,
86
+ "eval_samples_per_second": 7.324,
87
+ "eval_steps_per_second": 0.232,
88
  "step": 20000
89
  },
90
  {
 
98
  "eval_bleu": 41.8132,
99
  "eval_gen_len": 34.7565,
100
  "eval_loss": 0.10406998544931412,
101
+ "eval_runtime": 107.8168,
102
+ "eval_samples_per_second": 9.674,
103
+ "eval_steps_per_second": 0.306,
104
  "step": 24000
105
  },
106
  {
 
114
  "eval_bleu": 41.8593,
115
  "eval_gen_len": 35.1361,
116
  "eval_loss": 0.10320573300123215,
117
+ "eval_runtime": 152.0565,
118
+ "eval_samples_per_second": 6.859,
119
+ "eval_steps_per_second": 0.217,
120
  "step": 28000
121
  },
122
  {
 
130
  "eval_bleu": 41.806,
131
  "eval_gen_len": 35.8591,
132
  "eval_loss": 0.10261942446231842,
133
+ "eval_runtime": 154.646,
134
+ "eval_samples_per_second": 6.744,
135
+ "eval_steps_per_second": 0.213,
136
  "step": 32000
137
  },
138
  {
 
146
  "eval_bleu": 41.7632,
147
  "eval_gen_len": 36.094,
148
  "eval_loss": 0.10236303508281708,
149
+ "eval_runtime": 142.1476,
150
+ "eval_samples_per_second": 7.337,
151
+ "eval_steps_per_second": 0.232,
152
  "step": 36000
153
  },
154
  {
 
162
  "eval_bleu": 41.9832,
163
  "eval_gen_len": 35.2454,
164
  "eval_loss": 0.10227732360363007,
165
+ "eval_runtime": 113.1327,
166
+ "eval_samples_per_second": 9.219,
167
+ "eval_steps_per_second": 0.292,
168
  "step": 40000
169
  },
170
  {
 
178
  "eval_bleu": 41.4408,
179
  "eval_gen_len": 36.2848,
180
  "eval_loss": 0.10282173752784729,
181
+ "eval_runtime": 139.7824,
182
+ "eval_samples_per_second": 7.462,
183
+ "eval_steps_per_second": 0.236,
184
  "step": 44000
185
  },
186
  {
 
194
  "eval_bleu": 41.1816,
195
  "eval_gen_len": 36.6194,
196
  "eval_loss": 0.10262637585401535,
197
+ "eval_runtime": 153.9201,
198
+ "eval_samples_per_second": 6.776,
199
+ "eval_steps_per_second": 0.214,
200
  "step": 48000
201
  },
202
  {
 
210
  "eval_bleu": 41.7264,
211
  "eval_gen_len": 35.7603,
212
  "eval_loss": 0.10216823220252991,
213
+ "eval_runtime": 120.762,
214
+ "eval_samples_per_second": 8.637,
215
+ "eval_steps_per_second": 0.273,
216
  "step": 52000
217
  },
218
  {
 
226
  "eval_bleu": 40.7372,
227
  "eval_gen_len": 37.8715,
228
  "eval_loss": 0.10200918465852737,
229
+ "eval_runtime": 167.8456,
230
+ "eval_samples_per_second": 6.214,
231
+ "eval_steps_per_second": 0.197,
232
  "step": 56000
233
  },
234
  {
 
242
  "eval_bleu": 42.6732,
243
  "eval_gen_len": 34.7517,
244
  "eval_loss": 0.10124800354242325,
245
+ "eval_runtime": 145.0169,
246
+ "eval_samples_per_second": 7.192,
247
+ "eval_steps_per_second": 0.228,
248
  "step": 60000
249
  },
250
  {
 
258
  "eval_bleu": 42.345,
259
  "eval_gen_len": 34.5609,
260
  "eval_loss": 0.10216101258993149,
261
+ "eval_runtime": 136.5147,
262
+ "eval_samples_per_second": 7.64,
263
+ "eval_steps_per_second": 0.242,
264
  "step": 64000
265
  },
266
  {
 
274
  "eval_bleu": 42.6795,
275
  "eval_gen_len": 34.442,
276
  "eval_loss": 0.10226498544216156,
277
+ "eval_runtime": 125.0786,
278
+ "eval_samples_per_second": 8.339,
279
+ "eval_steps_per_second": 0.264,
280
  "step": 68000
281
  },
282
  {
 
290
  "eval_bleu": 42.1965,
291
  "eval_gen_len": 34.9348,
292
  "eval_loss": 0.10218390822410583,
293
+ "eval_runtime": 107.2258,
294
+ "eval_samples_per_second": 9.727,
295
+ "eval_steps_per_second": 0.308,
296
  "step": 72000
297
  },
298
  {
 
306
  "eval_bleu": 40.6959,
307
  "eval_gen_len": 38.1342,
308
  "eval_loss": 0.10209660232067108,
309
+ "eval_runtime": 211.7076,
310
+ "eval_samples_per_second": 4.927,
311
+ "eval_steps_per_second": 0.156,
312
  "step": 76000
313
  },
314
  {
 
322
  "eval_bleu": 42.729,
323
  "eval_gen_len": 35.0566,
324
  "eval_loss": 0.10193432867527008,
325
+ "eval_runtime": 104.8859,
326
+ "eval_samples_per_second": 9.944,
327
+ "eval_steps_per_second": 0.315,
328
  "step": 80000
329
  }
330
  ],
checkpoint-80000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:360679c7fad415b3c48425a6babc6418951bc55ca3821631fd52a04f2c42e833
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b52f3d420b071910bee5599c056de4ab2b8a451358cbb9a4e7fdac32ab97c2
3
  size 3771
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:360679c7fad415b3c48425a6babc6418951bc55ca3821631fd52a04f2c42e833
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b52f3d420b071910bee5599c056de4ab2b8a451358cbb9a4e7fdac32ab97c2
3
  size 3771