Ethan Sim committed on
Commit
0cbff79
·
1 Parent(s): 006f449

update model with 1.0 safeguard

Browse files
checkpoint-68000/trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  "eval_bleu": 35.7559,
19
  "eval_gen_len": 39.7057,
20
  "eval_loss": 0.10894892364740372,
21
- "eval_runtime": 186.2305,
22
- "eval_samples_per_second": 5.601,
23
- "eval_steps_per_second": 0.177,
24
  "step": 4000
25
  },
26
  {
@@ -34,9 +34,9 @@
34
  "eval_bleu": 36.6369,
35
  "eval_gen_len": 39.395,
36
  "eval_loss": 0.10665024816989899,
37
- "eval_runtime": 150.2548,
38
- "eval_samples_per_second": 6.942,
39
- "eval_steps_per_second": 0.22,
40
  "step": 8000
41
  },
42
  {
@@ -50,9 +50,9 @@
50
  "eval_bleu": 41.2508,
51
  "eval_gen_len": 35.1687,
52
  "eval_loss": 0.10527685284614563,
53
- "eval_runtime": 140.4867,
54
- "eval_samples_per_second": 7.424,
55
- "eval_steps_per_second": 0.235,
56
  "step": 12000
57
  },
58
  {
@@ -66,9 +66,9 @@
66
  "eval_bleu": 41.4292,
67
  "eval_gen_len": 35.1361,
68
  "eval_loss": 0.104288749396801,
69
- "eval_runtime": 117.5095,
70
- "eval_samples_per_second": 8.876,
71
- "eval_steps_per_second": 0.281,
72
  "step": 16000
73
  },
74
  {
@@ -82,9 +82,9 @@
82
  "eval_bleu": 41.7716,
83
  "eval_gen_len": 35.5177,
84
  "eval_loss": 0.10370208323001862,
85
- "eval_runtime": 146.3536,
86
- "eval_samples_per_second": 7.127,
87
- "eval_steps_per_second": 0.225,
88
  "step": 20000
89
  },
90
  {
@@ -98,9 +98,9 @@
98
  "eval_bleu": 40.6317,
99
  "eval_gen_len": 36.8993,
100
  "eval_loss": 0.10362833738327026,
101
- "eval_runtime": 131.4065,
102
- "eval_samples_per_second": 7.937,
103
- "eval_steps_per_second": 0.251,
104
  "step": 24000
105
  },
106
  {
@@ -114,9 +114,9 @@
114
  "eval_bleu": 41.8852,
115
  "eval_gen_len": 35.3174,
116
  "eval_loss": 0.10282401740550995,
117
- "eval_runtime": 124.1856,
118
- "eval_samples_per_second": 8.399,
119
- "eval_steps_per_second": 0.266,
120
  "step": 28000
121
  },
122
  {
@@ -130,9 +130,9 @@
130
  "eval_bleu": 41.5392,
131
  "eval_gen_len": 35.9501,
132
  "eval_loss": 0.10223321616649628,
133
- "eval_runtime": 120.3748,
134
- "eval_samples_per_second": 8.665,
135
- "eval_steps_per_second": 0.274,
136
  "step": 32000
137
  },
138
  {
@@ -146,9 +146,9 @@
146
  "eval_bleu": 40.5681,
147
  "eval_gen_len": 37.5935,
148
  "eval_loss": 0.10191329568624496,
149
- "eval_runtime": 110.8617,
150
- "eval_samples_per_second": 9.408,
151
- "eval_steps_per_second": 0.298,
152
  "step": 36000
153
  },
154
  {
@@ -162,9 +162,9 @@
162
  "eval_bleu": 42.3964,
163
  "eval_gen_len": 34.768,
164
  "eval_loss": 0.10204007476568222,
165
- "eval_runtime": 112.6476,
166
- "eval_samples_per_second": 9.259,
167
- "eval_steps_per_second": 0.293,
168
  "step": 40000
169
  },
170
  {
@@ -178,9 +178,9 @@
178
  "eval_bleu": 40.217,
179
  "eval_gen_len": 37.1946,
180
  "eval_loss": 0.10240339487791061,
181
- "eval_runtime": 170.37,
182
- "eval_samples_per_second": 6.122,
183
- "eval_steps_per_second": 0.194,
184
  "step": 44000
185
  },
186
  {
@@ -194,9 +194,9 @@
194
  "eval_bleu": 41.5097,
195
  "eval_gen_len": 36.3797,
196
  "eval_loss": 0.1021459624171257,
197
- "eval_runtime": 146.1468,
198
- "eval_samples_per_second": 7.137,
199
- "eval_steps_per_second": 0.226,
200
  "step": 48000
201
  },
202
  {
@@ -210,9 +210,9 @@
210
  "eval_bleu": 42.2181,
211
  "eval_gen_len": 35.0019,
212
  "eval_loss": 0.10173720866441727,
213
- "eval_runtime": 158.0705,
214
- "eval_samples_per_second": 6.598,
215
- "eval_steps_per_second": 0.209,
216
  "step": 52000
217
  },
218
  {
@@ -226,9 +226,9 @@
226
  "eval_bleu": 41.65,
227
  "eval_gen_len": 36.2694,
228
  "eval_loss": 0.10165542364120483,
229
- "eval_runtime": 160.9591,
230
- "eval_samples_per_second": 6.48,
231
- "eval_steps_per_second": 0.205,
232
  "step": 56000
233
  },
234
  {
@@ -242,9 +242,9 @@
242
  "eval_bleu": 42.5203,
243
  "eval_gen_len": 34.1112,
244
  "eval_loss": 0.10101941227912903,
245
- "eval_runtime": 105.5932,
246
- "eval_samples_per_second": 9.878,
247
- "eval_steps_per_second": 0.313,
248
  "step": 60000
249
  },
250
  {
@@ -258,9 +258,9 @@
258
  "eval_bleu": 42.2011,
259
  "eval_gen_len": 35.1908,
260
  "eval_loss": 0.10181207209825516,
261
- "eval_runtime": 118.0171,
262
- "eval_samples_per_second": 8.838,
263
- "eval_steps_per_second": 0.28,
264
  "step": 64000
265
  },
266
  {
@@ -274,9 +274,9 @@
274
  "eval_bleu": 42.5239,
275
  "eval_gen_len": 34.8715,
276
  "eval_loss": 0.10198543220758438,
277
- "eval_runtime": 121.4795,
278
- "eval_samples_per_second": 8.586,
279
- "eval_steps_per_second": 0.272,
280
  "step": 68000
281
  }
282
  ],
 
18
  "eval_bleu": 35.7559,
19
  "eval_gen_len": 39.7057,
20
  "eval_loss": 0.10894892364740372,
21
+ "eval_runtime": 204.0367,
22
+ "eval_samples_per_second": 5.112,
23
+ "eval_steps_per_second": 0.162,
24
  "step": 4000
25
  },
26
  {
 
34
  "eval_bleu": 36.6369,
35
  "eval_gen_len": 39.395,
36
  "eval_loss": 0.10665024816989899,
37
+ "eval_runtime": 169.0161,
38
+ "eval_samples_per_second": 6.171,
39
+ "eval_steps_per_second": 0.195,
40
  "step": 8000
41
  },
42
  {
 
50
  "eval_bleu": 41.2508,
51
  "eval_gen_len": 35.1687,
52
  "eval_loss": 0.10527685284614563,
53
+ "eval_runtime": 161.3856,
54
+ "eval_samples_per_second": 6.463,
55
+ "eval_steps_per_second": 0.204,
56
  "step": 12000
57
  },
58
  {
 
66
  "eval_bleu": 41.4292,
67
  "eval_gen_len": 35.1361,
68
  "eval_loss": 0.104288749396801,
69
+ "eval_runtime": 136.8172,
70
+ "eval_samples_per_second": 7.623,
71
+ "eval_steps_per_second": 0.241,
72
  "step": 16000
73
  },
74
  {
 
82
  "eval_bleu": 41.7716,
83
  "eval_gen_len": 35.5177,
84
  "eval_loss": 0.10370208323001862,
85
+ "eval_runtime": 158.9983,
86
+ "eval_samples_per_second": 6.56,
87
+ "eval_steps_per_second": 0.208,
88
  "step": 20000
89
  },
90
  {
 
98
  "eval_bleu": 40.6317,
99
  "eval_gen_len": 36.8993,
100
  "eval_loss": 0.10362833738327026,
101
+ "eval_runtime": 143.5271,
102
+ "eval_samples_per_second": 7.267,
103
+ "eval_steps_per_second": 0.23,
104
  "step": 24000
105
  },
106
  {
 
114
  "eval_bleu": 41.8852,
115
  "eval_gen_len": 35.3174,
116
  "eval_loss": 0.10282401740550995,
117
+ "eval_runtime": 137.168,
118
+ "eval_samples_per_second": 7.604,
119
+ "eval_steps_per_second": 0.241,
120
  "step": 28000
121
  },
122
  {
 
130
  "eval_bleu": 41.5392,
131
  "eval_gen_len": 35.9501,
132
  "eval_loss": 0.10223321616649628,
133
+ "eval_runtime": 133.1868,
134
+ "eval_samples_per_second": 7.831,
135
+ "eval_steps_per_second": 0.248,
136
  "step": 32000
137
  },
138
  {
 
146
  "eval_bleu": 40.5681,
147
  "eval_gen_len": 37.5935,
148
  "eval_loss": 0.10191329568624496,
149
+ "eval_runtime": 125.5047,
150
+ "eval_samples_per_second": 8.31,
151
+ "eval_steps_per_second": 0.263,
152
  "step": 36000
153
  },
154
  {
 
162
  "eval_bleu": 42.3964,
163
  "eval_gen_len": 34.768,
164
  "eval_loss": 0.10204007476568222,
165
+ "eval_runtime": 124.2655,
166
+ "eval_samples_per_second": 8.393,
167
+ "eval_steps_per_second": 0.266,
168
  "step": 40000
169
  },
170
  {
 
178
  "eval_bleu": 40.217,
179
  "eval_gen_len": 37.1946,
180
  "eval_loss": 0.10240339487791061,
181
+ "eval_runtime": 180.618,
182
+ "eval_samples_per_second": 5.775,
183
+ "eval_steps_per_second": 0.183,
184
  "step": 44000
185
  },
186
  {
 
194
  "eval_bleu": 41.5097,
195
  "eval_gen_len": 36.3797,
196
  "eval_loss": 0.1021459624171257,
197
+ "eval_runtime": 149.0335,
198
+ "eval_samples_per_second": 6.998,
199
+ "eval_steps_per_second": 0.221,
200
  "step": 48000
201
  },
202
  {
 
210
  "eval_bleu": 42.2181,
211
  "eval_gen_len": 35.0019,
212
  "eval_loss": 0.10173720866441727,
213
+ "eval_runtime": 168.836,
214
+ "eval_samples_per_second": 6.178,
215
+ "eval_steps_per_second": 0.195,
216
  "step": 52000
217
  },
218
  {
 
226
  "eval_bleu": 41.65,
227
  "eval_gen_len": 36.2694,
228
  "eval_loss": 0.10165542364120483,
229
+ "eval_runtime": 181.2512,
230
+ "eval_samples_per_second": 5.754,
231
+ "eval_steps_per_second": 0.182,
232
  "step": 56000
233
  },
234
  {
 
242
  "eval_bleu": 42.5203,
243
  "eval_gen_len": 34.1112,
244
  "eval_loss": 0.10101941227912903,
245
+ "eval_runtime": 117.7958,
246
+ "eval_samples_per_second": 8.854,
247
+ "eval_steps_per_second": 0.28,
248
  "step": 60000
249
  },
250
  {
 
258
  "eval_bleu": 42.2011,
259
  "eval_gen_len": 35.1908,
260
  "eval_loss": 0.10181207209825516,
261
+ "eval_runtime": 136.2775,
262
+ "eval_samples_per_second": 7.654,
263
+ "eval_steps_per_second": 0.242,
264
  "step": 64000
265
  },
266
  {
 
274
  "eval_bleu": 42.5239,
275
  "eval_gen_len": 34.8715,
276
  "eval_loss": 0.10198543220758438,
277
+ "eval_runtime": 134.6516,
278
+ "eval_samples_per_second": 7.746,
279
+ "eval_steps_per_second": 0.245,
280
  "step": 68000
281
  }
282
  ],
checkpoint-68000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb6b4d69457e8457bc8ba7157cadf6f7f462ddf893ce54473c85704df3bd5050
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6c61042e7a998c0fecc8637c8678e4c211809e57265d0edb7e047a99b13a43
3
  size 3771
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb6b4d69457e8457bc8ba7157cadf6f7f462ddf893ce54473c85704df3bd5050
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6c61042e7a998c0fecc8637c8678e4c211809e57265d0edb7e047a99b13a43
3
  size 3771