Ethan Sim committed on
Commit
ef2d021
·
1 Parent(s): a853e98

update model with 1.0 safeguard

Browse files
checkpoint-80000/trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  "eval_bleu": 38.0976,
19
  "eval_gen_len": 37.2953,
20
  "eval_loss": 0.10893864184617996,
21
- "eval_runtime": 163.5371,
22
- "eval_samples_per_second": 6.378,
23
- "eval_steps_per_second": 0.202,
24
  "step": 4000
25
  },
26
  {
@@ -34,9 +34,9 @@
34
  "eval_bleu": 36.8776,
35
  "eval_gen_len": 39.7383,
36
  "eval_loss": 0.10664400458335876,
37
- "eval_runtime": 149.6664,
38
- "eval_samples_per_second": 6.969,
39
- "eval_steps_per_second": 0.22,
40
  "step": 8000
41
  },
42
  {
@@ -50,9 +50,9 @@
50
  "eval_bleu": 38.837,
51
  "eval_gen_len": 37.2205,
52
  "eval_loss": 0.1052979975938797,
53
- "eval_runtime": 159.5413,
54
- "eval_samples_per_second": 6.537,
55
- "eval_steps_per_second": 0.207,
56
  "step": 12000
57
  },
58
  {
@@ -66,9 +66,9 @@
66
  "eval_bleu": 41.3418,
67
  "eval_gen_len": 34.976,
68
  "eval_loss": 0.10423395782709122,
69
- "eval_runtime": 118.5509,
70
- "eval_samples_per_second": 8.798,
71
- "eval_steps_per_second": 0.278,
72
  "step": 16000
73
  },
74
  {
@@ -82,9 +82,9 @@
82
  "eval_bleu": 41.4557,
83
  "eval_gen_len": 36.4477,
84
  "eval_loss": 0.10369615256786346,
85
- "eval_runtime": 132.3921,
86
- "eval_samples_per_second": 7.878,
87
- "eval_steps_per_second": 0.249,
88
  "step": 20000
89
  },
90
  {
@@ -98,9 +98,9 @@
98
  "eval_bleu": 42.0105,
99
  "eval_gen_len": 35.0681,
100
  "eval_loss": 0.10366526991128922,
101
- "eval_runtime": 129.295,
102
- "eval_samples_per_second": 8.067,
103
- "eval_steps_per_second": 0.255,
104
  "step": 24000
105
  },
106
  {
@@ -114,9 +114,9 @@
114
  "eval_bleu": 41.9312,
115
  "eval_gen_len": 35.4621,
116
  "eval_loss": 0.1028067097067833,
117
- "eval_runtime": 119.6638,
118
- "eval_samples_per_second": 8.716,
119
- "eval_steps_per_second": 0.276,
120
  "step": 28000
121
  },
122
  {
@@ -130,9 +130,9 @@
130
  "eval_bleu": 41.2791,
131
  "eval_gen_len": 36.2924,
132
  "eval_loss": 0.10221964865922928,
133
- "eval_runtime": 141.8273,
134
- "eval_samples_per_second": 7.354,
135
- "eval_steps_per_second": 0.233,
136
  "step": 32000
137
  },
138
  {
@@ -146,9 +146,9 @@
146
  "eval_bleu": 40.1286,
147
  "eval_gen_len": 38.2963,
148
  "eval_loss": 0.10198042541742325,
149
- "eval_runtime": 134.6203,
150
- "eval_samples_per_second": 7.748,
151
- "eval_steps_per_second": 0.245,
152
  "step": 36000
153
  },
154
  {
@@ -162,9 +162,9 @@
162
  "eval_bleu": 42.345,
163
  "eval_gen_len": 34.3241,
164
  "eval_loss": 0.10196036845445633,
165
- "eval_runtime": 107.632,
166
- "eval_samples_per_second": 9.69,
167
- "eval_steps_per_second": 0.307,
168
  "step": 40000
169
  },
170
  {
@@ -178,9 +178,9 @@
178
  "eval_bleu": 42.1017,
179
  "eval_gen_len": 35.5551,
180
  "eval_loss": 0.10244914144277573,
181
- "eval_runtime": 122.4649,
182
- "eval_samples_per_second": 8.517,
183
- "eval_steps_per_second": 0.269,
184
  "step": 44000
185
  },
186
  {
@@ -194,9 +194,9 @@
194
  "eval_bleu": 41.5599,
195
  "eval_gen_len": 36.302,
196
  "eval_loss": 0.10217992961406708,
197
- "eval_runtime": 135.1924,
198
- "eval_samples_per_second": 7.715,
199
- "eval_steps_per_second": 0.244,
200
  "step": 48000
201
  },
202
  {
@@ -210,9 +210,9 @@
210
  "eval_bleu": 42.1594,
211
  "eval_gen_len": 34.8734,
212
  "eval_loss": 0.10176843404769897,
213
- "eval_runtime": 130.1772,
214
- "eval_samples_per_second": 8.012,
215
- "eval_steps_per_second": 0.254,
216
  "step": 52000
217
  },
218
  {
@@ -226,9 +226,9 @@
226
  "eval_bleu": 42.3226,
227
  "eval_gen_len": 35.5129,
228
  "eval_loss": 0.10162311047315598,
229
- "eval_runtime": 151.058,
230
- "eval_samples_per_second": 6.905,
231
- "eval_steps_per_second": 0.218,
232
  "step": 56000
233
  },
234
  {
@@ -242,9 +242,9 @@
242
  "eval_bleu": 42.76,
243
  "eval_gen_len": 34.7421,
244
  "eval_loss": 0.10091908276081085,
245
- "eval_runtime": 141.7745,
246
- "eval_samples_per_second": 7.357,
247
- "eval_steps_per_second": 0.233,
248
  "step": 60000
249
  },
250
  {
@@ -258,9 +258,9 @@
258
  "eval_bleu": 42.0603,
259
  "eval_gen_len": 34.3337,
260
  "eval_loss": 0.10173938423395157,
261
- "eval_runtime": 118.5645,
262
- "eval_samples_per_second": 8.797,
263
- "eval_steps_per_second": 0.278,
264
  "step": 64000
265
  },
266
  {
@@ -274,9 +274,9 @@
274
  "eval_bleu": 42.6668,
275
  "eval_gen_len": 34.0575,
276
  "eval_loss": 0.10194776207208633,
277
- "eval_runtime": 106.9067,
278
- "eval_samples_per_second": 9.756,
279
- "eval_steps_per_second": 0.309,
280
  "step": 68000
281
  },
282
  {
@@ -290,9 +290,9 @@
290
  "eval_bleu": 42.3805,
291
  "eval_gen_len": 34.7498,
292
  "eval_loss": 0.1018945649266243,
293
- "eval_runtime": 86.6094,
294
- "eval_samples_per_second": 12.043,
295
- "eval_steps_per_second": 0.381,
296
  "step": 72000
297
  },
298
  {
@@ -306,9 +306,9 @@
306
  "eval_bleu": 42.3891,
307
  "eval_gen_len": 35.1179,
308
  "eval_loss": 0.10174746811389923,
309
- "eval_runtime": 108.0214,
310
- "eval_samples_per_second": 9.655,
311
- "eval_steps_per_second": 0.305,
312
  "step": 76000
313
  },
314
  {
@@ -322,9 +322,9 @@
322
  "eval_bleu": 42.8456,
323
  "eval_gen_len": 34.791,
324
  "eval_loss": 0.1016170084476471,
325
- "eval_runtime": 104.9692,
326
- "eval_samples_per_second": 9.936,
327
- "eval_steps_per_second": 0.314,
328
  "step": 80000
329
  }
330
  ],
 
18
  "eval_bleu": 38.0976,
19
  "eval_gen_len": 37.2953,
20
  "eval_loss": 0.10893864184617996,
21
+ "eval_runtime": 170.9124,
22
+ "eval_samples_per_second": 6.103,
23
+ "eval_steps_per_second": 0.193,
24
  "step": 4000
25
  },
26
  {
 
34
  "eval_bleu": 36.8776,
35
  "eval_gen_len": 39.7383,
36
  "eval_loss": 0.10664400458335876,
37
+ "eval_runtime": 160.2997,
38
+ "eval_samples_per_second": 6.507,
39
+ "eval_steps_per_second": 0.206,
40
  "step": 8000
41
  },
42
  {
 
50
  "eval_bleu": 38.837,
51
  "eval_gen_len": 37.2205,
52
  "eval_loss": 0.1052979975938797,
53
+ "eval_runtime": 164.3263,
54
+ "eval_samples_per_second": 6.347,
55
+ "eval_steps_per_second": 0.201,
56
  "step": 12000
57
  },
58
  {
 
66
  "eval_bleu": 41.3418,
67
  "eval_gen_len": 34.976,
68
  "eval_loss": 0.10423395782709122,
69
+ "eval_runtime": 130.7322,
70
+ "eval_samples_per_second": 7.978,
71
+ "eval_steps_per_second": 0.252,
72
  "step": 16000
73
  },
74
  {
 
82
  "eval_bleu": 41.4557,
83
  "eval_gen_len": 36.4477,
84
  "eval_loss": 0.10369615256786346,
85
+ "eval_runtime": 144.8968,
86
+ "eval_samples_per_second": 7.198,
87
+ "eval_steps_per_second": 0.228,
88
  "step": 20000
89
  },
90
  {
 
98
  "eval_bleu": 42.0105,
99
  "eval_gen_len": 35.0681,
100
  "eval_loss": 0.10366526991128922,
101
+ "eval_runtime": 140.889,
102
+ "eval_samples_per_second": 7.403,
103
+ "eval_steps_per_second": 0.234,
104
  "step": 24000
105
  },
106
  {
 
114
  "eval_bleu": 41.9312,
115
  "eval_gen_len": 35.4621,
116
  "eval_loss": 0.1028067097067833,
117
+ "eval_runtime": 131.7196,
118
+ "eval_samples_per_second": 7.918,
119
+ "eval_steps_per_second": 0.251,
120
  "step": 28000
121
  },
122
  {
 
130
  "eval_bleu": 41.2791,
131
  "eval_gen_len": 36.2924,
132
  "eval_loss": 0.10221964865922928,
133
+ "eval_runtime": 151.9451,
134
+ "eval_samples_per_second": 6.864,
135
+ "eval_steps_per_second": 0.217,
136
  "step": 32000
137
  },
138
  {
 
146
  "eval_bleu": 40.1286,
147
  "eval_gen_len": 38.2963,
148
  "eval_loss": 0.10198042541742325,
149
+ "eval_runtime": 144.9715,
150
+ "eval_samples_per_second": 7.195,
151
+ "eval_steps_per_second": 0.228,
152
  "step": 36000
153
  },
154
  {
 
162
  "eval_bleu": 42.345,
163
  "eval_gen_len": 34.3241,
164
  "eval_loss": 0.10196036845445633,
165
+ "eval_runtime": 118.6741,
166
+ "eval_samples_per_second": 8.789,
167
+ "eval_steps_per_second": 0.278,
168
  "step": 40000
169
  },
170
  {
 
178
  "eval_bleu": 42.1017,
179
  "eval_gen_len": 35.5551,
180
  "eval_loss": 0.10244914144277573,
181
+ "eval_runtime": 129.7053,
182
+ "eval_samples_per_second": 8.041,
183
+ "eval_steps_per_second": 0.254,
184
  "step": 44000
185
  },
186
  {
 
194
  "eval_bleu": 41.5599,
195
  "eval_gen_len": 36.302,
196
  "eval_loss": 0.10217992961406708,
197
+ "eval_runtime": 139.5859,
198
+ "eval_samples_per_second": 7.472,
199
+ "eval_steps_per_second": 0.236,
200
  "step": 48000
201
  },
202
  {
 
210
  "eval_bleu": 42.1594,
211
  "eval_gen_len": 34.8734,
212
  "eval_loss": 0.10176843404769897,
213
+ "eval_runtime": 143.2813,
214
+ "eval_samples_per_second": 7.279,
215
+ "eval_steps_per_second": 0.23,
216
  "step": 52000
217
  },
218
  {
 
226
  "eval_bleu": 42.3226,
227
  "eval_gen_len": 35.5129,
228
  "eval_loss": 0.10162311047315598,
229
+ "eval_runtime": 155.5764,
230
+ "eval_samples_per_second": 6.704,
231
+ "eval_steps_per_second": 0.212,
232
  "step": 56000
233
  },
234
  {
 
242
  "eval_bleu": 42.76,
243
  "eval_gen_len": 34.7421,
244
  "eval_loss": 0.10091908276081085,
245
+ "eval_runtime": 150.4127,
246
+ "eval_samples_per_second": 6.934,
247
+ "eval_steps_per_second": 0.219,
248
  "step": 60000
249
  },
250
  {
 
258
  "eval_bleu": 42.0603,
259
  "eval_gen_len": 34.3337,
260
  "eval_loss": 0.10173938423395157,
261
+ "eval_runtime": 116.9913,
262
+ "eval_samples_per_second": 8.915,
263
+ "eval_steps_per_second": 0.282,
264
  "step": 64000
265
  },
266
  {
 
274
  "eval_bleu": 42.6668,
275
  "eval_gen_len": 34.0575,
276
  "eval_loss": 0.10194776207208633,
277
+ "eval_runtime": 115.372,
278
+ "eval_samples_per_second": 9.04,
279
+ "eval_steps_per_second": 0.286,
280
  "step": 68000
281
  },
282
  {
 
290
  "eval_bleu": 42.3805,
291
  "eval_gen_len": 34.7498,
292
  "eval_loss": 0.1018945649266243,
293
+ "eval_runtime": 87.7456,
294
+ "eval_samples_per_second": 11.887,
295
+ "eval_steps_per_second": 0.376,
296
  "step": 72000
297
  },
298
  {
 
306
  "eval_bleu": 42.3891,
307
  "eval_gen_len": 35.1179,
308
  "eval_loss": 0.10174746811389923,
309
+ "eval_runtime": 123.341,
310
+ "eval_samples_per_second": 8.456,
311
+ "eval_steps_per_second": 0.268,
312
  "step": 76000
313
  },
314
  {
 
322
  "eval_bleu": 42.8456,
323
  "eval_gen_len": 34.791,
324
  "eval_loss": 0.1016170084476471,
325
+ "eval_runtime": 121.3922,
326
+ "eval_samples_per_second": 8.592,
327
+ "eval_steps_per_second": 0.272,
328
  "step": 80000
329
  }
330
  ],
checkpoint-80000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75348bcfc056147819c47ff6d4e14e03f359f11d3c4f02f59a51bfa89c291c59
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fec518c9aaf6e1dcbf7cbe9131e5915cb9c8225a5a430b145445a64c83b5ca
3
  size 3771
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75348bcfc056147819c47ff6d4e14e03f359f11d3c4f02f59a51bfa89c291c59
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fec518c9aaf6e1dcbf7cbe9131e5915cb9c8225a5a430b145445a64c83b5ca
3
  size 3771