Ethan Sim committed on
Commit
906436f
·
1 Parent(s): 223cf67

update model with 1.0 safeguard

Browse files
checkpoint-80000/trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  "eval_bleu": 35.5597,
19
  "eval_gen_len": 39.8495,
20
  "eval_loss": 0.10906314104795456,
21
- "eval_runtime": 172.1137,
22
- "eval_samples_per_second": 6.06,
23
- "eval_steps_per_second": 0.192,
24
  "step": 4000
25
  },
26
  {
@@ -34,9 +34,9 @@
34
  "eval_bleu": 40.9319,
35
  "eval_gen_len": 34.6337,
36
  "eval_loss": 0.10657753050327301,
37
- "eval_runtime": 155.9008,
38
- "eval_samples_per_second": 6.69,
39
- "eval_steps_per_second": 0.212,
40
  "step": 8000
41
  },
42
  {
@@ -50,9 +50,9 @@
50
  "eval_bleu": 41.3844,
51
  "eval_gen_len": 34.6453,
52
  "eval_loss": 0.10533162206411362,
53
- "eval_runtime": 169.7915,
54
- "eval_samples_per_second": 6.143,
55
- "eval_steps_per_second": 0.194,
56
  "step": 12000
57
  },
58
  {
@@ -66,9 +66,9 @@
66
  "eval_bleu": 41.3616,
67
  "eval_gen_len": 35.1601,
68
  "eval_loss": 0.10429085791110992,
69
- "eval_runtime": 173.7331,
70
- "eval_samples_per_second": 6.003,
71
- "eval_steps_per_second": 0.19,
72
  "step": 16000
73
  },
74
  {
@@ -82,9 +82,9 @@
82
  "eval_bleu": 41.6835,
83
  "eval_gen_len": 34.9942,
84
  "eval_loss": 0.10373106598854065,
85
- "eval_runtime": 138.2922,
86
- "eval_samples_per_second": 7.542,
87
- "eval_steps_per_second": 0.239,
88
  "step": 20000
89
  },
90
  {
@@ -98,9 +98,9 @@
98
  "eval_bleu": 41.1613,
99
  "eval_gen_len": 36.2349,
100
  "eval_loss": 0.10356967151165009,
101
- "eval_runtime": 161.9993,
102
- "eval_samples_per_second": 6.438,
103
- "eval_steps_per_second": 0.204,
104
  "step": 24000
105
  },
106
  {
@@ -114,9 +114,9 @@
114
  "eval_bleu": 41.8822,
115
  "eval_gen_len": 34.4698,
116
  "eval_loss": 0.10275906324386597,
117
- "eval_runtime": 113.2908,
118
- "eval_samples_per_second": 9.206,
119
- "eval_steps_per_second": 0.291,
120
  "step": 28000
121
  },
122
  {
@@ -130,9 +130,9 @@
130
  "eval_bleu": 41.7092,
131
  "eval_gen_len": 35.2943,
132
  "eval_loss": 0.10221899300813675,
133
- "eval_runtime": 118.7545,
134
- "eval_samples_per_second": 8.783,
135
- "eval_steps_per_second": 0.278,
136
  "step": 32000
137
  },
138
  {
@@ -146,9 +146,9 @@
146
  "eval_bleu": 40.1745,
147
  "eval_gen_len": 38.5484,
148
  "eval_loss": 0.10177244991064072,
149
- "eval_runtime": 118.1882,
150
- "eval_samples_per_second": 8.825,
151
- "eval_steps_per_second": 0.279,
152
  "step": 36000
153
  },
154
  {
@@ -162,9 +162,9 @@
162
  "eval_bleu": 42.5078,
163
  "eval_gen_len": 34.3826,
164
  "eval_loss": 0.10191857814788818,
165
- "eval_runtime": 109.0756,
166
- "eval_samples_per_second": 9.562,
167
- "eval_steps_per_second": 0.303,
168
  "step": 40000
169
  },
170
  {
@@ -178,9 +178,9 @@
178
  "eval_bleu": 42.0937,
179
  "eval_gen_len": 35.7114,
180
  "eval_loss": 0.10230503231287003,
181
- "eval_runtime": 120.9618,
182
- "eval_samples_per_second": 8.623,
183
- "eval_steps_per_second": 0.273,
184
  "step": 44000
185
  },
186
  {
@@ -194,9 +194,9 @@
194
  "eval_bleu": 42.3842,
195
  "eval_gen_len": 35.3586,
196
  "eval_loss": 0.10215254127979279,
197
- "eval_runtime": 145.2894,
198
- "eval_samples_per_second": 7.179,
199
- "eval_steps_per_second": 0.227,
200
  "step": 48000
201
  },
202
  {
@@ -210,9 +210,9 @@
210
  "eval_bleu": 42.1993,
211
  "eval_gen_len": 34.0729,
212
  "eval_loss": 0.10184619575738907,
213
- "eval_runtime": 129.3624,
214
- "eval_samples_per_second": 8.063,
215
- "eval_steps_per_second": 0.255,
216
  "step": 52000
217
  },
218
  {
@@ -226,9 +226,9 @@
226
  "eval_bleu": 42.3348,
227
  "eval_gen_len": 35.5158,
228
  "eval_loss": 0.10167574882507324,
229
- "eval_runtime": 118.7033,
230
- "eval_samples_per_second": 8.787,
231
- "eval_steps_per_second": 0.278,
232
  "step": 56000
233
  },
234
  {
@@ -242,9 +242,9 @@
242
  "eval_bleu": 42.6183,
243
  "eval_gen_len": 34.5225,
244
  "eval_loss": 0.10100951045751572,
245
- "eval_runtime": 129.9979,
246
- "eval_samples_per_second": 8.023,
247
- "eval_steps_per_second": 0.254,
248
  "step": 60000
249
  },
250
  {
@@ -258,9 +258,9 @@
258
  "eval_bleu": 42.354,
259
  "eval_gen_len": 35.1323,
260
  "eval_loss": 0.10181604325771332,
261
- "eval_runtime": 112.8931,
262
- "eval_samples_per_second": 9.239,
263
- "eval_steps_per_second": 0.292,
264
  "step": 64000
265
  },
266
  {
@@ -274,9 +274,9 @@
274
  "eval_bleu": 41.9485,
275
  "eval_gen_len": 35.4727,
276
  "eval_loss": 0.10201819986104965,
277
- "eval_runtime": 152.1334,
278
- "eval_samples_per_second": 6.856,
279
- "eval_steps_per_second": 0.217,
280
  "step": 68000
281
  },
282
  {
@@ -290,9 +290,9 @@
290
  "eval_bleu": 42.3042,
291
  "eval_gen_len": 35.2704,
292
  "eval_loss": 0.10195478051900864,
293
- "eval_runtime": 87.1208,
294
- "eval_samples_per_second": 11.972,
295
- "eval_steps_per_second": 0.379,
296
  "step": 72000
297
  },
298
  {
@@ -306,9 +306,9 @@
306
  "eval_bleu": 39.7039,
307
  "eval_gen_len": 37.767,
308
  "eval_loss": 0.10168451070785522,
309
- "eval_runtime": 120.3909,
310
- "eval_samples_per_second": 8.663,
311
- "eval_steps_per_second": 0.274,
312
  "step": 76000
313
  },
314
  {
@@ -322,9 +322,9 @@
322
  "eval_bleu": 42.9732,
323
  "eval_gen_len": 34.5542,
324
  "eval_loss": 0.10154784470796585,
325
- "eval_runtime": 146.5985,
326
- "eval_samples_per_second": 7.115,
327
- "eval_steps_per_second": 0.225,
328
  "step": 80000
329
  }
330
  ],
 
18
  "eval_bleu": 35.5597,
19
  "eval_gen_len": 39.8495,
20
  "eval_loss": 0.10906314104795456,
21
+ "eval_runtime": 140.7392,
22
+ "eval_samples_per_second": 7.411,
23
+ "eval_steps_per_second": 0.234,
24
  "step": 4000
25
  },
26
  {
 
34
  "eval_bleu": 40.9319,
35
  "eval_gen_len": 34.6337,
36
  "eval_loss": 0.10657753050327301,
37
+ "eval_runtime": 125.8016,
38
+ "eval_samples_per_second": 8.291,
39
+ "eval_steps_per_second": 0.262,
40
  "step": 8000
41
  },
42
  {
 
50
  "eval_bleu": 41.3844,
51
  "eval_gen_len": 34.6453,
52
  "eval_loss": 0.10533162206411362,
53
+ "eval_runtime": 111.0041,
54
+ "eval_samples_per_second": 9.396,
55
+ "eval_steps_per_second": 0.297,
56
  "step": 12000
57
  },
58
  {
 
66
  "eval_bleu": 41.3616,
67
  "eval_gen_len": 35.1601,
68
  "eval_loss": 0.10429085791110992,
69
+ "eval_runtime": 141.5674,
70
+ "eval_samples_per_second": 7.368,
71
+ "eval_steps_per_second": 0.233,
72
  "step": 16000
73
  },
74
  {
 
82
  "eval_bleu": 41.6835,
83
  "eval_gen_len": 34.9942,
84
  "eval_loss": 0.10373106598854065,
85
+ "eval_runtime": 109.7842,
86
+ "eval_samples_per_second": 9.5,
87
+ "eval_steps_per_second": 0.301,
88
  "step": 20000
89
  },
90
  {
 
98
  "eval_bleu": 41.1613,
99
  "eval_gen_len": 36.2349,
100
  "eval_loss": 0.10356967151165009,
101
+ "eval_runtime": 116.302,
102
+ "eval_samples_per_second": 8.968,
103
+ "eval_steps_per_second": 0.284,
104
  "step": 24000
105
  },
106
  {
 
114
  "eval_bleu": 41.8822,
115
  "eval_gen_len": 34.4698,
116
  "eval_loss": 0.10275906324386597,
117
+ "eval_runtime": 88.2743,
118
+ "eval_samples_per_second": 11.815,
119
+ "eval_steps_per_second": 0.374,
120
  "step": 28000
121
  },
122
  {
 
130
  "eval_bleu": 41.7092,
131
  "eval_gen_len": 35.2943,
132
  "eval_loss": 0.10221899300813675,
133
+ "eval_runtime": 93.7664,
134
+ "eval_samples_per_second": 11.123,
135
+ "eval_steps_per_second": 0.352,
136
  "step": 32000
137
  },
138
  {
 
146
  "eval_bleu": 40.1745,
147
  "eval_gen_len": 38.5484,
148
  "eval_loss": 0.10177244991064072,
149
+ "eval_runtime": 92.869,
150
+ "eval_samples_per_second": 11.231,
151
+ "eval_steps_per_second": 0.355,
152
  "step": 36000
153
  },
154
  {
 
162
  "eval_bleu": 42.5078,
163
  "eval_gen_len": 34.3826,
164
  "eval_loss": 0.10191857814788818,
165
+ "eval_runtime": 85.4817,
166
+ "eval_samples_per_second": 12.201,
167
+ "eval_steps_per_second": 0.386,
168
  "step": 40000
169
  },
170
  {
 
178
  "eval_bleu": 42.0937,
179
  "eval_gen_len": 35.7114,
180
  "eval_loss": 0.10230503231287003,
181
+ "eval_runtime": 96.9439,
182
+ "eval_samples_per_second": 10.759,
183
+ "eval_steps_per_second": 0.34,
184
  "step": 44000
185
  },
186
  {
 
194
  "eval_bleu": 42.3842,
195
  "eval_gen_len": 35.3586,
196
  "eval_loss": 0.10215254127979279,
197
+ "eval_runtime": 94.9854,
198
+ "eval_samples_per_second": 10.981,
199
+ "eval_steps_per_second": 0.347,
200
  "step": 48000
201
  },
202
  {
 
210
  "eval_bleu": 42.1993,
211
  "eval_gen_len": 34.0729,
212
  "eval_loss": 0.10184619575738907,
213
+ "eval_runtime": 104.2027,
214
+ "eval_samples_per_second": 10.009,
215
+ "eval_steps_per_second": 0.317,
216
  "step": 52000
217
  },
218
  {
 
226
  "eval_bleu": 42.3348,
227
  "eval_gen_len": 35.5158,
228
  "eval_loss": 0.10167574882507324,
229
+ "eval_runtime": 95.3873,
230
+ "eval_samples_per_second": 10.934,
231
+ "eval_steps_per_second": 0.346,
232
  "step": 56000
233
  },
234
  {
 
242
  "eval_bleu": 42.6183,
243
  "eval_gen_len": 34.5225,
244
  "eval_loss": 0.10100951045751572,
245
+ "eval_runtime": 105.9409,
246
+ "eval_samples_per_second": 9.845,
247
+ "eval_steps_per_second": 0.311,
248
  "step": 60000
249
  },
250
  {
 
258
  "eval_bleu": 42.354,
259
  "eval_gen_len": 35.1323,
260
  "eval_loss": 0.10181604325771332,
261
+ "eval_runtime": 90.7174,
262
+ "eval_samples_per_second": 11.497,
263
+ "eval_steps_per_second": 0.364,
264
  "step": 64000
265
  },
266
  {
 
274
  "eval_bleu": 41.9485,
275
  "eval_gen_len": 35.4727,
276
  "eval_loss": 0.10201819986104965,
277
+ "eval_runtime": 99.9879,
278
+ "eval_samples_per_second": 10.431,
279
+ "eval_steps_per_second": 0.33,
280
  "step": 68000
281
  },
282
  {
 
290
  "eval_bleu": 42.3042,
291
  "eval_gen_len": 35.2704,
292
  "eval_loss": 0.10195478051900864,
293
+ "eval_runtime": 70.0936,
294
+ "eval_samples_per_second": 14.88,
295
+ "eval_steps_per_second": 0.471,
296
  "step": 72000
297
  },
298
  {
 
306
  "eval_bleu": 39.7039,
307
  "eval_gen_len": 37.767,
308
  "eval_loss": 0.10168451070785522,
309
+ "eval_runtime": 112.9245,
310
+ "eval_samples_per_second": 9.236,
311
+ "eval_steps_per_second": 0.292,
312
  "step": 76000
313
  },
314
  {
 
322
  "eval_bleu": 42.9732,
323
  "eval_gen_len": 34.5542,
324
  "eval_loss": 0.10154784470796585,
325
+ "eval_runtime": 147.6247,
326
+ "eval_samples_per_second": 7.065,
327
+ "eval_steps_per_second": 0.224,
328
  "step": 80000
329
  }
330
  ],
checkpoint-80000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e05e0105e3d4b9c07cc62cac7d00f8128a8ae004ebec6214a4683123704e19a
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00dba6608fa8aeace5d4d7a700208219e0f9dc795287f71cf75349dfe01fd209
3
  size 3771
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e05e0105e3d4b9c07cc62cac7d00f8128a8ae004ebec6214a4683123704e19a
3
  size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00dba6608fa8aeace5d4d7a700208219e0f9dc795287f71cf75349dfe01fd209
3
  size 3771