Ethan Sim committed on
Commit
878212a
·
1 Parent(s): cff47c7

restage best simple adapt wce model

Browse files
Files changed (2) hide show
  1. trainer_state.json +60 -60
  2. training_args.bin +1 -1
trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  "eval_bleu": 35.1893,
19
  "eval_gen_len": 40.3663,
20
  "eval_loss": 0.10897237807512283,
21
- "eval_runtime": 192.3783,
22
- "eval_samples_per_second": 5.422,
23
- "eval_steps_per_second": 0.172,
24
  "step": 4000
25
  },
26
  {
@@ -34,9 +34,9 @@
34
  "eval_bleu": 37.7679,
35
  "eval_gen_len": 37.7632,
36
  "eval_loss": 0.1064961776137352,
37
- "eval_runtime": 177.2132,
38
- "eval_samples_per_second": 5.886,
39
- "eval_steps_per_second": 0.186,
40
  "step": 8000
41
  },
42
  {
@@ -50,9 +50,9 @@
50
  "eval_bleu": 41.5566,
51
  "eval_gen_len": 34.2157,
52
  "eval_loss": 0.10526148974895477,
53
- "eval_runtime": 149.8179,
54
- "eval_samples_per_second": 6.962,
55
- "eval_steps_per_second": 0.22,
56
  "step": 12000
57
  },
58
  {
@@ -66,9 +66,9 @@
66
  "eval_bleu": 41.4171,
67
  "eval_gen_len": 35.5072,
68
  "eval_loss": 0.104159876704216,
69
- "eval_runtime": 143.2125,
70
- "eval_samples_per_second": 7.283,
71
- "eval_steps_per_second": 0.23,
72
  "step": 16000
73
  },
74
  {
@@ -82,9 +82,9 @@
82
  "eval_bleu": 41.7523,
83
  "eval_gen_len": 35.6031,
84
  "eval_loss": 0.10359229892492294,
85
- "eval_runtime": 157.1246,
86
- "eval_samples_per_second": 6.638,
87
- "eval_steps_per_second": 0.21,
88
  "step": 20000
89
  },
90
  {
@@ -98,9 +98,9 @@
98
  "eval_bleu": 41.5818,
99
  "eval_gen_len": 35.7987,
100
  "eval_loss": 0.10351784527301788,
101
- "eval_runtime": 140.1257,
102
- "eval_samples_per_second": 7.443,
103
- "eval_steps_per_second": 0.236,
104
  "step": 24000
105
  },
106
  {
@@ -114,9 +114,9 @@
114
  "eval_bleu": 41.7604,
115
  "eval_gen_len": 35.0968,
116
  "eval_loss": 0.1027546375989914,
117
- "eval_runtime": 152.4696,
118
- "eval_samples_per_second": 6.841,
119
- "eval_steps_per_second": 0.216,
120
  "step": 28000
121
  },
122
  {
@@ -130,9 +130,9 @@
130
  "eval_bleu": 41.9671,
131
  "eval_gen_len": 35.488,
132
  "eval_loss": 0.10217112302780151,
133
- "eval_runtime": 134.38,
134
- "eval_samples_per_second": 7.762,
135
- "eval_steps_per_second": 0.246,
136
  "step": 32000
137
  },
138
  {
@@ -146,9 +146,9 @@
146
  "eval_bleu": 41.2336,
147
  "eval_gen_len": 36.3039,
148
  "eval_loss": 0.1017456203699112,
149
- "eval_runtime": 121.8994,
150
- "eval_samples_per_second": 8.556,
151
- "eval_steps_per_second": 0.271,
152
  "step": 36000
153
  },
154
  {
@@ -162,9 +162,9 @@
162
  "eval_bleu": 42.312,
163
  "eval_gen_len": 34.4314,
164
  "eval_loss": 0.10187986493110657,
165
- "eval_runtime": 125.3365,
166
- "eval_samples_per_second": 8.322,
167
- "eval_steps_per_second": 0.263,
168
  "step": 40000
169
  },
170
  {
@@ -178,9 +178,9 @@
178
  "eval_bleu": 42.1662,
179
  "eval_gen_len": 35.4765,
180
  "eval_loss": 0.10221881419420242,
181
- "eval_runtime": 129.8395,
182
- "eval_samples_per_second": 8.033,
183
- "eval_steps_per_second": 0.254,
184
  "step": 44000
185
  },
186
  {
@@ -194,9 +194,9 @@
194
  "eval_bleu": 42.1347,
195
  "eval_gen_len": 35.303,
196
  "eval_loss": 0.10202226787805557,
197
- "eval_runtime": 130.0603,
198
- "eval_samples_per_second": 8.019,
199
- "eval_steps_per_second": 0.254,
200
  "step": 48000
201
  },
202
  {
@@ -210,9 +210,9 @@
210
  "eval_bleu": 42.362,
211
  "eval_gen_len": 34.1179,
212
  "eval_loss": 0.10167563706636429,
213
- "eval_runtime": 143.3476,
214
- "eval_samples_per_second": 7.276,
215
- "eval_steps_per_second": 0.23,
216
  "step": 52000
217
  },
218
  {
@@ -226,9 +226,9 @@
226
  "eval_bleu": 42.0947,
227
  "eval_gen_len": 35.8955,
228
  "eval_loss": 0.10157355666160583,
229
- "eval_runtime": 161.9595,
230
- "eval_samples_per_second": 6.44,
231
- "eval_steps_per_second": 0.204,
232
  "step": 56000
233
  },
234
  {
@@ -242,9 +242,9 @@
242
  "eval_bleu": 42.504,
243
  "eval_gen_len": 34.5292,
244
  "eval_loss": 0.10091494768857956,
245
- "eval_runtime": 144.7728,
246
- "eval_samples_per_second": 7.204,
247
- "eval_steps_per_second": 0.228,
248
  "step": 60000
249
  },
250
  {
@@ -258,9 +258,9 @@
258
  "eval_bleu": 41.9688,
259
  "eval_gen_len": 33.8217,
260
  "eval_loss": 0.10167799890041351,
261
- "eval_runtime": 122.082,
262
- "eval_samples_per_second": 8.543,
263
- "eval_steps_per_second": 0.27,
264
  "step": 64000
265
  },
266
  {
@@ -274,9 +274,9 @@
274
  "eval_bleu": 42.6248,
275
  "eval_gen_len": 34.6213,
276
  "eval_loss": 0.1019180566072464,
277
- "eval_runtime": 140.0619,
278
- "eval_samples_per_second": 7.447,
279
- "eval_steps_per_second": 0.236,
280
  "step": 68000
281
  },
282
  {
@@ -290,9 +290,9 @@
290
  "eval_bleu": 42.3349,
291
  "eval_gen_len": 35.0364,
292
  "eval_loss": 0.10193591564893723,
293
- "eval_runtime": 100.5506,
294
- "eval_samples_per_second": 10.373,
295
- "eval_steps_per_second": 0.328,
296
  "step": 72000
297
  },
298
  {
@@ -306,9 +306,9 @@
306
  "eval_bleu": 41.4387,
307
  "eval_gen_len": 37.6692,
308
  "eval_loss": 0.10164961963891983,
309
- "eval_runtime": 121.2832,
310
- "eval_samples_per_second": 8.6,
311
- "eval_steps_per_second": 0.272,
312
  "step": 76000
313
  },
314
  {
@@ -322,9 +322,9 @@
322
  "eval_bleu": 42.9505,
323
  "eval_gen_len": 34.2924,
324
  "eval_loss": 0.10143885016441345,
325
- "eval_runtime": 114.4227,
326
- "eval_samples_per_second": 9.115,
327
- "eval_steps_per_second": 0.288,
328
  "step": 80000
329
  }
330
  ],
 
18
  "eval_bleu": 35.1893,
19
  "eval_gen_len": 40.3663,
20
  "eval_loss": 0.10897237807512283,
21
+ "eval_runtime": 177.0634,
22
+ "eval_samples_per_second": 5.891,
23
+ "eval_steps_per_second": 0.186,
24
  "step": 4000
25
  },
26
  {
 
34
  "eval_bleu": 37.7679,
35
  "eval_gen_len": 37.7632,
36
  "eval_loss": 0.1064961776137352,
37
+ "eval_runtime": 157.7862,
38
+ "eval_samples_per_second": 6.61,
39
+ "eval_steps_per_second": 0.209,
40
  "step": 8000
41
  },
42
  {
 
50
  "eval_bleu": 41.5566,
51
  "eval_gen_len": 34.2157,
52
  "eval_loss": 0.10526148974895477,
53
+ "eval_runtime": 141.3674,
54
+ "eval_samples_per_second": 7.378,
55
+ "eval_steps_per_second": 0.233,
56
  "step": 12000
57
  },
58
  {
 
66
  "eval_bleu": 41.4171,
67
  "eval_gen_len": 35.5072,
68
  "eval_loss": 0.104159876704216,
69
+ "eval_runtime": 123.7589,
70
+ "eval_samples_per_second": 8.428,
71
+ "eval_steps_per_second": 0.267,
72
  "step": 16000
73
  },
74
  {
 
82
  "eval_bleu": 41.7523,
83
  "eval_gen_len": 35.6031,
84
  "eval_loss": 0.10359229892492294,
85
+ "eval_runtime": 148.6643,
86
+ "eval_samples_per_second": 7.016,
87
+ "eval_steps_per_second": 0.222,
88
  "step": 20000
89
  },
90
  {
 
98
  "eval_bleu": 41.5818,
99
  "eval_gen_len": 35.7987,
100
  "eval_loss": 0.10351784527301788,
101
+ "eval_runtime": 131.6747,
102
+ "eval_samples_per_second": 7.921,
103
+ "eval_steps_per_second": 0.251,
104
  "step": 24000
105
  },
106
  {
 
114
  "eval_bleu": 41.7604,
115
  "eval_gen_len": 35.0968,
116
  "eval_loss": 0.1027546375989914,
117
+ "eval_runtime": 137.6573,
118
+ "eval_samples_per_second": 7.577,
119
+ "eval_steps_per_second": 0.24,
120
  "step": 28000
121
  },
122
  {
 
130
  "eval_bleu": 41.9671,
131
  "eval_gen_len": 35.488,
132
  "eval_loss": 0.10217112302780151,
133
+ "eval_runtime": 124.3235,
134
+ "eval_samples_per_second": 8.389,
135
+ "eval_steps_per_second": 0.265,
136
  "step": 32000
137
  },
138
  {
 
146
  "eval_bleu": 41.2336,
147
  "eval_gen_len": 36.3039,
148
  "eval_loss": 0.1017456203699112,
149
+ "eval_runtime": 112.1734,
150
+ "eval_samples_per_second": 9.298,
151
+ "eval_steps_per_second": 0.294,
152
  "step": 36000
153
  },
154
  {
 
162
  "eval_bleu": 42.312,
163
  "eval_gen_len": 34.4314,
164
  "eval_loss": 0.10187986493110657,
165
+ "eval_runtime": 114.096,
166
+ "eval_samples_per_second": 9.141,
167
+ "eval_steps_per_second": 0.289,
168
  "step": 40000
169
  },
170
  {
 
178
  "eval_bleu": 42.1662,
179
  "eval_gen_len": 35.4765,
180
  "eval_loss": 0.10221881419420242,
181
+ "eval_runtime": 123.1753,
182
+ "eval_samples_per_second": 8.468,
183
+ "eval_steps_per_second": 0.268,
184
  "step": 44000
185
  },
186
  {
 
194
  "eval_bleu": 42.1347,
195
  "eval_gen_len": 35.303,
196
  "eval_loss": 0.10202226787805557,
197
+ "eval_runtime": 136.0863,
198
+ "eval_samples_per_second": 7.664,
199
+ "eval_steps_per_second": 0.242,
200
  "step": 48000
201
  },
202
  {
 
210
  "eval_bleu": 42.362,
211
  "eval_gen_len": 34.1179,
212
  "eval_loss": 0.10167563706636429,
213
+ "eval_runtime": 138.7955,
214
+ "eval_samples_per_second": 7.515,
215
+ "eval_steps_per_second": 0.238,
216
  "step": 52000
217
  },
218
  {
 
226
  "eval_bleu": 42.0947,
227
  "eval_gen_len": 35.8955,
228
  "eval_loss": 0.10157355666160583,
229
+ "eval_runtime": 162.7774,
230
+ "eval_samples_per_second": 6.408,
231
+ "eval_steps_per_second": 0.203,
232
  "step": 56000
233
  },
234
  {
 
242
  "eval_bleu": 42.504,
243
  "eval_gen_len": 34.5292,
244
  "eval_loss": 0.10091494768857956,
245
+ "eval_runtime": 143.7894,
246
+ "eval_samples_per_second": 7.254,
247
+ "eval_steps_per_second": 0.23,
248
  "step": 60000
249
  },
250
  {
 
258
  "eval_bleu": 41.9688,
259
  "eval_gen_len": 33.8217,
260
  "eval_loss": 0.10167799890041351,
261
+ "eval_runtime": 128.1156,
262
+ "eval_samples_per_second": 8.141,
263
+ "eval_steps_per_second": 0.258,
264
  "step": 64000
265
  },
266
  {
 
274
  "eval_bleu": 42.6248,
275
  "eval_gen_len": 34.6213,
276
  "eval_loss": 0.1019180566072464,
277
+ "eval_runtime": 136.0479,
278
+ "eval_samples_per_second": 7.666,
279
+ "eval_steps_per_second": 0.243,
280
  "step": 68000
281
  },
282
  {
 
290
  "eval_bleu": 42.3349,
291
  "eval_gen_len": 35.0364,
292
  "eval_loss": 0.10193591564893723,
293
+ "eval_runtime": 101.1208,
294
+ "eval_samples_per_second": 10.314,
295
+ "eval_steps_per_second": 0.326,
296
  "step": 72000
297
  },
298
  {
 
306
  "eval_bleu": 41.4387,
307
  "eval_gen_len": 37.6692,
308
  "eval_loss": 0.10164961963891983,
309
+ "eval_runtime": 125.4417,
310
+ "eval_samples_per_second": 8.315,
311
+ "eval_steps_per_second": 0.263,
312
  "step": 76000
313
  },
314
  {
 
322
  "eval_bleu": 42.9505,
323
  "eval_gen_len": 34.2924,
324
  "eval_loss": 0.10143885016441345,
325
+ "eval_runtime": 116.3426,
326
+ "eval_samples_per_second": 8.965,
327
+ "eval_steps_per_second": 0.284,
328
  "step": 80000
329
  }
330
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d092665eee12a3fa4a88455c50bbbac4daa679ca877ebc30400154070acc871
3
  size 3835
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d84e5a6c16cedb85a269964ac0820de47b0330360a6c003d0b08a8f090d7c49e
3
  size 3835