bradmin committed on
Commit
f5597fe
·
1 Parent(s): 1f9f3a6

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1635ff493aa519c5f30369636e8878234b655a0814e69ae4d8564d50dcdeda5
3
  size 1001436090
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993ebd29d60cf74d729607be7535ff900f1686ae6540c14b5448b141c17c89bd
3
  size 1001436090
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cadba7cba1c5a5048705cb26b34fbce7d0efdc1f379d462a37db7987df19fcd
3
  size 500704602
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f752d463e725c61a26b667909ecce1aa465b98909bf3944f71ea3f1f1f9164ef
3
  size 500704602
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1701f11bae0225927f0a15fe016a503fb5442618ce4f6adfa514bb6b69ae591
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b68988526a415dcb3130161d19be0023ecfa7543158f651622c931e2d744176
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecc42a44c7d848601d80219f0d1eb00781589042cb6b9acb00e933718b0d9adb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1679676eae92a38677246489e338d905f8ad588bd072400ef5910115fa41db57
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9305019305019306,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -147,13 +147,153 @@
147
  "eval_samples_per_second": 184.369,
148
  "eval_steps_per_second": 11.543,
149
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  ],
152
  "logging_steps": 100,
153
  "max_steps": 12950,
154
  "num_train_epochs": 25,
155
  "save_steps": 1000,
156
- "total_flos": 5004154253952000.0,
157
  "trial_name": null,
158
  "trial_params": null
159
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.861003861003861,
5
  "eval_steps": 100,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
147
  "eval_samples_per_second": 184.369,
148
  "eval_steps_per_second": 11.543,
149
  "step": 1000
150
+ },
151
+ {
152
+ "epoch": 2.12,
153
+ "learning_rate": 9.15057915057915e-05,
154
+ "loss": 2.5298,
155
+ "step": 1100
156
+ },
157
+ {
158
+ "epoch": 2.12,
159
+ "eval_loss": 2.9245471954345703,
160
+ "eval_runtime": 15.8964,
161
+ "eval_samples_per_second": 183.878,
162
+ "eval_steps_per_second": 11.512,
163
+ "step": 1100
164
+ },
165
+ {
166
+ "epoch": 2.32,
167
+ "learning_rate": 9.073359073359073e-05,
168
+ "loss": 2.4042,
169
+ "step": 1200
170
+ },
171
+ {
172
+ "epoch": 2.32,
173
+ "eval_loss": 2.9138386249542236,
174
+ "eval_runtime": 15.8757,
175
+ "eval_samples_per_second": 184.117,
176
+ "eval_steps_per_second": 11.527,
177
+ "step": 1200
178
+ },
179
+ {
180
+ "epoch": 2.51,
181
+ "learning_rate": 8.996138996138996e-05,
182
+ "loss": 2.454,
183
+ "step": 1300
184
+ },
185
+ {
186
+ "epoch": 2.51,
187
+ "eval_loss": 2.910158157348633,
188
+ "eval_runtime": 15.8495,
189
+ "eval_samples_per_second": 184.422,
190
+ "eval_steps_per_second": 11.546,
191
+ "step": 1300
192
+ },
193
+ {
194
+ "epoch": 2.7,
195
+ "learning_rate": 8.918918918918919e-05,
196
+ "loss": 2.4826,
197
+ "step": 1400
198
+ },
199
+ {
200
+ "epoch": 2.7,
201
+ "eval_loss": 2.8909964561462402,
202
+ "eval_runtime": 15.8725,
203
+ "eval_samples_per_second": 184.155,
204
+ "eval_steps_per_second": 11.529,
205
+ "step": 1400
206
+ },
207
+ {
208
+ "epoch": 2.9,
209
+ "learning_rate": 8.841698841698842e-05,
210
+ "loss": 2.4967,
211
+ "step": 1500
212
+ },
213
+ {
214
+ "epoch": 2.9,
215
+ "eval_loss": 2.899423122406006,
216
+ "eval_runtime": 15.8786,
217
+ "eval_samples_per_second": 184.084,
218
+ "eval_steps_per_second": 11.525,
219
+ "step": 1500
220
+ },
221
+ {
222
+ "epoch": 3.09,
223
+ "learning_rate": 8.764478764478765e-05,
224
+ "loss": 2.3621,
225
+ "step": 1600
226
+ },
227
+ {
228
+ "epoch": 3.09,
229
+ "eval_loss": 2.966057538986206,
230
+ "eval_runtime": 15.8621,
231
+ "eval_samples_per_second": 184.276,
232
+ "eval_steps_per_second": 11.537,
233
+ "step": 1600
234
+ },
235
+ {
236
+ "epoch": 3.28,
237
+ "learning_rate": 8.687258687258688e-05,
238
+ "loss": 2.1633,
239
+ "step": 1700
240
+ },
241
+ {
242
+ "epoch": 3.28,
243
+ "eval_loss": 2.950388193130493,
244
+ "eval_runtime": 15.8876,
245
+ "eval_samples_per_second": 183.98,
246
+ "eval_steps_per_second": 11.518,
247
+ "step": 1700
248
+ },
249
+ {
250
+ "epoch": 3.47,
251
+ "learning_rate": 8.61003861003861e-05,
252
+ "loss": 2.2039,
253
+ "step": 1800
254
+ },
255
+ {
256
+ "epoch": 3.47,
257
+ "eval_loss": 2.9389660358428955,
258
+ "eval_runtime": 15.8602,
259
+ "eval_samples_per_second": 184.298,
260
+ "eval_steps_per_second": 11.538,
261
+ "step": 1800
262
+ },
263
+ {
264
+ "epoch": 3.67,
265
+ "learning_rate": 8.532818532818533e-05,
266
+ "loss": 2.2382,
267
+ "step": 1900
268
+ },
269
+ {
270
+ "epoch": 3.67,
271
+ "eval_loss": 2.94706392288208,
272
+ "eval_runtime": 15.8638,
273
+ "eval_samples_per_second": 184.256,
274
+ "eval_steps_per_second": 11.536,
275
+ "step": 1900
276
+ },
277
+ {
278
+ "epoch": 3.86,
279
+ "learning_rate": 8.455598455598456e-05,
280
+ "loss": 2.2235,
281
+ "step": 2000
282
+ },
283
+ {
284
+ "epoch": 3.86,
285
+ "eval_loss": 2.9332854747772217,
286
+ "eval_runtime": 15.8506,
287
+ "eval_samples_per_second": 184.409,
288
+ "eval_steps_per_second": 11.545,
289
+ "step": 2000
290
  }
291
  ],
292
  "logging_steps": 100,
293
  "max_steps": 12950,
294
  "num_train_epochs": 25,
295
  "save_steps": 1000,
296
+ "total_flos": 1.0018169220096e+16,
297
  "trial_name": null,
298
  "trial_params": null
299
  }