irishprancer commited on
Commit
50e7fa6
·
verified ·
1 Parent(s): 337b921

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d521622db8e0c7fae9ad561a127c2650738869afc43c26be15d38a48ed692348
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b208fb04bd797b877fa1dfdacd8a72771192807ce16faf614c1db8f415c46813
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:654d5f69f2b7f359b70dc71836116986b4ff50d816aa31fd86020592c2ad75c8
3
  size 1054136250
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bda7e786e2e656f96080f96850aaef8ced5c1e58267af79f77f9d2d7ca8a316
3
  size 1054136250
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:226f394c3a9826cc7f74d0799aa02f643f1ee6b891784f44c588787dbc9c0cb3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed14e15604e1097b80da74a65c68f380dc6bb673bf5694a945c25e7931ad5a75
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2673d78ac7304a2a7678ae71ed65422fa2295f07aca63cf23ca76e0b5c92da69
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb248e7cc2fe7b509c9e866be7b72af3b33225d8b86373c1a62393cc3a24f4da
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7681264281272888,
3
- "best_model_checkpoint": "./output/checkpoint-150",
4
- "epoch": 3.3333333333333335,
5
  "eval_steps": 150,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -120,6 +120,232 @@
120
  "eval_samples_per_second": 21.2,
121
  "eval_steps_per_second": 21.2,
122
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  ],
125
  "logging_steps": 10,
@@ -139,7 +365,7 @@
139
  "attributes": {}
140
  }
141
  },
142
- "total_flos": 1615667282657280.0,
143
  "train_batch_size": 2,
144
  "trial_name": null,
145
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7268816828727722,
3
+ "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 10.0,
5
  "eval_steps": 150,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
120
  "eval_samples_per_second": 21.2,
121
  "eval_steps_per_second": 21.2,
122
  "step": 150
123
+ },
124
+ {
125
+ "epoch": 3.5555555555555554,
126
+ "grad_norm": 2.575085401535034,
127
+ "learning_rate": 2.9999892980750276e-05,
128
+ "loss": 0.6946,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 3.7777777777777777,
133
+ "grad_norm": 2.4067776203155518,
134
+ "learning_rate": 2.9999854334972655e-05,
135
+ "loss": 0.6932,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 4.0,
140
+ "grad_norm": 2.5401134490966797,
141
+ "learning_rate": 2.999980974373202e-05,
142
+ "loss": 0.6686,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 4.222222222222222,
147
+ "grad_norm": 1.5957380533218384,
148
+ "learning_rate": 2.9999759207046055e-05,
149
+ "loss": 0.5898,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 4.444444444444445,
154
+ "grad_norm": 1.6820542812347412,
155
+ "learning_rate": 2.9999702724934783e-05,
156
+ "loss": 0.7104,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 4.666666666666667,
161
+ "grad_norm": 3.4158143997192383,
162
+ "learning_rate": 2.99996402974206e-05,
163
+ "loss": 0.628,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 4.888888888888889,
168
+ "grad_norm": 2.0864531993865967,
169
+ "learning_rate": 2.9999571924528243e-05,
170
+ "loss": 0.6736,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 5.111111111111111,
175
+ "grad_norm": 2.6299381256103516,
176
+ "learning_rate": 2.9999497606284816e-05,
177
+ "loss": 0.6027,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 5.333333333333333,
182
+ "grad_norm": 1.4852367639541626,
183
+ "learning_rate": 2.9999417342719775e-05,
184
+ "loss": 0.6946,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 5.555555555555555,
189
+ "grad_norm": 2.2180473804473877,
190
+ "learning_rate": 2.9999331133864935e-05,
191
+ "loss": 0.6481,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 5.777777777777778,
196
+ "grad_norm": 1.7053271532058716,
197
+ "learning_rate": 2.9999238979754465e-05,
198
+ "loss": 0.6095,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "grad_norm": 1.8998974561691284,
204
+ "learning_rate": 2.99991408804249e-05,
205
+ "loss": 0.5759,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 6.222222222222222,
210
+ "grad_norm": 2.1407132148742676,
211
+ "learning_rate": 2.999903683591511e-05,
212
+ "loss": 0.5739,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 6.444444444444445,
217
+ "grad_norm": 1.3923563957214355,
218
+ "learning_rate": 2.9998926846266345e-05,
219
+ "loss": 0.6132,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 6.666666666666667,
224
+ "grad_norm": 2.2752490043640137,
225
+ "learning_rate": 2.9998810911522193e-05,
226
+ "loss": 0.6228,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 6.666666666666667,
231
+ "eval_loss": 0.7380812168121338,
232
+ "eval_runtime": 0.4678,
233
+ "eval_samples_per_second": 21.377,
234
+ "eval_steps_per_second": 21.377,
235
+ "step": 300
236
+ },
237
+ {
238
+ "epoch": 6.888888888888889,
239
+ "grad_norm": 2.714766025543213,
240
+ "learning_rate": 2.9998689031728615e-05,
241
+ "loss": 0.6482,
242
+ "step": 310
243
+ },
244
+ {
245
+ "epoch": 7.111111111111111,
246
+ "grad_norm": 1.7628875970840454,
247
+ "learning_rate": 2.9998561206933918e-05,
248
+ "loss": 0.5861,
249
+ "step": 320
250
+ },
251
+ {
252
+ "epoch": 7.333333333333333,
253
+ "grad_norm": 1.7664484977722168,
254
+ "learning_rate": 2.9998427437188766e-05,
255
+ "loss": 0.5797,
256
+ "step": 330
257
+ },
258
+ {
259
+ "epoch": 7.555555555555555,
260
+ "grad_norm": 2.3483684062957764,
261
+ "learning_rate": 2.999828772254618e-05,
262
+ "loss": 0.6032,
263
+ "step": 340
264
+ },
265
+ {
266
+ "epoch": 7.777777777777778,
267
+ "grad_norm": 2.471663236618042,
268
+ "learning_rate": 2.9998142063061544e-05,
269
+ "loss": 0.6629,
270
+ "step": 350
271
+ },
272
+ {
273
+ "epoch": 8.0,
274
+ "grad_norm": 1.3223252296447754,
275
+ "learning_rate": 2.9997990458792583e-05,
276
+ "loss": 0.6038,
277
+ "step": 360
278
+ },
279
+ {
280
+ "epoch": 8.222222222222221,
281
+ "grad_norm": 1.9036260843276978,
282
+ "learning_rate": 2.9997832909799397e-05,
283
+ "loss": 0.5487,
284
+ "step": 370
285
+ },
286
+ {
287
+ "epoch": 8.444444444444445,
288
+ "grad_norm": 1.9370992183685303,
289
+ "learning_rate": 2.9997669416144432e-05,
290
+ "loss": 0.6407,
291
+ "step": 380
292
+ },
293
+ {
294
+ "epoch": 8.666666666666666,
295
+ "grad_norm": 1.0507700443267822,
296
+ "learning_rate": 2.999749997789249e-05,
297
+ "loss": 0.54,
298
+ "step": 390
299
+ },
300
+ {
301
+ "epoch": 8.88888888888889,
302
+ "grad_norm": 1.5254780054092407,
303
+ "learning_rate": 2.9997324595110723e-05,
304
+ "loss": 0.6542,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 9.11111111111111,
309
+ "grad_norm": 1.3856695890426636,
310
+ "learning_rate": 2.9997143267868663e-05,
311
+ "loss": 0.595,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 9.333333333333334,
316
+ "grad_norm": 2.4855446815490723,
317
+ "learning_rate": 2.999695599623817e-05,
318
+ "loss": 0.6228,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 9.555555555555555,
323
+ "grad_norm": 2.5905134677886963,
324
+ "learning_rate": 2.9996762780293483e-05,
325
+ "loss": 0.5749,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 9.777777777777779,
330
+ "grad_norm": 1.533772349357605,
331
+ "learning_rate": 2.9996563620111176e-05,
332
+ "loss": 0.5296,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 10.0,
337
+ "grad_norm": 1.3861850500106812,
338
+ "learning_rate": 2.9996358515770198e-05,
339
+ "loss": 0.5418,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 10.0,
344
+ "eval_loss": 0.7268816828727722,
345
+ "eval_runtime": 0.425,
346
+ "eval_samples_per_second": 23.528,
347
+ "eval_steps_per_second": 23.528,
348
+ "step": 450
349
  }
350
  ],
351
  "logging_steps": 10,
 
365
  "attributes": {}
366
  }
367
  },
368
+ "total_flos": 4801636770840576.0,
369
  "train_batch_size": 2,
370
  "trial_name": null,
371
  "trial_params": null