besimray commited on
Commit
92c07db
·
verified ·
1 Parent(s): a120433

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff1659573be9832300f4f4efea5a2cf7e4b44363f06622e7e55214c46e1143b5
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e3a59b8d07ffec409f080d114832aaadae842ee86092d982a9665e5cb7415fe
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b2fab67c049fb2be004c11732675fa014bc78c5e94282c5a42a91db5f153a03
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ca34ab57cbc5b9c61f82581002e26b465a30bbf06b34c77ee426b13a0281df
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e90410ed8d75deee232d46a71672a78439ef812c0e8c37ade4c255c49bee23b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883f28eb7c189cd34593e2e1d90c086421ac0e064f525f629bfedbc0ac7bb029
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8ae5b9632b883900417a4b328f111a055e2a3387d176daa619ce2ea248142d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2def2cd24154d8cecbaa07c36ae27e5ebb9b7273a78abfea27aa67c480e4ae2b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3333333333333335,
5
  "eval_steps": 3,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -254,6 +254,57 @@
254
  "learning_rate": 2.9289321881345254e-05,
255
  "loss": 1.1115,
256
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  }
258
  ],
259
  "logging_steps": 1,
@@ -268,12 +319,12 @@
268
  "should_evaluate": false,
269
  "should_log": false,
270
  "should_save": true,
271
- "should_training_stop": false
272
  },
273
  "attributes": {}
274
  }
275
  },
276
- "total_flos": 4707063061020672.0,
277
  "train_batch_size": 2,
278
  "trial_name": null,
279
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.8095238095238093,
5
  "eval_steps": 3,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
254
  "learning_rate": 2.9289321881345254e-05,
255
  "loss": 1.1115,
256
  "step": 25
257
+ },
258
+ {
259
+ "epoch": 2.4285714285714284,
260
+ "grad_norm": 0.16207309067249298,
261
+ "learning_rate": 1.9098300562505266e-05,
262
+ "loss": 1.19,
263
+ "step": 26
264
+ },
265
+ {
266
+ "epoch": 2.5238095238095237,
267
+ "grad_norm": 0.16097423434257507,
268
+ "learning_rate": 1.0899347581163221e-05,
269
+ "loss": 1.1981,
270
+ "step": 27
271
+ },
272
+ {
273
+ "epoch": 2.5238095238095237,
274
+ "eval_loss": 1.141075849533081,
275
+ "eval_runtime": 6.4063,
276
+ "eval_samples_per_second": 15.61,
277
+ "eval_steps_per_second": 7.805,
278
+ "step": 27
279
+ },
280
+ {
281
+ "epoch": 2.619047619047619,
282
+ "grad_norm": 0.16795802116394043,
283
+ "learning_rate": 4.8943483704846475e-06,
284
+ "loss": 1.1345,
285
+ "step": 28
286
+ },
287
+ {
288
+ "epoch": 2.7142857142857144,
289
+ "grad_norm": 0.16206511855125427,
290
+ "learning_rate": 1.231165940486234e-06,
291
+ "loss": 1.1223,
292
+ "step": 29
293
+ },
294
+ {
295
+ "epoch": 2.8095238095238093,
296
+ "grad_norm": 0.16133540868759155,
297
+ "learning_rate": 0.0,
298
+ "loss": 1.1377,
299
+ "step": 30
300
+ },
301
+ {
302
+ "epoch": 2.8095238095238093,
303
+ "eval_loss": 1.1379709243774414,
304
+ "eval_runtime": 6.1889,
305
+ "eval_samples_per_second": 16.158,
306
+ "eval_steps_per_second": 8.079,
307
+ "step": 30
308
  }
309
  ],
310
  "logging_steps": 1,
 
319
  "should_evaluate": false,
320
  "should_log": false,
321
  "should_save": true,
322
+ "should_training_stop": true
323
  },
324
  "attributes": {}
325
  }
326
  },
327
+ "total_flos": 5663067115094016.0,
328
  "train_batch_size": 2,
329
  "trial_name": null,
330
  "trial_params": null