Fanucci committed on
Commit
0c4219a
·
verified ·
1 Parent(s): f3127d9

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41f22bec139eccefcabcaecfd30023a76c8ebfb3073337f09e75777d65a579c2
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8184d250b7999dee69332883ea324c78ec4c7f4598ad2667207642efe3aaeca
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b7d12ae0be9d05ec1cbc892152d8fdafaff4e73de9aa5873b148d1c273a89e2
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6319531c096fbc443a5808f5c7fda0178e30925ef23e76c2b4b0ff1e5dee0da
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3314d1f8528b0e933c3f5d6b0f4d79675fd95a3db3b829e6780f177bc655d78
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c62165be0f53737e55f3e75f021e8f584796137fe98feeca985918f86f5097e3
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:068fbd993087219c15b8c0baa13fc39644a4dcdfe92d8be3fa6434deece90371
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2768285b45b2a0c05f6f50bbb8c0287fca6f62a8cde6d1b1f02151ac72ee8dc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14236e1aaafe2e25b01597ed1466b255761963d528e38fe935aa925971b2510
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5948dcd3223dd0e3f4d6f300a2e7a88ae966cce604338b85ff6032bc67f692
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01561524047470331,
6
  "eval_steps": 50,
7
- "global_step": 250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -231,6 +231,49 @@
231
  "eval_samples_per_second": 15.219,
232
  "eval_steps_per_second": 15.219,
233
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ],
236
  "logging_steps": 10,
@@ -245,7 +288,7 @@
245
  "early_stopping_threshold": 0.0
246
  },
247
  "attributes": {
248
- "early_stopping_patience_counter": 5
249
  }
250
  },
251
  "TrainerControl": {
@@ -259,7 +302,7 @@
259
  "attributes": {}
260
  }
261
  },
262
- "total_flos": 1.02115540992e+16,
263
  "train_batch_size": 1,
264
  "trial_name": null,
265
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.018738288569643973,
6
  "eval_steps": 50,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
231
  "eval_samples_per_second": 15.219,
232
  "eval_steps_per_second": 15.219,
233
  "step": 250
234
+ },
235
+ {
236
+ "epoch": 0.016239850093691444,
237
+ "grad_norm": 19.0,
238
+ "learning_rate": 0.04259332407175751,
239
+ "loss": 8.5766,
240
+ "step": 260
241
+ },
242
+ {
243
+ "epoch": 0.016864459712679577,
244
+ "grad_norm": 282.0,
245
+ "learning_rate": 0.04202092725645009,
246
+ "loss": 10.2655,
247
+ "step": 270
248
+ },
249
+ {
250
+ "epoch": 0.017489069331667707,
251
+ "grad_norm": 7.53125,
252
+ "learning_rate": 0.04143139181019764,
253
+ "loss": 8.9695,
254
+ "step": 280
255
+ },
256
+ {
257
+ "epoch": 0.01811367895065584,
258
+ "grad_norm": 31.5,
259
+ "learning_rate": 0.040825311345221764,
260
+ "loss": 8.7729,
261
+ "step": 290
262
+ },
263
+ {
264
+ "epoch": 0.018738288569643973,
265
+ "grad_norm": 14.125,
266
+ "learning_rate": 0.04020329613317545,
267
+ "loss": 9.6525,
268
+ "step": 300
269
+ },
270
+ {
271
+ "epoch": 0.018738288569643973,
272
+ "eval_loss": 8.672038078308105,
273
+ "eval_runtime": 52.539,
274
+ "eval_samples_per_second": 16.045,
275
+ "eval_steps_per_second": 16.045,
276
+ "step": 300
277
  }
278
  ],
279
  "logging_steps": 10,
 
288
  "early_stopping_threshold": 0.0
289
  },
290
  "attributes": {
291
+ "early_stopping_patience_counter": 6
292
  }
293
  },
294
  "TrainerControl": {
 
302
  "attributes": {}
303
  }
304
  },
305
+ "total_flos": 1.225386491904e+16,
306
  "train_batch_size": 1,
307
  "trial_name": null,
308
  "trial_params": null