Fanucci committed on
Commit
e7e5007
·
verified ·
1 Parent(s): 47fade3

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d40108708aeca3eaee7f0ad206931c6971e2dc1c0db296b59b7b54ba6a7fda9
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61114c6a92e58ce464f00de9c099e8f03dc4230c177b928f5956c816d7725990
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dafdcb23d62ea12e96105611e45ce3d28675d5347aa244e71634b87bbe5c0410
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbbf4da0f14a88e52ba3a2a4073b4aaa1c19709f107ca0ea0f7d394e7b8943e5
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34513f8c50c565c1da3397b555076bf3d95cd033739a8c628616c520dab385df
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42dcce8c9a2d0c024270cf90f873168e73cdade4af51b5e3849a0fd68c66911
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:068fbd993087219c15b8c0baa13fc39644a4dcdfe92d8be3fa6434deece90371
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2768285b45b2a0c05f6f50bbb8c0287fca6f62a8cde6d1b1f02151ac72ee8dc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14236e1aaafe2e25b01597ed1466b255761963d528e38fe935aa925971b2510
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5948dcd3223dd0e3f4d6f300a2e7a88ae966cce604338b85ff6032bc67f692
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01561524047470331,
6
  "eval_steps": 50,
7
- "global_step": 250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -231,6 +231,49 @@
231
  "eval_samples_per_second": 14.875,
232
  "eval_steps_per_second": 14.875,
233
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ],
236
  "logging_steps": 10,
@@ -245,7 +288,7 @@
245
  "early_stopping_threshold": 0.0
246
  },
247
  "attributes": {
248
- "early_stopping_patience_counter": 5
249
  }
250
  },
251
  "TrainerControl": {
@@ -259,7 +302,7 @@
259
  "attributes": {}
260
  }
261
  },
262
- "total_flos": 1.02115540992e+16,
263
  "train_batch_size": 1,
264
  "trial_name": null,
265
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.018738288569643973,
6
  "eval_steps": 50,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
231
  "eval_samples_per_second": 14.875,
232
  "eval_steps_per_second": 14.875,
233
  "step": 250
234
+ },
235
+ {
236
+ "epoch": 0.016239850093691444,
237
+ "grad_norm": 2976.0,
238
+ "learning_rate": 0.04259332407175751,
239
+ "loss": 8.7117,
240
+ "step": 260
241
+ },
242
+ {
243
+ "epoch": 0.016864459712679577,
244
+ "grad_norm": 604.0,
245
+ "learning_rate": 0.04202092725645009,
246
+ "loss": 10.3623,
247
+ "step": 270
248
+ },
249
+ {
250
+ "epoch": 0.017489069331667707,
251
+ "grad_norm": 2688.0,
252
+ "learning_rate": 0.04143139181019764,
253
+ "loss": 10.8841,
254
+ "step": 280
255
+ },
256
+ {
257
+ "epoch": 0.01811367895065584,
258
+ "grad_norm": 141.0,
259
+ "learning_rate": 0.040825311345221764,
260
+ "loss": 9.8937,
261
+ "step": 290
262
+ },
263
+ {
264
+ "epoch": 0.018738288569643973,
265
+ "grad_norm": 252.0,
266
+ "learning_rate": 0.04020329613317545,
267
+ "loss": 12.0118,
268
+ "step": 300
269
+ },
270
+ {
271
+ "epoch": 0.018738288569643973,
272
+ "eval_loss": 9.644805908203125,
273
+ "eval_runtime": 56.3116,
274
+ "eval_samples_per_second": 14.97,
275
+ "eval_steps_per_second": 14.97,
276
+ "step": 300
277
  }
278
  ],
279
  "logging_steps": 10,
 
288
  "early_stopping_threshold": 0.0
289
  },
290
  "attributes": {
291
+ "early_stopping_patience_counter": 6
292
  }
293
  },
294
  "TrainerControl": {
 
302
  "attributes": {}
303
  }
304
  },
305
+ "total_flos": 1.225386491904e+16,
306
  "train_batch_size": 1,
307
  "trial_name": null,
308
  "trial_params": null