SystemAdmin123 committed on
Commit
c98d2e1
·
verified ·
1 Parent(s): 5104fdf

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aeb37187936d017f6bd51736738766a1f2cd5d041a79e47044d4da52589bf50
3
  size 250490408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb3944baba224fb9f68ab01233170ab280c7bca8803f3c7e71a714c5f3fdcf93
3
  size 250490408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d8581669082f016d0e4e6b12b964126b4e2bcf55585a4a4b4bf94b576f2041e
3
- size 255265850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2faf34cac979c57dfb55da136bfa774c54eaef839c48e6205e326aa7111a8154
3
+ size 255266042
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30ca12c54f4164ace515795e08e0960f0c28e1845dd3bb744b613ac48e9edba6
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c659953fa2749100d286c777ca42cee4acfebcb66827f7bc777ef8c79c6ffa46
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df12b162a1b11037b9375aff4eed1b0f26be6ed2687bf1154d65e88dde5f9250
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6165eeddacf3a11e1b3e53c4e5290a59148da44d6a02a50c118da0899808e516
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:748ed266e9432323e41e747f49e84d108918da883711ff6e01c8135af1c286fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c62f46b6ecdcc9452fbe00042ae2d6095daf65d97a17c5763e0d0ed253cea52
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.833333333333332,
5
  "eval_steps": 50,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -230,6 +230,49 @@
230
  "eval_samples_per_second": 143.936,
231
  "eval_steps_per_second": 2.301,
232
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
  ],
235
  "logging_steps": 10,
@@ -249,7 +292,7 @@
249
  "attributes": {}
250
  }
251
  },
252
- "total_flos": 1.6216828598550528e+16,
253
  "train_batch_size": 32,
254
  "trial_name": null,
255
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 25.0,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
230
  "eval_samples_per_second": 143.936,
231
  "eval_steps_per_second": 2.301,
232
  "step": 250
233
+ },
234
+ {
235
+ "epoch": 21.666666666666668,
236
+ "grad_norm": 4.03125,
237
+ "learning_rate": 0.00012985148110016947,
238
+ "loss": 2.9827,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 22.5,
243
+ "grad_norm": 3.953125,
244
+ "learning_rate": 0.00012454854871407994,
245
+ "loss": 2.9238,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 23.333333333333332,
250
+ "grad_norm": 4.25,
251
+ "learning_rate": 0.00011917106319237386,
252
+ "loss": 2.8875,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 24.166666666666668,
257
+ "grad_norm": 3.90625,
258
+ "learning_rate": 0.00011373535578184082,
259
+ "loss": 2.836,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 25.0,
264
+ "grad_norm": 3.703125,
265
+ "learning_rate": 0.00010825793454723325,
266
+ "loss": 2.8044,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 25.0,
271
+ "eval_loss": 3.1132636070251465,
272
+ "eval_runtime": 10.3535,
273
+ "eval_samples_per_second": 144.976,
274
+ "eval_steps_per_second": 2.318,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
292
  "attributes": {}
293
  }
294
  },
295
+ "total_flos": 1.9436991670124544e+16,
296
  "train_batch_size": 32,
297
  "trial_name": null,
298
  "trial_params": null