mgh6 commited on
Commit
9d9935e
·
verified ·
1 Parent(s): c0b0d58

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06eac7db7293a0170434a5a23ec2899d5d06054f91f8ca97b03ece381e880aaf
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b8a05354bfc115960dd16218e1ebb70714a6d58cf36c6dfcbb59abbf712ee6c
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc5048b335a64ba6e0fcfa9db527f603faeadaf4eb63cc9f7d9458347d5cc78
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:789221ac653e4b18a53f5043ec5c3993652f6e86b04a6c8b6fa2041e24213b49
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d562835a401abebf3538c5ff829d4da5e3b042c2e18f89b321c3c030d691c233
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1791dcc0c8e5d9192b7663e1b585ff1dccd113452a5e1d1cca250b5164d37cb8
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fca60d3e211c90170c539f1ba6815cff92dba636daaf20fea2fac1d2459b3c9f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52002c03f9e92e22821e4d80f4cf803ea1f4727e756138e319ab19f9be28c22e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.9972065305108946,
5
  "eval_steps": 50,
6
- "global_step": 1004,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,6 +307,81 @@
307
  "eval_samples_per_second": 57.598,
308
  "eval_steps_per_second": 28.799,
309
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 50,
@@ -326,7 +401,7 @@
326
  "attributes": {}
327
  }
328
  },
329
- "total_flos": 1.6024625345881702e+17,
330
  "train_batch_size": 2,
331
  "trial_name": null,
332
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.997206530510894,
5
  "eval_steps": 50,
6
+ "global_step": 1255,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "eval_samples_per_second": 57.598,
308
  "eval_steps_per_second": 28.799,
309
  "step": 1000
310
+ },
311
+ {
312
+ "epoch": 4.182754981687255,
313
+ "grad_norm": 0.9160069227218628,
314
+ "learning_rate": 5.816733067729084e-05,
315
+ "loss": 1.0749,
316
+ "step": 1050
317
+ },
318
+ {
319
+ "epoch": 4.182754981687255,
320
+ "eval_loss": 1.1225874423980713,
321
+ "eval_runtime": 14.7411,
322
+ "eval_samples_per_second": 57.526,
323
+ "eval_steps_per_second": 28.763,
324
+ "step": 1050
325
+ },
326
+ {
327
+ "epoch": 4.381401700912534,
328
+ "grad_norm": 1.1243151426315308,
329
+ "learning_rate": 5.6175298804780876e-05,
330
+ "loss": 1.0462,
331
+ "step": 1100
332
+ },
333
+ {
334
+ "epoch": 4.381401700912534,
335
+ "eval_loss": 1.1159089803695679,
336
+ "eval_runtime": 14.5859,
337
+ "eval_samples_per_second": 58.138,
338
+ "eval_steps_per_second": 29.069,
339
+ "step": 1100
340
+ },
341
+ {
342
+ "epoch": 4.580048420137811,
343
+ "grad_norm": 1.018583059310913,
344
+ "learning_rate": 5.418326693227092e-05,
345
+ "loss": 1.052,
346
+ "step": 1150
347
+ },
348
+ {
349
+ "epoch": 4.580048420137811,
350
+ "eval_loss": 1.1180405616760254,
351
+ "eval_runtime": 14.8148,
352
+ "eval_samples_per_second": 57.24,
353
+ "eval_steps_per_second": 28.62,
354
+ "step": 1150
355
+ },
356
+ {
357
+ "epoch": 4.778695139363089,
358
+ "grad_norm": 0.9607245922088623,
359
+ "learning_rate": 5.219123505976096e-05,
360
+ "loss": 1.0432,
361
+ "step": 1200
362
+ },
363
+ {
364
+ "epoch": 4.778695139363089,
365
+ "eval_loss": 1.0965369939804077,
366
+ "eval_runtime": 14.6889,
367
+ "eval_samples_per_second": 57.731,
368
+ "eval_steps_per_second": 28.865,
369
+ "step": 1200
370
+ },
371
+ {
372
+ "epoch": 4.977341858588367,
373
+ "grad_norm": 1.06922447681427,
374
+ "learning_rate": 5.0199203187251e-05,
375
+ "loss": 1.0289,
376
+ "step": 1250
377
+ },
378
+ {
379
+ "epoch": 4.977341858588367,
380
+ "eval_loss": 1.1008275747299194,
381
+ "eval_runtime": 14.6242,
382
+ "eval_samples_per_second": 57.986,
383
+ "eval_steps_per_second": 28.993,
384
+ "step": 1250
385
  }
386
  ],
387
  "logging_steps": 50,
 
401
  "attributes": {}
402
  }
403
  },
404
+ "total_flos": 2.0033664206870938e+17,
405
  "train_batch_size": 2,
406
  "trial_name": null,
407
  "trial_params": null