mgh6 commited on
Commit
8cc9e10
·
verified ·
1 Parent(s): c5e486a

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4eb42c1b016e02049b8915fc6c4bd8e40713ecb1bf12ad42b9d3e8939f0a8b73
3
  size 2610104820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c465399234deee9d04960eeb7dcc2195d9e15c74536a227ceb6fdca4e20454c
3
  size 2610104820
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77d07cd9696454e9c441a6f24d914750ff9ecd0f2d8ba9601295dcb1152b2704
3
  size 5210004271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0b1fa3f35f2a0e7d8a2627c44dca4f4d2cad1ccbe1376157aa9063a6d9e5f3
3
  size 5210004271
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a9ca8e9630b65f3d22543e10ee333ae6f602385a9988471ec0251618f4f6a6
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2f5546fc83506789b1d0adb53d6b6a28f107b064c7f808a5d419eeca20b8cf
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:982ed52dbedaa8b58e4675dd9e14089cf4920fc60a888f8c18ae8fefed68e39a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68f472c819beed817ac9f811e783cb3b4e6768b75b8d75dac6c446449f746786
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.997206530510894,
5
  "eval_steps": 50,
6
- "global_step": 1757,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -532,6 +532,81 @@
532
  "eval_samples_per_second": 31.408,
533
  "eval_steps_per_second": 15.704,
534
  "step": 1750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
  }
536
  ],
537
  "logging_steps": 50,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.997206530510894,
5
  "eval_steps": 50,
6
+ "global_step": 2008,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
532
  "eval_samples_per_second": 31.408,
533
  "eval_steps_per_second": 15.704,
534
  "step": 1750
535
+ },
536
+ {
537
+ "epoch": 7.1708361785337384,
538
+ "grad_norm": 0.16648398339748383,
539
+ "learning_rate": 2.8286852589641438e-05,
540
+ "loss": 0.0147,
541
+ "step": 1800
542
+ },
543
+ {
544
+ "epoch": 7.1708361785337384,
545
+ "eval_loss": 0.049295973032712936,
546
+ "eval_runtime": 26.9925,
547
+ "eval_samples_per_second": 31.416,
548
+ "eval_steps_per_second": 15.708,
549
+ "step": 1800
550
+ },
551
+ {
552
+ "epoch": 7.369482897759017,
553
+ "grad_norm": 0.18032623827457428,
554
+ "learning_rate": 2.6294820717131475e-05,
555
+ "loss": 0.0137,
556
+ "step": 1850
557
+ },
558
+ {
559
+ "epoch": 7.369482897759017,
560
+ "eval_loss": 0.04836704209446907,
561
+ "eval_runtime": 27.0294,
562
+ "eval_samples_per_second": 31.373,
563
+ "eval_steps_per_second": 15.687,
564
+ "step": 1850
565
+ },
566
+ {
567
+ "epoch": 7.568129616984295,
568
+ "grad_norm": 0.15208186209201813,
569
+ "learning_rate": 2.4302788844621517e-05,
570
+ "loss": 0.0136,
571
+ "step": 1900
572
+ },
573
+ {
574
+ "epoch": 7.568129616984295,
575
+ "eval_loss": 0.04785418510437012,
576
+ "eval_runtime": 27.0012,
577
+ "eval_samples_per_second": 31.406,
578
+ "eval_steps_per_second": 15.703,
579
+ "step": 1900
580
+ },
581
+ {
582
+ "epoch": 7.766776336209572,
583
+ "grad_norm": 0.14509941637516022,
584
+ "learning_rate": 2.2310756972111554e-05,
585
+ "loss": 0.0139,
586
+ "step": 1950
587
+ },
588
+ {
589
+ "epoch": 7.766776336209572,
590
+ "eval_loss": 0.04705703631043434,
591
+ "eval_runtime": 27.0106,
592
+ "eval_samples_per_second": 31.395,
593
+ "eval_steps_per_second": 15.698,
594
+ "step": 1950
595
+ },
596
+ {
597
+ "epoch": 7.96542305543485,
598
+ "grad_norm": 0.14901742339134216,
599
+ "learning_rate": 2.0318725099601595e-05,
600
+ "loss": 0.0131,
601
+ "step": 2000
602
+ },
603
+ {
604
+ "epoch": 7.96542305543485,
605
+ "eval_loss": 0.04676016792654991,
606
+ "eval_runtime": 27.0142,
607
+ "eval_samples_per_second": 31.391,
608
+ "eval_steps_per_second": 15.695,
609
+ "step": 2000
610
  }
611
  ],
612
  "logging_steps": 50,