FredericFan committed on
Commit
c585e44
·
verified ·
1 Parent(s): 42a5bf6

Training in progress, step 24500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f9311e2550653c7cfff27cdfcd9ae1c1ccbbeaa1e9481e382a45fb1ada32568
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24784d7d4d1fc3fbb8fc67b836bb1c712f3f7b0c64c99ff1439b5fe2a051a5b
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b0751212d7784f97747445314fe884f391f2159de9683d5358f326eecd2a4d1
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:970bf634ac4ebe2edc929d89a76708d0db104aba94f7c07086ba7355e47fc214
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d17b897621ac09187e8d36f4eeef5f19583342b14705476fa86e2dbea16377ec
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21c4e82da06fbd2a474e3defc7564dd624a46c9abb731c268ce51609d77b1972
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed5584b494a8ffe5156189ac2ab8d1dfe54f3dfebb02cc807c1073c45dc03931
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a72d6c77b06746b972f475e80b7808db62786b79e89637fe4fa97684c444ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08148019760847092,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-24000",
4
- "epoch": 1.92,
5
  "eval_steps": 500,
6
- "global_step": 24000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3751,6 +3751,84 @@
3751
  "eval_samples_per_second": 22.717,
3752
  "eval_steps_per_second": 5.679,
3753
  "step": 24000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3754
  }
3755
  ],
3756
  "logging_steps": 50,
@@ -3770,7 +3848,7 @@
3770
  "attributes": {}
3771
  }
3772
  },
3773
- "total_flos": 5.845995749376e+16,
3774
  "train_batch_size": 4,
3775
  "trial_name": null,
3776
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08147666603326797,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-24500",
4
+ "epoch": 1.96,
5
  "eval_steps": 500,
6
+ "global_step": 24500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3751
  "eval_samples_per_second": 22.717,
3752
  "eval_steps_per_second": 5.679,
3753
  "step": 24000
3754
+ },
3755
+ {
3756
+ "epoch": 1.924,
3757
+ "grad_norm": 0.18958355486392975,
3758
+ "learning_rate": 1.1496e-06,
3759
+ "loss": 0.054,
3760
+ "step": 24050
3761
+ },
3762
+ {
3763
+ "epoch": 1.928,
3764
+ "grad_norm": 0.12351219356060028,
3765
+ "learning_rate": 1.0896e-06,
3766
+ "loss": 0.0614,
3767
+ "step": 24100
3768
+ },
3769
+ {
3770
+ "epoch": 1.932,
3771
+ "grad_norm": 0.175857812166214,
3772
+ "learning_rate": 1.0296e-06,
3773
+ "loss": 0.0541,
3774
+ "step": 24150
3775
+ },
3776
+ {
3777
+ "epoch": 1.936,
3778
+ "grad_norm": 0.15968987345695496,
3779
+ "learning_rate": 9.696e-07,
3780
+ "loss": 0.0568,
3781
+ "step": 24200
3782
+ },
3783
+ {
3784
+ "epoch": 1.94,
3785
+ "grad_norm": 0.15781116485595703,
3786
+ "learning_rate": 9.096e-07,
3787
+ "loss": 0.055,
3788
+ "step": 24250
3789
+ },
3790
+ {
3791
+ "epoch": 1.944,
3792
+ "grad_norm": 0.17276370525360107,
3793
+ "learning_rate": 8.496000000000001e-07,
3794
+ "loss": 0.0475,
3795
+ "step": 24300
3796
+ },
3797
+ {
3798
+ "epoch": 1.948,
3799
+ "grad_norm": 0.09511862695217133,
3800
+ "learning_rate": 7.896e-07,
3801
+ "loss": 0.0509,
3802
+ "step": 24350
3803
+ },
3804
+ {
3805
+ "epoch": 1.952,
3806
+ "grad_norm": 0.1636885106563568,
3807
+ "learning_rate": 7.296000000000001e-07,
3808
+ "loss": 0.0508,
3809
+ "step": 24400
3810
+ },
3811
+ {
3812
+ "epoch": 1.956,
3813
+ "grad_norm": 0.12255977094173431,
3814
+ "learning_rate": 6.696e-07,
3815
+ "loss": 0.0467,
3816
+ "step": 24450
3817
+ },
3818
+ {
3819
+ "epoch": 1.96,
3820
+ "grad_norm": 0.18492284417152405,
3821
+ "learning_rate": 6.096000000000001e-07,
3822
+ "loss": 0.0559,
3823
+ "step": 24500
3824
+ },
3825
+ {
3826
+ "epoch": 1.96,
3827
+ "eval_loss": 0.08147666603326797,
3828
+ "eval_runtime": 88.0584,
3829
+ "eval_samples_per_second": 22.712,
3830
+ "eval_steps_per_second": 5.678,
3831
+ "step": 24500
3832
  }
3833
  ],
3834
  "logging_steps": 50,
 
3848
  "attributes": {}
3849
  }
3850
  },
3851
+ "total_flos": 5.967787327488e+16,
3852
  "train_batch_size": 4,
3853
  "trial_name": null,
3854
  "trial_params": null