FredericFan commited on
Commit
de11f93
·
verified ·
1 Parent(s): d0e18b6

Training in progress, step 24000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33122d71cdf7165052a6451b4b890be19eede04be5a6b00cb6a95a1eef2edf2f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9311e2550653c7cfff27cdfcd9ae1c1ccbbeaa1e9481e382a45fb1ada32568
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79ab7fc909db5811b643181df3c63513f79ba22ba389b4afec1eac2809371c99
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b0751212d7784f97747445314fe884f391f2159de9683d5358f326eecd2a4d1
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:909694fbb2ac6b6563f3c5bf7a09371dca2d67c279250546522e439c69924143
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17b897621ac09187e8d36f4eeef5f19583342b14705476fa86e2dbea16377ec
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a84f776b40f88d7bc189cfa4b1f3b071349677635e65ce7ce8b5d9881aaebec8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5584b494a8ffe5156189ac2ab8d1dfe54f3dfebb02cc807c1073c45dc03931
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0815029963850975,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-22500",
4
- "epoch": 1.88,
5
  "eval_steps": 500,
6
- "global_step": 23500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3673,6 +3673,84 @@
3673
  "eval_samples_per_second": 22.703,
3674
  "eval_steps_per_second": 5.676,
3675
  "step": 23500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3676
  }
3677
  ],
3678
  "logging_steps": 50,
@@ -3692,7 +3770,7 @@
3692
  "attributes": {}
3693
  }
3694
  },
3695
- "total_flos": 5.724204171264e+16,
3696
  "train_batch_size": 4,
3697
  "trial_name": null,
3698
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08148019760847092,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-24000",
4
+ "epoch": 1.92,
5
  "eval_steps": 500,
6
+ "global_step": 24000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3673
  "eval_samples_per_second": 22.703,
3674
  "eval_steps_per_second": 5.676,
3675
  "step": 23500
3676
+ },
3677
+ {
3678
+ "epoch": 1.884,
3679
+ "grad_norm": 0.10899360477924347,
3680
+ "learning_rate": 1.7496e-06,
3681
+ "loss": 0.0558,
3682
+ "step": 23550
3683
+ },
3684
+ {
3685
+ "epoch": 1.888,
3686
+ "grad_norm": 0.09025511890649796,
3687
+ "learning_rate": 1.6896e-06,
3688
+ "loss": 0.0582,
3689
+ "step": 23600
3690
+ },
3691
+ {
3692
+ "epoch": 1.892,
3693
+ "grad_norm": 0.11647246032953262,
3694
+ "learning_rate": 1.6296e-06,
3695
+ "loss": 0.0518,
3696
+ "step": 23650
3697
+ },
3698
+ {
3699
+ "epoch": 1.896,
3700
+ "grad_norm": 0.15982107818126678,
3701
+ "learning_rate": 1.5696e-06,
3702
+ "loss": 0.0535,
3703
+ "step": 23700
3704
+ },
3705
+ {
3706
+ "epoch": 1.9,
3707
+ "grad_norm": 0.15808264911174774,
3708
+ "learning_rate": 1.5096e-06,
3709
+ "loss": 0.0557,
3710
+ "step": 23750
3711
+ },
3712
+ {
3713
+ "epoch": 1.904,
3714
+ "grad_norm": 0.12610608339309692,
3715
+ "learning_rate": 1.4496e-06,
3716
+ "loss": 0.0547,
3717
+ "step": 23800
3718
+ },
3719
+ {
3720
+ "epoch": 1.908,
3721
+ "grad_norm": 0.10674013942480087,
3722
+ "learning_rate": 1.3896e-06,
3723
+ "loss": 0.0506,
3724
+ "step": 23850
3725
+ },
3726
+ {
3727
+ "epoch": 1.912,
3728
+ "grad_norm": 0.15550707280635834,
3729
+ "learning_rate": 1.3296e-06,
3730
+ "loss": 0.0576,
3731
+ "step": 23900
3732
+ },
3733
+ {
3734
+ "epoch": 1.916,
3735
+ "grad_norm": 0.12588605284690857,
3736
+ "learning_rate": 1.2696e-06,
3737
+ "loss": 0.0566,
3738
+ "step": 23950
3739
+ },
3740
+ {
3741
+ "epoch": 1.92,
3742
+ "grad_norm": 0.1209346354007721,
3743
+ "learning_rate": 1.2096e-06,
3744
+ "loss": 0.0564,
3745
+ "step": 24000
3746
+ },
3747
+ {
3748
+ "epoch": 1.92,
3749
+ "eval_loss": 0.08148019760847092,
3750
+ "eval_runtime": 88.0412,
3751
+ "eval_samples_per_second": 22.717,
3752
+ "eval_steps_per_second": 5.679,
3753
+ "step": 24000
3754
  }
3755
  ],
3756
  "logging_steps": 50,
 
3770
  "attributes": {}
3771
  }
3772
  },
3773
+ "total_flos": 5.845995749376e+16,
3774
  "train_batch_size": 4,
3775
  "trial_name": null,
3776
  "trial_params": null