FredericFan committed on
Commit
c9f982c
·
verified ·
1 Parent(s): 4becdb9

Training in progress, step 23500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40edcfcd3eadd3934df8237241fafe734256be703a0e3946a4e6ee14e6bf5a22
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33122d71cdf7165052a6451b4b890be19eede04be5a6b00cb6a95a1eef2edf2f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d296490c405857ea96e4fbe1deae357928a09b842487dd9e87ba7f64056b04e
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ab7fc909db5811b643181df3c63513f79ba22ba389b4afec1eac2809371c99
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5df9c2ed680cbe1eadef613230eab99b9589effb616e86f824a6c4d292f402
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909694fbb2ac6b6563f3c5bf7a09371dca2d67c279250546522e439c69924143
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dcf2c6dc0ad63a4d7cbede289213f75c3cbabb91c5c15238e6438903f0efdfd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a84f776b40f88d7bc189cfa4b1f3b071349677635e65ce7ce8b5d9881aaebec8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0815029963850975,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-22500",
4
- "epoch": 1.8399999999999999,
5
  "eval_steps": 500,
6
- "global_step": 23000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3595,6 +3595,84 @@
3595
  "eval_samples_per_second": 22.708,
3596
  "eval_steps_per_second": 5.677,
3597
  "step": 23000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3598
  }
3599
  ],
3600
  "logging_steps": 50,
@@ -3614,7 +3692,7 @@
3614
  "attributes": {}
3615
  }
3616
  },
3617
- "total_flos": 5.602412593152e+16,
3618
  "train_batch_size": 4,
3619
  "trial_name": null,
3620
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0815029963850975,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-22500",
4
+ "epoch": 1.88,
5
  "eval_steps": 500,
6
+ "global_step": 23500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3595
  "eval_samples_per_second": 22.708,
3596
  "eval_steps_per_second": 5.677,
3597
  "step": 23000
3598
+ },
3599
+ {
3600
+ "epoch": 1.8439999999999999,
3601
+ "grad_norm": 0.2026013433933258,
3602
+ "learning_rate": 2.3496e-06,
3603
+ "loss": 0.0526,
3604
+ "step": 23050
3605
+ },
3606
+ {
3607
+ "epoch": 1.8479999999999999,
3608
+ "grad_norm": 0.1372475028038025,
3609
+ "learning_rate": 2.2896e-06,
3610
+ "loss": 0.057,
3611
+ "step": 23100
3612
+ },
3613
+ {
3614
+ "epoch": 1.8519999999999999,
3615
+ "grad_norm": 0.13629719614982605,
3616
+ "learning_rate": 2.2296e-06,
3617
+ "loss": 0.0576,
3618
+ "step": 23150
3619
+ },
3620
+ {
3621
+ "epoch": 1.8559999999999999,
3622
+ "grad_norm": 0.2135656774044037,
3623
+ "learning_rate": 2.1696e-06,
3624
+ "loss": 0.0571,
3625
+ "step": 23200
3626
+ },
3627
+ {
3628
+ "epoch": 1.8599999999999999,
3629
+ "grad_norm": 0.1738504022359848,
3630
+ "learning_rate": 2.1095999999999997e-06,
3631
+ "loss": 0.054,
3632
+ "step": 23250
3633
+ },
3634
+ {
3635
+ "epoch": 1.8639999999999999,
3636
+ "grad_norm": 0.17974117398262024,
3637
+ "learning_rate": 2.0496000000000002e-06,
3638
+ "loss": 0.0551,
3639
+ "step": 23300
3640
+ },
3641
+ {
3642
+ "epoch": 1.8679999999999999,
3643
+ "grad_norm": 0.16323037445545197,
3644
+ "learning_rate": 1.9896e-06,
3645
+ "loss": 0.0515,
3646
+ "step": 23350
3647
+ },
3648
+ {
3649
+ "epoch": 1.8719999999999999,
3650
+ "grad_norm": 0.11057537794113159,
3651
+ "learning_rate": 1.9296e-06,
3652
+ "loss": 0.0564,
3653
+ "step": 23400
3654
+ },
3655
+ {
3656
+ "epoch": 1.876,
3657
+ "grad_norm": 0.1545117348432541,
3658
+ "learning_rate": 1.8696e-06,
3659
+ "loss": 0.0508,
3660
+ "step": 23450
3661
+ },
3662
+ {
3663
+ "epoch": 1.88,
3664
+ "grad_norm": 0.25704920291900635,
3665
+ "learning_rate": 1.8096e-06,
3666
+ "loss": 0.0521,
3667
+ "step": 23500
3668
+ },
3669
+ {
3670
+ "epoch": 1.88,
3671
+ "eval_loss": 0.08151672035455704,
3672
+ "eval_runtime": 88.0934,
3673
+ "eval_samples_per_second": 22.703,
3674
+ "eval_steps_per_second": 5.676,
3675
+ "step": 23500
3676
  }
3677
  ],
3678
  "logging_steps": 50,
 
3692
  "attributes": {}
3693
  }
3694
  },
3695
+ "total_flos": 5.724204171264e+16,
3696
  "train_batch_size": 4,
3697
  "trial_name": null,
3698
  "trial_params": null