FormlessAI commited on
Commit
5f2f232
·
verified ·
1 Parent(s): cf4a55d

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc92bd3c7c2ca398ec261883c447c9df1bbe0c7faf56df13890e4c24774b40a
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0217b4b0c3c7f1987944f70686eb3cc84294e0febf0ed767a56782cb9017db42
3
  size 1037269336
last-checkpoint/global_step4400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208f3f993987f330acb84602114113a61e43ebe3d5eb09c047705e04b4dea90b
3
+ size 781993445
last-checkpoint/global_step4400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909cd42d1608b5fe7874afb79b6fbdaf4ca93180010eca90c2478a5b0460e210
3
+ size 781993509
last-checkpoint/global_step4400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ac4ee0863674394e18bf040c39367c44210bf27eb718840c9e014a8198505b
3
+ size 781993509
last-checkpoint/global_step4400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5edea25fbe4b088f32fadb2bae52dbcec4468d967cb0d3a0fb41d535943734f6
3
+ size 781993509
last-checkpoint/global_step4400/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051d7c061bd63c18a72c7f60192548c38149641e49dddb376a120d33da3567ef
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step4300
 
1
+ global_step4400
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2787b9d7df68f644a04ffaa126617b9e91d8a6c7b3386a4c36cb31d2d718186
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ec6429d51b78e62a781ea28a18634f451844f66fee400b9be20b2072a6fac5e
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62176e5f78a575ab92c8c666cd0da6a92c60aa8b7f4b466b59fbd2373ac03cc5
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e3e04848cc38a3a002981db4be3e84294dc9e5c12327b6e3c23b02534523094
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a72bd08ae02b37b5f365349b001d2ca8c1e0ece9d48f4a163966302d5865a11
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5a84f5b27ded1de3f5ceb77963092ac6c45b3bb6acfbc406627cbc633009a1
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:042f18db2e77e47eff46b1db2a3a2e488c3c080aa38d222800a2f6949e0f032d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b169b571920e7c4ea3cfadebde4b2c5412429683ec1e5c89095379be2aeec0
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c2c30e8f45c0287eaccdddcf23f57d326e170a7468c21d85826984fbe28cf30
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f6239c54a9e14ade75dd1dbb72d423d68c7c1273e9d5fb21d6effe590197848
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.9058680534362793,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6250908562291031,
6
  "eval_steps": 50,
7
- "global_step": 4300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6716,6 +6716,162 @@
6716
  "eval_samples_per_second": 175.036,
6717
  "eval_steps_per_second": 10.976,
6718
  "step": 4300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6719
  }
6720
  ],
6721
  "logging_steps": 5,
@@ -6744,7 +6900,7 @@
6744
  "attributes": {}
6745
  }
6746
  },
6747
- "total_flos": 1.1212623171205202e+18,
6748
  "train_batch_size": 4,
6749
  "trial_name": null,
6750
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.9043115377426147,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6396278528855939,
6
  "eval_steps": 50,
7
+ "global_step": 4400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6716
  "eval_samples_per_second": 175.036,
6717
  "eval_steps_per_second": 10.976,
6718
  "step": 4300
6719
+ },
6720
+ {
6721
+ "epoch": 0.6258177060619277,
6722
+ "grad_norm": 2.382782220840454,
6723
+ "learning_rate": 6.184769707241625e-05,
6724
+ "loss": 1.9673,
6725
+ "step": 4305
6726
+ },
6727
+ {
6728
+ "epoch": 0.6265445558947521,
6729
+ "grad_norm": 2.4369523525238037,
6730
+ "learning_rate": 6.177041004251455e-05,
6731
+ "loss": 2.2144,
6732
+ "step": 4310
6733
+ },
6734
+ {
6735
+ "epoch": 0.6272714057275767,
6736
+ "grad_norm": 2.43398380279541,
6737
+ "learning_rate": 6.16930936282599e-05,
6738
+ "loss": 2.0025,
6739
+ "step": 4315
6740
+ },
6741
+ {
6742
+ "epoch": 0.6279982555604012,
6743
+ "grad_norm": 2.472754955291748,
6744
+ "learning_rate": 6.161574802429627e-05,
6745
+ "loss": 2.1328,
6746
+ "step": 4320
6747
+ },
6748
+ {
6749
+ "epoch": 0.6287251053932258,
6750
+ "grad_norm": 2.6764614582061768,
6751
+ "learning_rate": 6.153837342534111e-05,
6752
+ "loss": 2.1554,
6753
+ "step": 4325
6754
+ },
6755
+ {
6756
+ "epoch": 0.6294519552260504,
6757
+ "grad_norm": 2.3212342262268066,
6758
+ "learning_rate": 6.146097002618492e-05,
6759
+ "loss": 2.1615,
6760
+ "step": 4330
6761
+ },
6762
+ {
6763
+ "epoch": 0.6301788050588748,
6764
+ "grad_norm": 2.824336290359497,
6765
+ "learning_rate": 6.138353802169061e-05,
6766
+ "loss": 2.0653,
6767
+ "step": 4335
6768
+ },
6769
+ {
6770
+ "epoch": 0.6309056548916994,
6771
+ "grad_norm": 2.4014430046081543,
6772
+ "learning_rate": 6.130607760679321e-05,
6773
+ "loss": 2.0374,
6774
+ "step": 4340
6775
+ },
6776
+ {
6777
+ "epoch": 0.6316325047245239,
6778
+ "grad_norm": 2.458951950073242,
6779
+ "learning_rate": 6.122858897649921e-05,
6780
+ "loss": 2.1722,
6781
+ "step": 4345
6782
+ },
6783
+ {
6784
+ "epoch": 0.6323593545573485,
6785
+ "grad_norm": 2.567749500274658,
6786
+ "learning_rate": 6.115107232588612e-05,
6787
+ "loss": 2.1671,
6788
+ "step": 4350
6789
+ },
6790
+ {
6791
+ "epoch": 0.6323593545573485,
6792
+ "eval_loss": 1.9125865697860718,
6793
+ "eval_runtime": 22.1706,
6794
+ "eval_samples_per_second": 148.891,
6795
+ "eval_steps_per_second": 9.337,
6796
+ "step": 4350
6797
+ },
6798
+ {
6799
+ "epoch": 0.6330862043901729,
6800
+ "grad_norm": 2.322906255722046,
6801
+ "learning_rate": 6.107352785010202e-05,
6802
+ "loss": 2.1378,
6803
+ "step": 4355
6804
+ },
6805
+ {
6806
+ "epoch": 0.6338130542229975,
6807
+ "grad_norm": 2.1527748107910156,
6808
+ "learning_rate": 6.0995955744365073e-05,
6809
+ "loss": 2.0096,
6810
+ "step": 4360
6811
+ },
6812
+ {
6813
+ "epoch": 0.6345399040558221,
6814
+ "grad_norm": 2.6586174964904785,
6815
+ "learning_rate": 6.0918356203962934e-05,
6816
+ "loss": 2.2011,
6817
+ "step": 4365
6818
+ },
6819
+ {
6820
+ "epoch": 0.6352667538886466,
6821
+ "grad_norm": 2.559743642807007,
6822
+ "learning_rate": 6.084072942425234e-05,
6823
+ "loss": 2.0937,
6824
+ "step": 4370
6825
+ },
6826
+ {
6827
+ "epoch": 0.6359936037214712,
6828
+ "grad_norm": 2.8032941818237305,
6829
+ "learning_rate": 6.076307560065865e-05,
6830
+ "loss": 1.971,
6831
+ "step": 4375
6832
+ },
6833
+ {
6834
+ "epoch": 0.6367204535542956,
6835
+ "grad_norm": 2.3299427032470703,
6836
+ "learning_rate": 6.068539492867526e-05,
6837
+ "loss": 2.0369,
6838
+ "step": 4380
6839
+ },
6840
+ {
6841
+ "epoch": 0.6374473033871202,
6842
+ "grad_norm": 2.167146682739258,
6843
+ "learning_rate": 6.0607687603863155e-05,
6844
+ "loss": 1.9857,
6845
+ "step": 4385
6846
+ },
6847
+ {
6848
+ "epoch": 0.6381741532199448,
6849
+ "grad_norm": 2.151320219039917,
6850
+ "learning_rate": 6.052995382185044e-05,
6851
+ "loss": 2.1305,
6852
+ "step": 4390
6853
+ },
6854
+ {
6855
+ "epoch": 0.6389010030527693,
6856
+ "grad_norm": 2.5785205364227295,
6857
+ "learning_rate": 6.045219377833183e-05,
6858
+ "loss": 1.8801,
6859
+ "step": 4395
6860
+ },
6861
+ {
6862
+ "epoch": 0.6396278528855939,
6863
+ "grad_norm": 2.6063733100891113,
6864
+ "learning_rate": 6.037440766906813e-05,
6865
+ "loss": 1.8297,
6866
+ "step": 4400
6867
+ },
6868
+ {
6869
+ "epoch": 0.6396278528855939,
6870
+ "eval_loss": 1.9043115377426147,
6871
+ "eval_runtime": 18.9211,
6872
+ "eval_samples_per_second": 174.461,
6873
+ "eval_steps_per_second": 10.94,
6874
+ "step": 4400
6875
  }
6876
  ],
6877
  "logging_steps": 5,
 
6900
  "attributes": {}
6901
  }
6902
  },
6903
+ "total_flos": 1.1471040004625531e+18,
6904
  "train_batch_size": 4,
6905
  "trial_name": null,
6906
  "trial_params": null