FormlessAI commited on
Commit
f74e217
·
verified ·
1 Parent(s): 23e2d76

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47ce334ce586d5969fcc92ad055145c2069056b6d300ae0df72d77853f0a6dfb
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4daa9130e6c255b2708c8eee5cdb609ab30d4959bd189609fe9f12b19d05f404
3
  size 98088784
last-checkpoint/global_step1850/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab640d4c517058d3bff2089f16221d5ac8ea2ee6327558a4587682a167e1ba74
3
+ size 73939813
last-checkpoint/global_step1850/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab913468e5c7b3f94a7d54a3d79dedb2828f5e51fb51d00340ec966df8e3a10
3
+ size 73939813
last-checkpoint/global_step1850/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d6c6f4f3f2baa35c260070f5e4382945067550e6b15387b10d7031e28ec1a4f
3
+ size 73939877
last-checkpoint/global_step1850/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b17923443f7c2450294b22e6453eaa42bd4f594a10f85faca7ca0a52008bba
3
+ size 73939877
last-checkpoint/global_step1850/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc69f9a9c13d36cd4e387253365067ad4c94d05c2f1ab3d46f9d518a945ea9f
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1800
 
1
+ global_step1850
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b850ad212988a4d49d9c4427abfa613e8ffe0930c3ecf275d0e2377b1a11b1c
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:455fcae85aeb76faf352e118d0cf253d10c3d29e2f06cfabbb29fbc8f2f7a554
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1940fc3acaec9d2a5c3b13202e460ed831210d280838849d74c4234bff50ad5d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9ba995c6c6ed132c3f4cb1aeaa57d7a5ba11efd8d0082b03fc43d3bd0a2608
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81caecdb1ea7c261367f48c8f13249e5021ec052117f8d4cd1a49788142e2611
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2d4bee624a68686a64406afb400311befecc0c655d3991ddead24890d1c4bf2
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f30d850e334cad1d00d1fb183aaa1672201ec2c64352b5245f5d9d2995b7e14f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914a4da0db40a07c1e19adf6a7a7212df61a63b6794b176a431ab9c0064c159b
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f60b7ff93077f3e35712903f61b459e10cbddd1013deed93b7016d5bdd9581df
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85498594364033313bc32ee2cb2af811174fdf184366c917daf31ca0273bcade
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6580623388290405,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.764237599510104,
6
  "eval_steps": 50,
7
- "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2816,6 +2816,84 @@
2816
  "eval_samples_per_second": 126.748,
2817
  "eval_steps_per_second": 15.852,
2818
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2819
  }
2820
  ],
2821
  "logging_steps": 5,
@@ -2844,7 +2922,7 @@
2844
  "attributes": {}
2845
  }
2846
  },
2847
- "total_flos": 9.295908283791442e+17,
2848
  "train_batch_size": 2,
2849
  "trial_name": null,
2850
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6555055379867554,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.8132271892222902,
6
  "eval_steps": 50,
7
+ "global_step": 1850,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2816
  "eval_samples_per_second": 126.748,
2817
  "eval_steps_per_second": 15.852,
2818
  "step": 1800
2819
+ },
2820
+ {
2821
+ "epoch": 1.7691365584813226,
2822
+ "grad_norm": 0.19532938301563263,
2823
+ "learning_rate": 8.089658405219626e-05,
2824
+ "loss": 0.6867,
2825
+ "step": 1805
2826
+ },
2827
+ {
2828
+ "epoch": 1.7740355174525413,
2829
+ "grad_norm": 0.25039657950401306,
2830
+ "learning_rate": 8.073879040381839e-05,
2831
+ "loss": 0.7027,
2832
+ "step": 1810
2833
+ },
2834
+ {
2835
+ "epoch": 1.77893447642376,
2836
+ "grad_norm": 0.22185583412647247,
2837
+ "learning_rate": 8.058073634617543e-05,
2838
+ "loss": 0.6872,
2839
+ "step": 1815
2840
+ },
2841
+ {
2842
+ "epoch": 1.7838334353949785,
2843
+ "grad_norm": 0.20697511732578278,
2844
+ "learning_rate": 8.042242350351735e-05,
2845
+ "loss": 0.669,
2846
+ "step": 1820
2847
+ },
2848
+ {
2849
+ "epoch": 1.788732394366197,
2850
+ "grad_norm": 0.2226262092590332,
2851
+ "learning_rate": 8.026385350275358e-05,
2852
+ "loss": 0.6903,
2853
+ "step": 1825
2854
+ },
2855
+ {
2856
+ "epoch": 1.7936313533374157,
2857
+ "grad_norm": 0.20571239292621613,
2858
+ "learning_rate": 8.01050279734362e-05,
2859
+ "loss": 0.6821,
2860
+ "step": 1830
2861
+ },
2862
+ {
2863
+ "epoch": 1.7985303123086345,
2864
+ "grad_norm": 0.22728270292282104,
2865
+ "learning_rate": 7.994594854774328e-05,
2866
+ "loss": 0.6874,
2867
+ "step": 1835
2868
+ },
2869
+ {
2870
+ "epoch": 1.8034292712798532,
2871
+ "grad_norm": 0.24331030249595642,
2872
+ "learning_rate": 7.978661686046204e-05,
2873
+ "loss": 0.6927,
2874
+ "step": 1840
2875
+ },
2876
+ {
2877
+ "epoch": 1.8083282302510717,
2878
+ "grad_norm": 0.19782117009162903,
2879
+ "learning_rate": 7.962703454897207e-05,
2880
+ "loss": 0.6672,
2881
+ "step": 1845
2882
+ },
2883
+ {
2884
+ "epoch": 1.8132271892222902,
2885
+ "grad_norm": 0.20845647156238556,
2886
+ "learning_rate": 7.946720325322857e-05,
2887
+ "loss": 0.6874,
2888
+ "step": 1850
2889
+ },
2890
+ {
2891
+ "epoch": 1.8132271892222902,
2892
+ "eval_loss": 0.6555055379867554,
2893
+ "eval_runtime": 15.6124,
2894
+ "eval_samples_per_second": 125.477,
2895
+ "eval_steps_per_second": 15.693,
2896
+ "step": 1850
2897
  }
2898
  ],
2899
  "logging_steps": 5,
 
2922
  "attributes": {}
2923
  }
2924
  },
2925
+ "total_flos": 9.54956406476243e+17,
2926
  "train_batch_size": 2,
2927
  "trial_name": null,
2928
  "trial_params": null