irishprancer commited on
Commit
e9295f7
·
verified ·
1 Parent(s): 7e28e13

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4376c34886456cb2899c7f6beeadbdd0369c3eb2c8cb577a4c24d977132e673
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5294f50c0baae12880a019af7aff602de0021ca7f152917dde18bdf22ba93937
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5822d4454f023f2e758e0371b888496dd2a0767e11534488d6f9c822d136956
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b7e62ea9f776c4c1909b99bd4b2be719e11c652940cf5f69aa1e349ee14b1fc
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41879b6be867dc93cfdfe9c095241e1809219ed8cb44d663d4b21de0f8b8f617
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ddf8abb9ec9ea656e462efff79374386d297b7f7fbb9fe2e12d28f7e1de152
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:040a1b5835b5527454d21946072fd417fbebea1df191d3f188daffdb17ff5507
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b1ff2305e39132563133ecd5a3bc22eb8aea6f062c680ce8b5a9a3f47c0580
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 123.91304347826087,
5
  "eval_steps": 150,
6
- "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2762,6 +2762,151 @@
2762
  "EMA_steps_per_second": 19.037,
2763
  "epoch": 123.91304347826087,
2764
  "step": 2850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2765
  }
2766
  ],
2767
  "logging_steps": 10,
@@ -2781,7 +2926,7 @@
2781
  "attributes": {}
2782
  }
2783
  },
2784
- "total_flos": 7.337359146420634e+16,
2785
  "train_batch_size": 4,
2786
  "trial_name": null,
2787
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 130.43478260869566,
5
  "eval_steps": 150,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2762
  "EMA_steps_per_second": 19.037,
2763
  "epoch": 123.91304347826087,
2764
  "step": 2850
2765
+ },
2766
+ {
2767
+ "epoch": 124.34782608695652,
2768
+ "grad_norm": 1.6300952434539795,
2769
+ "learning_rate": 3.910072953123827e-06,
2770
+ "loss": 0.2256,
2771
+ "step": 2860
2772
+ },
2773
+ {
2774
+ "epoch": 124.78260869565217,
2775
+ "grad_norm": 1.5945820808410645,
2776
+ "learning_rate": 3.910067916178865e-06,
2777
+ "loss": 0.2304,
2778
+ "step": 2870
2779
+ },
2780
+ {
2781
+ "epoch": 125.21739130434783,
2782
+ "grad_norm": 2.0118942260742188,
2783
+ "learning_rate": 3.9100621043246675e-06,
2784
+ "loss": 0.2693,
2785
+ "step": 2880
2786
+ },
2787
+ {
2788
+ "epoch": 125.65217391304348,
2789
+ "grad_norm": 2.1449036598205566,
2790
+ "learning_rate": 3.910055517563539e-06,
2791
+ "loss": 0.2454,
2792
+ "step": 2890
2793
+ },
2794
+ {
2795
+ "epoch": 126.08695652173913,
2796
+ "grad_norm": 2.3814568519592285,
2797
+ "learning_rate": 3.9100481558980905e-06,
2798
+ "loss": 0.2517,
2799
+ "step": 2900
2800
+ },
2801
+ {
2802
+ "epoch": 126.52173913043478,
2803
+ "grad_norm": 1.680646300315857,
2804
+ "learning_rate": 3.91004001933124e-06,
2805
+ "loss": 0.2023,
2806
+ "step": 2910
2807
+ },
2808
+ {
2809
+ "epoch": 126.95652173913044,
2810
+ "grad_norm": 1.567590355873108,
2811
+ "learning_rate": 3.9100311078662124e-06,
2812
+ "loss": 0.2903,
2813
+ "step": 2920
2814
+ },
2815
+ {
2816
+ "epoch": 127.3913043478261,
2817
+ "grad_norm": 2.0478575229644775,
2818
+ "learning_rate": 3.9100214215065405e-06,
2819
+ "loss": 0.2554,
2820
+ "step": 2930
2821
+ },
2822
+ {
2823
+ "epoch": 127.82608695652173,
2824
+ "grad_norm": 2.724403142929077,
2825
+ "learning_rate": 3.910010960256062e-06,
2826
+ "loss": 0.2195,
2827
+ "step": 2940
2828
+ },
2829
+ {
2830
+ "epoch": 128.2608695652174,
2831
+ "grad_norm": 2.3156094551086426,
2832
+ "learning_rate": 3.909999724118925e-06,
2833
+ "loss": 0.2952,
2834
+ "step": 2950
2835
+ },
2836
+ {
2837
+ "epoch": 128.69565217391303,
2838
+ "grad_norm": 1.6324609518051147,
2839
+ "learning_rate": 3.909987713099583e-06,
2840
+ "loss": 0.2409,
2841
+ "step": 2960
2842
+ },
2843
+ {
2844
+ "epoch": 129.1304347826087,
2845
+ "grad_norm": 1.66539466381073,
2846
+ "learning_rate": 3.909974927202796e-06,
2847
+ "loss": 0.2029,
2848
+ "step": 2970
2849
+ },
2850
+ {
2851
+ "epoch": 129.56521739130434,
2852
+ "grad_norm": 1.877989649772644,
2853
+ "learning_rate": 3.909961366433632e-06,
2854
+ "loss": 0.2407,
2855
+ "step": 2980
2856
+ },
2857
+ {
2858
+ "epoch": 130.0,
2859
+ "grad_norm": 5.461711406707764,
2860
+ "learning_rate": 3.909947030797467e-06,
2861
+ "loss": 0.2466,
2862
+ "step": 2990
2863
+ },
2864
+ {
2865
+ "epoch": 130.43478260869566,
2866
+ "grad_norm": 2.4120867252349854,
2867
+ "learning_rate": 3.909931920299982e-06,
2868
+ "loss": 0.2372,
2869
+ "step": 3000
2870
+ },
2871
+ {
2872
+ "epoch": 130.43478260869566,
2873
+ "eval_loss": 0.9418841600418091,
2874
+ "eval_runtime": 0.4153,
2875
+ "eval_samples_per_second": 24.08,
2876
+ "eval_steps_per_second": 24.08,
2877
+ "step": 3000
2878
+ },
2879
+ {
2880
+ "Start_State_loss": 0.861186683177948,
2881
+ "Start_State_runtime": 0.4056,
2882
+ "Start_State_samples_per_second": 24.655,
2883
+ "Start_State_steps_per_second": 24.655,
2884
+ "epoch": 130.43478260869566,
2885
+ "step": 3000
2886
+ },
2887
+ {
2888
+ "Raw_Model_loss": 0.9418841600418091,
2889
+ "Raw_Model_runtime": 0.4028,
2890
+ "Raw_Model_samples_per_second": 24.829,
2891
+ "Raw_Model_steps_per_second": 24.829,
2892
+ "epoch": 130.43478260869566,
2893
+ "step": 3000
2894
+ },
2895
+ {
2896
+ "SWA_loss": 0.7738855481147766,
2897
+ "SWA_runtime": 0.4063,
2898
+ "SWA_samples_per_second": 24.613,
2899
+ "SWA_steps_per_second": 24.613,
2900
+ "epoch": 130.43478260869566,
2901
+ "step": 3000
2902
+ },
2903
+ {
2904
+ "EMA_loss": 0.8603588938713074,
2905
+ "EMA_runtime": 0.4125,
2906
+ "EMA_samples_per_second": 24.244,
2907
+ "EMA_steps_per_second": 24.244,
2908
+ "epoch": 130.43478260869566,
2909
+ "step": 3000
2910
  }
2911
  ],
2912
  "logging_steps": 10,
 
2926
  "attributes": {}
2927
  }
2928
  },
2929
+ "total_flos": 7.725240940312166e+16,
2930
  "train_batch_size": 4,
2931
  "trial_name": null,
2932
  "trial_params": null