FormlessAI committed on
Commit
5565801
·
verified ·
1 Parent(s): 2308dc7

Training in progress, step 370, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d49df9cdd2b8a79cd0cb0e488aea9519a1376033fb61e113366981215594b7f
3
  size 838922248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50f547452116a91ced741817d0158e20a2639a40c36dbdb9597bab5096bfeede
3
  size 838922248
last-checkpoint/global_step370/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c45dc08a197f7c0281aa8ebc4fe6f95eb165aae4591c45db4addb9a8beeb7e02
3
+ size 2516610128
last-checkpoint/global_step370/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f26506e2982686927c242f0ecd57a2754730d3774a4fd1d990c895969fc1ac3a
3
+ size 2516610256
last-checkpoint/global_step370/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d53b6de8062ec711428eeb0959e27bec1400d3240c9b90c61b6f742a6c8f971
3
+ size 839203436
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step360
 
1
+ global_step370
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e760a2877f0e4cbec17445048222fca781735cd73b3ede9731975ec9794ec58a
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfcfee17e6b489c56e369f8424fd3148d93d6475de75e98a3419a0b00e9010a1
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:955963f226df3d0deffe3c848412bf316f608f7c3993868bb77ad7599d9f40d2
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda4d898981379b275d5069d167e4e5e3bb8d0d2583814bf72968446d9b1be71
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b1fb32299bb80e0390e04832f5ba30c00d2cbbdc2f38744f36753aec3950b53
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f2e1dfa07caf61548c4783222b53e3b067d5dd48ff04c0299d1b38fd0e8701c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 350,
3
  "best_metric": 1.5130423307418823,
4
  "best_model_checkpoint": "miner_id_24/checkpoint-350",
5
- "epoch": 0.06060861147354687,
6
  "eval_steps": 10,
7
- "global_step": 360,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2824,6 +2824,84 @@
2824
  "eval_samples_per_second": 9.292,
2825
  "eval_steps_per_second": 1.162,
2826
  "step": 360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2827
  }
2828
  ],
2829
  "logging_steps": 1,
@@ -2838,7 +2916,7 @@
2838
  "early_stopping_threshold": 0.0
2839
  },
2840
  "attributes": {
2841
- "early_stopping_patience_counter": 1
2842
  }
2843
  },
2844
  "TrainerControl": {
@@ -2852,7 +2930,7 @@
2852
  "attributes": {}
2853
  }
2854
  },
2855
- "total_flos": 2.2435079068739174e+18,
2856
  "train_batch_size": 4,
2857
  "trial_name": null,
2858
  "trial_params": null
 
2
  "best_global_step": 350,
3
  "best_metric": 1.5130423307418823,
4
  "best_model_checkpoint": "miner_id_24/checkpoint-350",
5
+ "epoch": 0.062292184014478724,
6
  "eval_steps": 10,
7
+ "global_step": 370,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2824
  "eval_samples_per_second": 9.292,
2825
  "eval_steps_per_second": 1.162,
2826
  "step": 360
2827
+ },
2828
+ {
2829
+ "epoch": 0.06077696872764005,
2830
+ "grad_norm": 1.6546894311904907,
2831
+ "learning_rate": 7.312691451204178e-06,
2832
+ "loss": 1.4844,
2833
+ "step": 361
2834
+ },
2835
+ {
2836
+ "epoch": 0.06094532598173324,
2837
+ "grad_norm": 0.9022347331047058,
2838
+ "learning_rate": 7.2984687052230585e-06,
2839
+ "loss": 1.5351,
2840
+ "step": 362
2841
+ },
2842
+ {
2843
+ "epoch": 0.06111368323582642,
2844
+ "grad_norm": 1.407510757446289,
2845
+ "learning_rate": 7.284222338919758e-06,
2846
+ "loss": 1.4135,
2847
+ "step": 363
2848
+ },
2849
+ {
2850
+ "epoch": 0.06128204048991961,
2851
+ "grad_norm": 1.2970539331436157,
2852
+ "learning_rate": 7.269952498697734e-06,
2853
+ "loss": 1.4892,
2854
+ "step": 364
2855
+ },
2856
+ {
2857
+ "epoch": 0.06145039774401279,
2858
+ "grad_norm": 1.5928813219070435,
2859
+ "learning_rate": 7.255659331201673e-06,
2860
+ "loss": 1.6161,
2861
+ "step": 365
2862
+ },
2863
+ {
2864
+ "epoch": 0.06161875499810598,
2865
+ "grad_norm": 1.8556829690933228,
2866
+ "learning_rate": 7.241342983315985e-06,
2867
+ "loss": 1.6214,
2868
+ "step": 366
2869
+ },
2870
+ {
2871
+ "epoch": 0.06178711225219917,
2872
+ "grad_norm": 1.4815845489501953,
2873
+ "learning_rate": 7.227003602163296e-06,
2874
+ "loss": 1.6008,
2875
+ "step": 367
2876
+ },
2877
+ {
2878
+ "epoch": 0.061955469506292354,
2879
+ "grad_norm": 1.992004632949829,
2880
+ "learning_rate": 7.212641335102932e-06,
2881
+ "loss": 1.5366,
2882
+ "step": 368
2883
+ },
2884
+ {
2885
+ "epoch": 0.06212382676038554,
2886
+ "grad_norm": 1.532273292541504,
2887
+ "learning_rate": 7.198256329729412e-06,
2888
+ "loss": 1.4833,
2889
+ "step": 369
2890
+ },
2891
+ {
2892
+ "epoch": 0.062292184014478724,
2893
+ "grad_norm": 2.722139358520508,
2894
+ "learning_rate": 7.183848733870917e-06,
2895
+ "loss": 1.9184,
2896
+ "step": 370
2897
+ },
2898
+ {
2899
+ "epoch": 0.062292184014478724,
2900
+ "eval_loss": 1.5153603553771973,
2901
+ "eval_runtime": 1075.4788,
2902
+ "eval_samples_per_second": 9.302,
2903
+ "eval_steps_per_second": 1.163,
2904
+ "step": 370
2905
  }
2906
  ],
2907
  "logging_steps": 1,
 
2916
  "early_stopping_threshold": 0.0
2917
  },
2918
  "attributes": {
2919
+ "early_stopping_patience_counter": 2
2920
  }
2921
  },
2922
  "TrainerControl": {
 
2930
  "attributes": {}
2931
  }
2932
  },
2933
+ "total_flos": 2.3058275709537485e+18,
2934
  "train_batch_size": 4,
2935
  "trial_name": null,
2936
  "trial_params": null