FormlessAI committed on
Commit
f6a8f37
·
verified ·
1 Parent(s): bf58c17

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28f960cc3fc3041049728628ca0f3995042d6d6b849410ccc76854759f1de38e
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686af49d9716db5c977676982b73eead050c55447dd141d1a5c60a378798cf92
3
  size 98088784
last-checkpoint/global_step2050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45616c41df00fa1bfbd9ac14ff79e146c79f9632defbfe6dfc06ff18bd55abf8
3
+ size 73939813
last-checkpoint/global_step2050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b0c02e60bc84e4c5f99dab0099e748a296a98b4797f47b06eb7a9d8d0e1d95
3
+ size 73939813
last-checkpoint/global_step2050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9179f8475663925d8ca4053d3b76b16cb0678735977baa2c289802bd3353cb81
3
+ size 73939877
last-checkpoint/global_step2050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:812d057a57265c21da7802dcc76e16c7773408a25f3ff1c2fc4c2ce8cdab9440
3
+ size 73939877
last-checkpoint/global_step2050/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f61c444e501f1817872bca2e24f714b3ce7cf6bf9e837542c155318cadca656
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2000
 
1
+ global_step2050
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a14f6a698f92007bbc2dcb792b9b6ca2830509bab22cc9bd34d4e6a2c1d3b8a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2136a743114bf3ac6a2be5bd25d57e50f3b32784a92a3ed2d3a6f4e8dfe65997
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fdbb189d7242681559cf6c379ca732f316bca39a80295113d8476efdbf2845a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:725ed86be2ad34a12a575d123d89423e1ccfc36d42388d609eed78168e20aa8e
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8db64115c44140dcc749fb50ed04400a835cc578facdcfdf170d2c657b74fd53
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0c254473a5362fb78028095a7ded74230ab3341cfdf45850d3246e013416c5
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:622aa7be447c67092898a74b40ee6e6a8c1fee0783bf1f9c83af0dde5fce5c73
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a8d80b4bb14ece5fb16e97007c2bfac9158f816b33de1c332f8d229aa7c6235
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8544c996407cdf577fc237cd843a7fc6fa9441fad75c673215c6414edef0e8c3
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76c0ef42f2619bcfd07a1df041b3a7fb00343474a95d72bd4b490c04b2b99687
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6503395438194275,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9601959583588489,
6
  "eval_steps": 50,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3128,6 +3128,84 @@
3128
  "eval_samples_per_second": 127.041,
3129
  "eval_steps_per_second": 15.888,
3130
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3131
  }
3132
  ],
3133
  "logging_steps": 5,
@@ -3156,7 +3234,7 @@
3156
  "attributes": {}
3157
  }
3158
  },
3159
- "total_flos": 1.0314720434234327e+18,
3160
  "train_batch_size": 2,
3161
  "trial_name": null,
3162
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6483769416809082,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0097979179424375,
6
  "eval_steps": 50,
7
+ "global_step": 2050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3128
  "eval_samples_per_second": 127.041,
3129
  "eval_steps_per_second": 15.888,
3130
  "step": 2000
3131
+ },
3132
+ {
3133
+ "epoch": 1.9650949173300674,
3134
+ "grad_norm": 0.223761647939682,
3135
+ "learning_rate": 7.439799842198776e-05,
3136
+ "loss": 0.674,
3137
+ "step": 2005
3138
+ },
3139
+ {
3140
+ "epoch": 1.9699938763012859,
3141
+ "grad_norm": 0.19198189675807953,
3142
+ "learning_rate": 7.423108004787508e-05,
3143
+ "loss": 0.6874,
3144
+ "step": 2010
3145
+ },
3146
+ {
3147
+ "epoch": 1.9748928352725046,
3148
+ "grad_norm": 0.20100003480911255,
3149
+ "learning_rate": 7.406396814129006e-05,
3150
+ "loss": 0.6881,
3151
+ "step": 2015
3152
+ },
3153
+ {
3154
+ "epoch": 1.9797917942437233,
3155
+ "grad_norm": 0.22284255921840668,
3156
+ "learning_rate": 7.389666441956613e-05,
3157
+ "loss": 0.6904,
3158
+ "step": 2020
3159
+ },
3160
+ {
3161
+ "epoch": 1.9846907532149418,
3162
+ "grad_norm": 0.2002260684967041,
3163
+ "learning_rate": 7.372917060200785e-05,
3164
+ "loss": 0.6763,
3165
+ "step": 2025
3166
+ },
3167
+ {
3168
+ "epoch": 1.9895897121861603,
3169
+ "grad_norm": 0.21440419554710388,
3170
+ "learning_rate": 7.356148840987336e-05,
3171
+ "loss": 0.6819,
3172
+ "step": 2030
3173
+ },
3174
+ {
3175
+ "epoch": 1.994488671157379,
3176
+ "grad_norm": 0.22840850055217743,
3177
+ "learning_rate": 7.339361956635661e-05,
3178
+ "loss": 0.6935,
3179
+ "step": 2035
3180
+ },
3181
+ {
3182
+ "epoch": 1.9993876301285978,
3183
+ "grad_norm": 0.2073492854833603,
3184
+ "learning_rate": 7.322556579656973e-05,
3185
+ "loss": 0.6927,
3186
+ "step": 2040
3187
+ },
3188
+ {
3189
+ "epoch": 2.0048989589712187,
3190
+ "grad_norm": 0.19991783797740936,
3191
+ "learning_rate": 7.305732882752519e-05,
3192
+ "loss": 0.7925,
3193
+ "step": 2045
3194
+ },
3195
+ {
3196
+ "epoch": 2.0097979179424375,
3197
+ "grad_norm": 0.22880135476589203,
3198
+ "learning_rate": 7.288891038811815e-05,
3199
+ "loss": 0.6637,
3200
+ "step": 2050
3201
+ },
3202
+ {
3203
+ "epoch": 2.0097979179424375,
3204
+ "eval_loss": 0.6483769416809082,
3205
+ "eval_runtime": 15.4537,
3206
+ "eval_samples_per_second": 126.766,
3207
+ "eval_steps_per_second": 15.854,
3208
+ "step": 2050
3209
  }
3210
  ],
3211
  "logging_steps": 5,
 
3234
  "attributes": {}
3235
  }
3236
  },
3237
+ "total_flos": 1.0570918347292017e+18,
3238
  "train_batch_size": 2,
3239
  "trial_name": null,
3240
  "trial_params": null