SystemAdmin123 committed on
Commit
7858aef
·
verified ·
1 Parent(s): dfb7f9a

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c27d9594ab02f9adc827e9cc100409e429b6fa6e5da458f22196243beabd0e12
3
  size 250490408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aeb37187936d017f6bd51736738766a1f2cd5d041a79e47044d4da52589bf50
3
  size 250490408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3184cf8f88bad2cfb5a68fc4094b566ab6b6cce219f98681e60f0f6402fd93e
3
  size 255265850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d8581669082f016d0e4e6b12b964126b4e2bcf55585a4a4b4bf94b576f2041e
3
  size 255265850
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e44ef27abe50e5bba1d9636856695c0706b4a69481203dbe05866fc8428b12b
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30ca12c54f4164ace515795e08e0960f0c28e1845dd3bb744b613ac48e9edba6
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c36c84de099f12bcc525eb47423becd04c47e16e865404a5529083e8a6215c3a
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df12b162a1b11037b9375aff4eed1b0f26be6ed2687bf1154d65e88dde5f9250
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cd11d413bc67bf01de9a1a006e9e7655be307353028b25f5b3c299e5b6b7a44
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748ed266e9432323e41e747f49e84d108918da883711ff6e01c8135af1c286fd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.666666666666668,
5
  "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -187,6 +187,49 @@
187
  "eval_samples_per_second": 141.537,
188
  "eval_steps_per_second": 2.263,
189
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ],
192
  "logging_steps": 10,
@@ -206,7 +249,7 @@
206
  "attributes": {}
207
  }
208
  },
209
- "total_flos": 1.2971581575790592e+16,
210
  "train_batch_size": 32,
211
  "trial_name": null,
212
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.833333333333332,
5
  "eval_steps": 50,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
187
  "eval_samples_per_second": 141.537,
188
  "eval_steps_per_second": 2.263,
189
  "step": 200
190
+ },
191
+ {
192
+ "epoch": 17.5,
193
+ "grad_norm": 3.984375,
194
+ "learning_rate": 0.00015469481581224272,
195
+ "loss": 3.2145,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 18.333333333333332,
200
+ "grad_norm": 4.0,
201
+ "learning_rate": 0.00015000000000000001,
202
+ "loss": 3.1833,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 19.166666666666668,
207
+ "grad_norm": 4.28125,
208
+ "learning_rate": 0.00014515333583108896,
209
+ "loss": 3.1072,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 20.0,
214
+ "grad_norm": 3.328125,
215
+ "learning_rate": 0.00014016954246529696,
216
+ "loss": 3.0609,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 20.833333333333332,
221
+ "grad_norm": 4.0,
222
+ "learning_rate": 0.00013506375551927547,
223
+ "loss": 3.0201,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 20.833333333333332,
228
+ "eval_loss": 3.0752980709075928,
229
+ "eval_runtime": 10.4283,
230
+ "eval_samples_per_second": 143.936,
231
+ "eval_steps_per_second": 2.301,
232
+ "step": 250
233
  }
234
  ],
235
  "logging_steps": 10,
 
249
  "attributes": {}
250
  }
251
  },
252
+ "total_flos": 1.6216828598550528e+16,
253
  "train_batch_size": 32,
254
  "trial_name": null,
255
  "trial_params": null