mgh6 commited on
Commit
5b295ea
·
verified ·
1 Parent(s): 3254da4

Training in progress, epoch 6, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b046fc1073ac10130a6554de2dc6d82fd6f7d91e8c0405612fe68ad82ad5378
3
  size 2609498088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e59169943b54be3d7eb668ac02f8339930bf6e79a5c68e753ed0aa42ee977cf
3
  size 2609498088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7bc2e4ec6bdc742f4ad4def66c2207af291f3b87deb4413422c676c78b3b92c
3
  size 5208796146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e27fd22fb01fb9d40757c9c0d63833bd578ba1318c457743f1a5a8903b43a67f
3
  size 5208796146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:321aca7bd350f679df83bd3bdc1330a4f60d8b3c06b22857cf838532bb504c43
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d42a1f1fae69ab6dd8627554c9be0722476204fd05dc463f39ae8915d82e67f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b514faa73b6e320d8ae19d93f3da594146e59f1072af645ee09b9ce747afd0a1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ebd3960ec1614497d0e6e97cebd857464e618edc4df8a50eee43da5ac2ba348
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 50,
6
- "global_step": 645,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -187,6 +187,51 @@
187
  "eval_samples_per_second": 37.432,
188
  "eval_steps_per_second": 18.716,
189
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ],
192
  "logging_steps": 50,
@@ -206,7 +251,7 @@
206
  "attributes": {}
207
  }
208
  },
209
- "total_flos": 1.7831589615291597e+17,
210
  "train_batch_size": 2,
211
  "trial_name": null,
212
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.0,
5
  "eval_steps": 50,
6
+ "global_step": 774,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
187
  "eval_samples_per_second": 37.432,
188
  "eval_steps_per_second": 18.716,
189
  "step": 600
190
+ },
191
+ {
192
+ "epoch": 5.039043435822352,
193
+ "grad_norm": 0.6386131048202515,
194
+ "learning_rate": 4.921875e-05,
195
+ "loss": 1.1507,
196
+ "step": 650
197
+ },
198
+ {
199
+ "epoch": 5.039043435822352,
200
+ "eval_loss": 1.2271380424499512,
201
+ "eval_runtime": 11.5699,
202
+ "eval_samples_per_second": 37.338,
203
+ "eval_steps_per_second": 18.669,
204
+ "step": 650
205
+ },
206
+ {
207
+ "epoch": 5.4294777940458765,
208
+ "grad_norm": 0.6771230101585388,
209
+ "learning_rate": 4.5312500000000004e-05,
210
+ "loss": 1.134,
211
+ "step": 700
212
+ },
213
+ {
214
+ "epoch": 5.4294777940458765,
215
+ "eval_loss": 1.2191808223724365,
216
+ "eval_runtime": 11.5238,
217
+ "eval_samples_per_second": 37.488,
218
+ "eval_steps_per_second": 18.744,
219
+ "step": 700
220
+ },
221
+ {
222
+ "epoch": 5.819912152269399,
223
+ "grad_norm": 0.6427966952323914,
224
+ "learning_rate": 4.140625e-05,
225
+ "loss": 1.1258,
226
+ "step": 750
227
+ },
228
+ {
229
+ "epoch": 5.819912152269399,
230
+ "eval_loss": 1.2103700637817383,
231
+ "eval_runtime": 11.5062,
232
+ "eval_samples_per_second": 37.545,
233
+ "eval_steps_per_second": 18.772,
234
+ "step": 750
235
  }
236
  ],
237
  "logging_steps": 50,
 
251
  "attributes": {}
252
  }
253
  },
254
+ "total_flos": 2.139790752224379e+17,
255
  "train_batch_size": 2,
256
  "trial_name": null,
257
  "trial_params": null