besimray committed on
Commit
8b1ed94
·
verified ·
1 Parent(s): b9272f4

Training in progress, step 25, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9dcbc40162be0164467bbc8b5e807c866f2f72ff307498d2a43c2c19d6cde5f
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1659573be9832300f4f4efea5a2cf7e4b44363f06622e7e55214c46e1143b5
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc4162b80d1e9112a887d3befe658a83ba1222e00f3cc9b381daff32e81f3858
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b2fab67c049fb2be004c11732675fa014bc78c5e94282c5a42a91db5f153a03
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27b9c1be8d8bae4df98ae1a27353c07078eed013c40757a6521cd0fbfb3c8fb9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e90410ed8d75deee232d46a71672a78439ef812c0e8c37ade4c255c49bee23b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a62e6ea25099651400ff4a3142a50e40bef5b52ba883be53b2fcb9d1a5b0a98c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8ae5b9632b883900417a4b328f111a055e2a3387d176daa619ce2ea248142d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.880952380952381,
5
  "eval_steps": 3,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -203,6 +203,57 @@
203
  "learning_rate": 0.0001,
204
  "loss": 1.2649,
205
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  }
207
  ],
208
  "logging_steps": 1,
@@ -222,7 +273,7 @@
222
  "attributes": {}
223
  }
224
  },
225
- "total_flos": 3791128769593344.0,
226
  "train_batch_size": 2,
227
  "trial_name": null,
228
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3333333333333335,
5
  "eval_steps": 3,
6
+ "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
203
  "learning_rate": 0.0001,
204
  "loss": 1.2649,
205
  "step": 20
206
+ },
207
+ {
208
+ "epoch": 1.9761904761904763,
209
+ "grad_norm": 0.17005658149719238,
210
+ "learning_rate": 8.435655349597689e-05,
211
+ "loss": 1.1879,
212
+ "step": 21
213
+ },
214
+ {
215
+ "epoch": 1.9761904761904763,
216
+ "eval_loss": 1.1423633098602295,
217
+ "eval_runtime": 6.4505,
218
+ "eval_samples_per_second": 15.503,
219
+ "eval_steps_per_second": 7.751,
220
+ "step": 21
221
+ },
222
+ {
223
+ "epoch": 2.0476190476190474,
224
+ "grad_norm": 0.20884154736995697,
225
+ "learning_rate": 6.909830056250527e-05,
226
+ "loss": 1.1346,
227
+ "step": 22
228
+ },
229
+ {
230
+ "epoch": 2.142857142857143,
231
+ "grad_norm": 0.1699027419090271,
232
+ "learning_rate": 5.4600950026045326e-05,
233
+ "loss": 1.1203,
234
+ "step": 23
235
+ },
236
+ {
237
+ "epoch": 2.238095238095238,
238
+ "grad_norm": 0.16104689240455627,
239
+ "learning_rate": 4.12214747707527e-05,
240
+ "loss": 1.198,
241
+ "step": 24
242
+ },
243
+ {
244
+ "epoch": 2.238095238095238,
245
+ "eval_loss": 1.141262173652649,
246
+ "eval_runtime": 6.2986,
247
+ "eval_samples_per_second": 15.876,
248
+ "eval_steps_per_second": 7.938,
249
+ "step": 24
250
+ },
251
+ {
252
+ "epoch": 2.3333333333333335,
253
+ "grad_norm": 0.16542628407478333,
254
+ "learning_rate": 2.9289321881345254e-05,
255
+ "loss": 1.1115,
256
+ "step": 25
257
  }
258
  ],
259
  "logging_steps": 1,
 
273
  "attributes": {}
274
  }
275
  },
276
+ "total_flos": 4707063061020672.0,
277
  "train_batch_size": 2,
278
  "trial_name": null,
279
  "trial_params": null