besimray commited on
Commit
8ab4a71
·
verified ·
1 Parent(s): f9f519e

Training in progress, step 25, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b79d6c738525786d8d024bebde7ff54252eade5a767213fcce4683fdcd8800e4
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31cbaf9f678fb5252161490e7de95855e09d9f5a4bee67c60dc0b199da4a6b53
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd114e07ac02776a619f90cdd586afe7eeed91c34f863224075824ea5a7e6e77
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe342709cbad6397e41ae1e5c634ef6e4b377eb7e04ad4b2e12e258c1aeb717
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27b9c1be8d8bae4df98ae1a27353c07078eed013c40757a6521cd0fbfb3c8fb9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e90410ed8d75deee232d46a71672a78439ef812c0e8c37ade4c255c49bee23b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a62e6ea25099651400ff4a3142a50e40bef5b52ba883be53b2fcb9d1a5b0a98c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8ae5b9632b883900417a4b328f111a055e2a3387d176daa619ce2ea248142d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.880952380952381,
5
  "eval_steps": 3,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -203,6 +203,57 @@
203
  "learning_rate": 0.0001,
204
  "loss": 1.2653,
205
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  }
207
  ],
208
  "logging_steps": 1,
@@ -222,7 +273,7 @@
222
  "attributes": {}
223
  }
224
  },
225
- "total_flos": 3791128769593344.0,
226
  "train_batch_size": 2,
227
  "trial_name": null,
228
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3333333333333335,
5
  "eval_steps": 3,
6
+ "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
203
  "learning_rate": 0.0001,
204
  "loss": 1.2653,
205
  "step": 20
206
+ },
207
+ {
208
+ "epoch": 1.9761904761904763,
209
+ "grad_norm": 0.17035318911075592,
210
+ "learning_rate": 8.435655349597689e-05,
211
+ "loss": 1.1873,
212
+ "step": 21
213
+ },
214
+ {
215
+ "epoch": 1.9761904761904763,
216
+ "eval_loss": 1.1423017978668213,
217
+ "eval_runtime": 6.2847,
218
+ "eval_samples_per_second": 15.912,
219
+ "eval_steps_per_second": 7.956,
220
+ "step": 21
221
+ },
222
+ {
223
+ "epoch": 2.0476190476190474,
224
+ "grad_norm": 0.20932504534721375,
225
+ "learning_rate": 6.909830056250527e-05,
226
+ "loss": 1.1359,
227
+ "step": 22
228
+ },
229
+ {
230
+ "epoch": 2.142857142857143,
231
+ "grad_norm": 0.1693231463432312,
232
+ "learning_rate": 5.4600950026045326e-05,
233
+ "loss": 1.1202,
234
+ "step": 23
235
+ },
236
+ {
237
+ "epoch": 2.238095238095238,
238
+ "grad_norm": 0.16167840361595154,
239
+ "learning_rate": 4.12214747707527e-05,
240
+ "loss": 1.1978,
241
+ "step": 24
242
+ },
243
+ {
244
+ "epoch": 2.238095238095238,
245
+ "eval_loss": 1.1400079727172852,
246
+ "eval_runtime": 6.3594,
247
+ "eval_samples_per_second": 15.725,
248
+ "eval_steps_per_second": 7.862,
249
+ "step": 24
250
+ },
251
+ {
252
+ "epoch": 2.3333333333333335,
253
+ "grad_norm": 0.16464297473430634,
254
+ "learning_rate": 2.9289321881345254e-05,
255
+ "loss": 1.1135,
256
+ "step": 25
257
  }
258
  ],
259
  "logging_steps": 1,
 
273
  "attributes": {}
274
  }
275
  },
276
+ "total_flos": 4707063061020672.0,
277
  "train_batch_size": 2,
278
  "trial_name": null,
279
  "trial_params": null