tokhey committed on
Commit
edb4bc4
·
verified ·
1 Parent(s): 73beb7b

Training in progress, step 225, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:777110b9885805d05b54b83c0078c17d2c3f56f401f8d414b249c230762358bb
3
  size 140815952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79cc0f0d4da8dc41e74774dbe3bfeab0c15fec40247cbabd3e59c325c765c7a5
3
  size 140815952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bd299fcb2d78913771ba672fc903a33ad3ee1376e8feba78832d4267d4cbedd
3
  size 281829907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35fc8e78c68beb9d55d5f8c9737abc89321e71f3ac3fe3bd94c06fd11b5eae05
3
  size 281829907
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17c4ae54d27cbe43b4c5fa652d71ae90547dadc724f75624e3d3c44e35870949
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:606420a76df9020f0375458d71a968da3eb2527da0a6f0d6f52ba573a3d73d92
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d74ad02c6868c7927ba45f4229ae211aa9e779d68ecfa33c6fcfa330a9d67a76
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ef75ec69ab83d00b269577ccea9ea264ded16c482996c8d7d7cfc5af58b9a5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.6666666666666665,
6
  "eval_steps": 100,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -304,6 +304,41 @@
304
  "eval_samples_per_second": 0.378,
305
  "eval_steps_per_second": 0.378,
306
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  }
308
  ],
309
  "logging_steps": 5,
@@ -318,12 +353,12 @@
318
  "should_evaluate": false,
319
  "should_log": false,
320
  "should_save": true,
321
- "should_training_stop": false
322
  },
323
  "attributes": {}
324
  }
325
  },
326
- "total_flos": 6611475205324800.0,
327
  "train_batch_size": 1,
328
  "trial_name": null,
329
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 100,
7
+ "global_step": 225,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
304
  "eval_samples_per_second": 0.378,
305
  "eval_steps_per_second": 0.378,
306
  "step": 200
307
+ },
308
+ {
309
+ "epoch": 2.7333333333333334,
310
+ "grad_norm": 0.13668841123580933,
311
+ "learning_rate": 1.3215442672249972e-05,
312
+ "loss": 0.4098,
313
+ "step": 205
314
+ },
315
+ {
316
+ "epoch": 2.8,
317
+ "grad_norm": 0.11291203647851944,
318
+ "learning_rate": 7.70025020008347e-06,
319
+ "loss": 0.3316,
320
+ "step": 210
321
+ },
322
+ {
323
+ "epoch": 2.8666666666666667,
324
+ "grad_norm": 0.1049598902463913,
325
+ "learning_rate": 3.64949617782967e-06,
326
+ "loss": 0.3406,
327
+ "step": 215
328
+ },
329
+ {
330
+ "epoch": 2.9333333333333336,
331
+ "grad_norm": 0.10330229252576828,
332
+ "learning_rate": 1.0876630077453487e-06,
333
+ "loss": 0.3846,
334
+ "step": 220
335
+ },
336
+ {
337
+ "epoch": 3.0,
338
+ "grad_norm": 0.11973880231380463,
339
+ "learning_rate": 3.023418496261865e-08,
340
+ "loss": 0.3517,
341
+ "step": 225
342
  }
343
  ],
344
  "logging_steps": 5,
 
353
  "should_evaluate": false,
354
  "should_log": false,
355
  "should_save": true,
356
+ "should_training_stop": true
357
  },
358
  "attributes": {}
359
  }
360
  },
361
+ "total_flos": 7437909605990400.0,
362
  "train_batch_size": 1,
363
  "trial_name": null,
364
  "trial_params": null