mgh6 committed on
Commit
32f5a14
·
verified ·
1 Parent(s): c39210c

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8045ece01cd2aeb9d499a0320e1f22286bdc8cfcfe61a226664746e07c3c5f60
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4449773cebb7fc20f1afa5ae5d77b8cd35bef0c0950cae40c99ea43ed9adf3f2
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:005e7de3abb84635dafdee3a8ba79cb585547bd0f0048be0efc20bee6a04b734
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a25d83ef5b65b44f7e3171cf437447867602a77559bcaf513ab95836cec436f4
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf21107d6237c1a47582df5d14262b3f30a2235c4b055ac43a59cd75f5a56ee5
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2d74e185f05924f3efb59e8e62402934923eb6095702f1ed9ee86b4e99fff7
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b174702c9c1953310bc2ab4fc08bf447a0a35d9790cc4ddb43424b44578611a9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527c464a68510f76edc995cc9b4702aff5ce3e29350f6f2b9760ebf8e1ca430b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.798156538040931,
5
  "eval_steps": 500,
6
- "global_step": 5900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -508,6 +508,21 @@
508
  "learning_rate": 6.066666666666667e-05,
509
  "loss": 1.25,
510
  "step": 5900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  }
512
  ],
513
  "logging_steps": 100,
@@ -527,7 +542,7 @@
527
  "attributes": {}
528
  }
529
  },
530
- "total_flos": 9.208322574528807e+18,
531
  "train_batch_size": 8,
532
  "trial_name": null,
533
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.99812529292298,
5
  "eval_steps": 500,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
508
  "learning_rate": 6.066666666666667e-05,
509
  "loss": 1.25,
510
  "step": 5900
511
+ },
512
+ {
513
+ "epoch": 11.99812529292298,
514
+ "grad_norm": 0.20437301695346832,
515
+ "learning_rate": 6e-05,
516
+ "loss": 1.2491,
517
+ "step": 6000
518
+ },
519
+ {
520
+ "epoch": 11.99812529292298,
521
+ "eval_loss": 1.128514289855957,
522
+ "eval_runtime": 781.5324,
523
+ "eval_samples_per_second": 19.861,
524
+ "eval_steps_per_second": 2.484,
525
+ "step": 6000
526
  }
527
  ],
528
  "logging_steps": 100,
 
542
  "attributes": {}
543
  }
544
  },
545
+ "total_flos": 9.364397545716974e+18,
546
  "train_batch_size": 8,
547
  "trial_name": null,
548
  "trial_params": null