mgh6 commited on
Commit
bbbb8e5
·
verified ·
1 Parent(s): b51f8a2

Training in progress, step 8800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15c6cb9247d45c10ce77370e60754859d2518047c9a604863293c6169fe1920a
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7757780af89912e735dbe8105f9436b517bec77bc86ead84b221561abe7a296d
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d464dfd896b2fd0820404716aed321e57b6a1656a1cd35b62c83fb57ed329024
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8431521971fece40cb686bfc9e291a10646a93986a28990be1d27f0f9abbae59
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe1d00b2b2daf374227827daf9e2dce48084dbcdfe32ac7a611f2c4f978154e6
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0ffd3aacbcb147c6ed382ad2dd8b147b0e5b9115eb8b744fcb864a112248a4f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c91d665b883130b2365ab2b5b66cce65436aac90db74b30a9f6865a2968e50
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3868261845271e61a238c0da87c904f896cb24e82af8bdd1e74bf04ddadb87a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.397281674738323,
5
  "eval_steps": 500,
6
- "global_step": 8700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -752,6 +752,13 @@
752
  "learning_rate": 4.2e-05,
753
  "loss": 1.2371,
754
  "step": 8700
 
 
 
 
 
 
 
755
  }
756
  ],
757
  "logging_steps": 100,
@@ -771,7 +778,7 @@
771
  "attributes": {}
772
  }
773
  },
774
- "total_flos": 1.3578366826575823e+19,
775
  "train_batch_size": 8,
776
  "trial_name": null,
777
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.59725042962037,
5
  "eval_steps": 500,
6
+ "global_step": 8800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
752
  "learning_rate": 4.2e-05,
753
  "loss": 1.2371,
754
  "step": 8700
755
+ },
756
+ {
757
+ "epoch": 17.59725042962037,
758
+ "grad_norm": 0.23766712844371796,
759
+ "learning_rate": 4.133333333333333e-05,
760
+ "loss": 1.2307,
761
+ "step": 8800
762
  }
763
  ],
764
  "logging_steps": 100,
 
778
  "attributes": {}
779
  }
780
  },
781
+ "total_flos": 1.373444179776399e+19,
782
  "train_batch_size": 8,
783
  "trial_name": null,
784
  "trial_params": null