mgh6 commited on
Commit
d7df1ec
·
verified ·
1 Parent(s): 02a2670

Training in progress, step 118000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4693ab1d7b89c8b62f59e3b745ac1d26958661e43128d0951c45a8dae2b0449
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfed27d6337a699642afb264f3167a26d2ca21ac0273cb5e18c125ded3a387ba
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46a245f6ed22667309e57e9a46319a8b2fe7b589321a520f67f07d4b72b056ac
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7801aee86c0670959119d2467b5f94581b85321743ff0b2c1de9d55040ef0a1
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca5192042f50a136fba0493cd299946cbaf57e870dcaa2fe525804417d01654b
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12692e3ded8e4b48e14a240df346a1d6bfd5fe5c760f7febc8f9669a6659e15
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988ef2aca538b3b817a541deff8ad816e9a0af759270c4a157b0d1aedf5cd77c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27b9b8c3bd8dd54ca219bcc622f7453fe04bec46ffa58e0fd1624344a5a69ef4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.182584607973023,
5
  "eval_steps": 500,
6
- "global_step": 117000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -826,6 +826,13 @@
826
  "learning_rate": 6.046735726331005e-05,
827
  "loss": 0.7535,
828
  "step": 117000
 
 
 
 
 
 
 
829
  }
830
  ],
831
  "logging_steps": 1000,
@@ -845,7 +852,7 @@
845
  "attributes": {}
846
  }
847
  },
848
- "total_flos": 1.170425106950783e+18,
849
  "train_batch_size": 64,
850
  "trial_name": null,
851
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.42346139949416,
5
  "eval_steps": 500,
6
+ "global_step": 118000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
826
  "learning_rate": 6.046735726331005e-05,
827
  "loss": 0.7535,
828
  "step": 117000
829
+ },
830
+ {
831
+ "epoch": 28.42346139949416,
832
+ "grad_norm": 0.5937727689743042,
833
+ "learning_rate": 5.243716373564603e-05,
834
+ "loss": 0.7453,
835
+ "step": 118000
836
  }
837
  ],
838
  "logging_steps": 1000,
 
852
  "attributes": {}
853
  }
854
  },
855
+ "total_flos": 1.1804289834313318e+18,
856
  "train_batch_size": 64,
857
  "trial_name": null,
858
  "trial_params": null