mgh6 committed on
Commit
2a0e4a5
·
verified ·
1 Parent(s): cdd82db

Training in progress, step 119000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfed27d6337a699642afb264f3167a26d2ca21ac0273cb5e18c125ded3a387ba
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412193e885b1f8267ec01dd41cf7748d4021c58a085cdccf3df96d4c39a4f744
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7801aee86c0670959119d2467b5f94581b85321743ff0b2c1de9d55040ef0a1
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c065961e7db169a4d7ddfe994e9a841e734d8eba81e357e012ac63914181fd7
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d12692e3ded8e4b48e14a240df346a1d6bfd5fe5c760f7febc8f9669a6659e15
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df07f83f2f362fd986b1bf45b68e7a3dcf6fa383add3ed8d2f752492eebc54e4
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27b9b8c3bd8dd54ca219bcc622f7453fe04bec46ffa58e0fd1624344a5a69ef4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba15a0768ff2fa4f2e6148e363a3a482950f51aed5d4a6c6dacbbecf282200a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.42346139949416,
5
  "eval_steps": 500,
6
- "global_step": 118000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -833,6 +833,13 @@
833
  "learning_rate": 5.243716373564603e-05,
834
  "loss": 0.7453,
835
  "step": 118000
 
 
 
 
 
 
 
836
  }
837
  ],
838
  "logging_steps": 1000,
@@ -852,7 +859,7 @@
852
  "attributes": {}
853
  }
854
  },
855
- "total_flos": 1.1804289834313318e+18,
856
  "train_batch_size": 64,
857
  "trial_name": null,
858
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.664338191015297,
5
  "eval_steps": 500,
6
+ "global_step": 119000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
833
  "learning_rate": 5.243716373564603e-05,
834
  "loss": 0.7453,
835
  "step": 118000
836
+ },
837
+ {
838
+ "epoch": 28.664338191015297,
839
+ "grad_norm": 0.5951708555221558,
840
+ "learning_rate": 4.440697020798201e-05,
841
+ "loss": 0.7462,
842
+ "step": 119000
843
  }
844
  ],
845
  "logging_steps": 1000,
 
859
  "attributes": {}
860
  }
861
  },
862
+ "total_flos": 1.1904328599118807e+18,
863
  "train_batch_size": 64,
864
  "trial_name": null,
865
  "trial_params": null