mgh6 committed on
Commit
ecf1901
·
verified ·
1 Parent(s): 78de543

Training in progress, step 120000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:412193e885b1f8267ec01dd41cf7748d4021c58a085cdccf3df96d4c39a4f744
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b4abcad2bc55f8b93b6bd0b69a3e6b19d6d79eea733df712ccb4e558c7e76f
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c065961e7db169a4d7ddfe994e9a841e734d8eba81e357e012ac63914181fd7
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1318700afdd8bc8f372d0aac1019c41e2362c09c3dd521afe41c9c15c2dff706
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df07f83f2f362fd986b1bf45b68e7a3dcf6fa383add3ed8d2f752492eebc54e4
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6f637f68138f2f3e0b10bc8106538e25e3a7bd3c99fc90c278af4e9c1843848
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ba15a0768ff2fa4f2e6148e363a3a482950f51aed5d4a6c6dacbbecf282200a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb0d0bc14c7937072008c2425eb1b1c8372d5578f285662a058c839c08d67094
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.664338191015297,
5
  "eval_steps": 500,
6
- "global_step": 119000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -840,6 +840,13 @@
840
  "learning_rate": 4.440697020798201e-05,
841
  "loss": 0.7462,
842
  "step": 119000
 
 
 
 
 
 
 
843
  }
844
  ],
845
  "logging_steps": 1000,
@@ -859,7 +866,7 @@
859
  "attributes": {}
860
  }
861
  },
862
- "total_flos": 1.1904328599118807e+18,
863
  "train_batch_size": 64,
864
  "trial_name": null,
865
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.90521498253643,
5
  "eval_steps": 500,
6
+ "global_step": 120000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
840
  "learning_rate": 4.440697020798201e-05,
841
  "loss": 0.7462,
842
  "step": 119000
843
+ },
844
+ {
845
+ "epoch": 28.90521498253643,
846
+ "grad_norm": 0.5529988408088684,
847
+ "learning_rate": 3.6376776680318e-05,
848
+ "loss": 0.7478,
849
+ "step": 120000
850
  }
851
  ],
852
  "logging_steps": 1000,
 
866
  "attributes": {}
867
  }
868
  },
869
+ "total_flos": 1.2004367363924296e+18,
870
  "train_batch_size": 64,
871
  "trial_name": null,
872
  "trial_params": null