mgh6 commited on
Commit
eaa0351
·
verified ·
1 Parent(s): 36f02e7

Training in progress, step 6300, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3ae07dfc8047815d8a6c8fa296734722fa59b3cbce8564d1323ef267909af0
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02a1e0d87add09ba651f2a729abafd2a613814e02b8125bafe8d677d51716111
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be98d39cce9ff88fbe1cc2efa6e12295da3d7e52cc13c66ab55863018e19714d
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:970bd2be1ae81ed39a3bab26aac9fc4a4b90f3a6e038c7195d610bc8193f621e
3
  size 268176506
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc2062bc89d104244d565459987ebb4830d60b8bf5adf124b5e56de8734d6115
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be98fd7b6c59868051c2a7a496322fe42f0b5f1c7f02ac3df74001bd720639e9
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:623bc2e65ccbc34780c1af24f2c57017f114efdf168ac15aa697d79d87303903
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c09c62cd10a62d153706d50b58ca84f185d81931e84abc4ef9fdb458816f39a
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5706071c9ad96057c6c418b387e13aee2d821fd2f5f783d719b2e5373d3c6eba
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0095be6e8cdbda40c1ef0de4eab1d05ca8dcf9005466b61accb040b1d57e5855
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e50793c876edc70655416f85d3d91575396243d66a94e5c6919e4c64b70e3fe
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44214bf51755430bb0ad6e043f4416534d7554ae6b99cffa99fb0632139f6298
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a52d27fb81146a519532f9a867778c1f05661217a09bb1e7108e29e88e3a990
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f46aec629b817656a56924b587a49fe29c98571de391a7ee56ba1b4b34f3fc5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.757574200630188,
3
- "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-6200",
4
- "epoch": 8.299866131191433,
5
  "eval_steps": 100,
6
- "global_step": 6200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -937,6 +937,21 @@
937
  "eval_samples_per_second": 888.642,
938
  "eval_steps_per_second": 3.591,
939
  "step": 6200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
  }
941
  ],
942
  "logging_steps": 100,
@@ -965,7 +980,7 @@
965
  "attributes": {}
966
  }
967
  },
968
- "total_flos": 2.25136078749696e+17,
969
  "train_batch_size": 64,
970
  "trial_name": null,
971
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7565015554428101,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-6300",
4
+ "epoch": 8.433734939759036,
5
  "eval_steps": 100,
6
+ "global_step": 6300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
937
  "eval_samples_per_second": 888.642,
938
  "eval_steps_per_second": 3.591,
939
  "step": 6200
940
+ },
941
+ {
942
+ "epoch": 8.433734939759036,
943
+ "grad_norm": 0.1967461109161377,
944
+ "learning_rate": 0.0001566265060240964,
945
+ "loss": 0.672,
946
+ "step": 6300
947
+ },
948
+ {
949
+ "epoch": 8.433734939759036,
950
+ "eval_loss": 0.7565015554428101,
951
+ "eval_runtime": 6.3503,
952
+ "eval_samples_per_second": 896.341,
953
+ "eval_steps_per_second": 3.622,
954
+ "step": 6300
955
  }
956
  ],
957
  "logging_steps": 100,
 
980
  "attributes": {}
981
  }
982
  },
983
+ "total_flos": 2.28767305826304e+17,
984
  "train_batch_size": 64,
985
  "trial_name": null,
986
  "trial_params": null