Token Classification
Safetensors
English
deberta-v2
shawnrushefsky committed on
Commit
95a73df
·
verified ·
1 Parent(s): c5a5989

Training in progress, step 861, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8577b4857b879d05e0f8a6cfe7c43b8c8ced9e7dfe63a4280e2e26a0e15e3c94
3
  size 735396724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0e470e1eacd956bee52a20508ccdb7f508b31c3c5f624b811daf5cfa3c7cf3
3
  size 735396724
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94b0395fd5b76ce19e34d146a8c289af03287be06f938af8e5a3ecf61883d775
3
  size 1470915147
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333e703f3783d34a6d44079c09bfb829d5cf3ee625752b283ca8731d7c2d4193
3
  size 1470915147
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2438a2027427b3d96e56afa02342e40b518cc5a4732faf79d6925d5d55c577f0
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386936f571a3bb73993bb57c8096f93a00e47a91407b87b14eafaf523d8d4243
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b38c30466207633871e8888206bfe5061612dc77969df08738de7f38180f016e
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc802edde1abd3269a9fb5bd5728b077d66511bd1040cd61d61bfd756d8edf82
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:500ceadf909eafd207b60a2fffbb122c8876c30e24e54f8c5c803d79507c972c
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5669e1a3fa934bf58fc578de28d959d51d71136e896799b0f88f3a63a39c20ae
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67ea961a54ce2c81798cc07b051c3e0a86c346a5b46ed3c52e9ad1a579e735e6
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a1e0244fee1d7a41af4c1e6bb36d9c21f1092f569f8c69a338a0d753130e2f1
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b516e159fa8c859060ac9216c8c88e777ae6b3be75cd282753aa03e2d8ddd5d
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38a5b73c21e3fdc1d3880bc420afefb2bcaee15f4859f00a2dedf626d423293
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c00c04a0504831ef1b89143e9318053ab8faf9658d240bdb98b2f4c94afaf578
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6428c4cb1b09dbf4c49c45d7c925fedb46fbf7d14d24a337331ceb01c537fd7e
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d58dcb0bbaa1efa8f03be4baf87c9eab892f94695cc44466d3b7dcde1769437c
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:962516c0946264663175debb272171a6aeeef3bcbf3a51bb37623d3e002784bc
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05560e77f104933fad67bf5dcd7fe3899a199c567c3ae3bd1089287dcba01cbe
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58076390752eb289cf58b117a31dac67edb3ce8024e8cda265f0e1778fdf0150
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1095ec08fae81706719696411543a90b06fc787d5310b31ba6bec1d086872596
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b33311c9d1ac1e2e9a207a403ad11f68a52ddbcc7079e83ceb4058a94228f8
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 738,
3
- "best_metric": 0.7250351529639901,
4
- "best_model_checkpoint": "model/checkpoint-738",
5
- "epoch": 1.50920245398773,
6
  "eval_steps": 123,
7
- "global_step": 738,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -172,6 +172,38 @@
172
  "eval_samples_per_second": 1995.539,
173
  "eval_steps_per_second": 31.184,
174
  "step": 738
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  }
176
  ],
177
  "logging_steps": 50,
@@ -191,7 +223,7 @@
191
  "attributes": {}
192
  }
193
  },
194
- "total_flos": 9.35469922690007e+16,
195
  "train_batch_size": 256,
196
  "trial_name": null,
197
  "trial_params": null
 
1
  {
2
+ "best_global_step": 861,
3
+ "best_metric": 0.728103938603657,
4
+ "best_model_checkpoint": "model/checkpoint-861",
5
+ "epoch": 1.7607361963190185,
6
  "eval_steps": 123,
7
+ "global_step": 861,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
172
  "eval_samples_per_second": 1995.539,
173
  "eval_steps_per_second": 31.184,
174
  "step": 738
175
+ },
176
+ {
177
+ "epoch": 1.5337423312883436,
178
+ "grad_norm": 0.23476801812648773,
179
+ "learning_rate": 1.6260024056459024e-05,
180
+ "loss": 0.094,
181
+ "step": 750
182
+ },
183
+ {
184
+ "epoch": 1.6359918200408998,
185
+ "grad_norm": 0.2882135808467865,
186
+ "learning_rate": 1.573003455354235e-05,
187
+ "loss": 0.0935,
188
+ "step": 800
189
+ },
190
+ {
191
+ "epoch": 1.738241308793456,
192
+ "grad_norm": 0.19589418172836304,
193
+ "learning_rate": 1.5174904485609352e-05,
194
+ "loss": 0.0954,
195
+ "step": 850
196
+ },
197
+ {
198
+ "epoch": 1.7607361963190185,
199
+ "eval_entity_f1": 0.728103938603657,
200
+ "eval_entity_precision": 0.6492946761951431,
201
+ "eval_entity_recall": 0.871873007639857,
202
+ "eval_loss": 0.09316740930080414,
203
+ "eval_runtime": 75.2979,
204
+ "eval_samples_per_second": 1992.087,
205
+ "eval_steps_per_second": 31.13,
206
+ "step": 861
207
  }
208
  ],
209
  "logging_steps": 50,
 
223
  "attributes": {}
224
  }
225
  },
226
+ "total_flos": 1.091555797457961e+17,
227
  "train_batch_size": 256,
228
  "trial_name": null,
229
  "trial_params": null