Token Classification
Safetensors
English
deberta-v2
shawnrushefsky committed on
Commit
7fa27e3
·
verified ·
1 Parent(s): 2270207

Training in progress, step 615, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1b67c149f93729ff5d99f0579c55f8a26bb90fe36bcc54fa14c0c80571b72bc
3
  size 735396724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff836c102370effe0873d714d18fb4deb97c990377413beb28af0209b7117fd
3
  size 735396724
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fea94f691bcda836a311e6903ddfbcac11ebb5e781b8a9476dfc2efe58fea1a
3
  size 1470915147
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23121584ddfa42bf24d51afe9540f4af170e58a7d92879f2ef5afc774afcf860
3
  size 1470915147
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e6530759c518c078edc4ae49d672f5f242a93037b0e4dd194c202c72d75543
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d09f12437570e9924f51bc4a821db9b068e306d232df0cca1a764d5a6c61a79f
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:366c74d3a7fe533d4fa142e544c066f8f9646b963e7962826461ae512537ca63
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b886aa3d5bf3a5412d189e2e63abc0e0362d43ead0c1373f953b7ddf9847afb
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fb1b72021107976ad5d442650034b28c3c6bc9c3e8de922d645c6277ea8d9eb
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be74bf3f842bae7677c9d0f21bf8c2bf8bdef7bc5e4729c1e101ce625035e9bd
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5bcca44a7f5fab1f8e98dce58817837a3d7095f1ad1c0ef1c858df738b40285
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1124a4af024eed9c934cf3ba9d0d996a738e061f5691271c14c7e5100af77b
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aea798263f1db0f52960cdcad18f3b680365093036927425971d0f7b83b8cac1
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33fc7fdc06cdeb8bb2807eccde88ebb8242a3864a3857fb765782ad7bd05e4c1
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c94f331ae72a64d5dac40124aeea47d4554af3a199298edfb60ccc8b3d6eb31
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5836b27184f990ca621738efc9434979e8a956e3ee20e9620b60dc22e0b28dc
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aa4dd4ef2abf1151df3ac605c46ea001fb6a28e625e02202e88d37f2af78be6
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40658d030b33d76a0abb569872fbae573ee257ce1a678d173300ad883007eb86
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a89d4c15664b7f5001cf76be5071f4d0e1cffc50ac157651c9bce0d5c37b6ab
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e84ac527cf9cfef8619fd0f3a76b8211c6167dcc7c225bfa4dafe4104b5fd35
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bba0415825f315ff5d45583b7b1ecd031ea2fde60e522a47be2528cc3a730746
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8cacffbc62e563c7b7ee69a67f0fcd9357f9269af92f22b2b3802c932e9df3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 492,
3
  "best_metric": 0.7237721816258966,
4
  "best_model_checkpoint": "model/checkpoint-492",
5
- "epoch": 1.0061349693251533,
6
  "eval_steps": 123,
7
- "global_step": 492,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -115,6 +115,38 @@
115
  "eval_samples_per_second": 1980.667,
116
  "eval_steps_per_second": 30.951,
117
  "step": 492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  }
119
  ],
120
  "logging_steps": 50,
@@ -134,7 +166,7 @@
134
  "attributes": {}
135
  }
136
  },
137
- "total_flos": 6.235385980413542e+16,
138
  "train_batch_size": 256,
139
  "trial_name": null,
140
  "trial_params": null
 
2
  "best_global_step": 492,
3
  "best_metric": 0.7237721816258966,
4
  "best_model_checkpoint": "model/checkpoint-492",
5
+ "epoch": 1.2576687116564418,
6
  "eval_steps": 123,
7
+ "global_step": 615,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
115
  "eval_samples_per_second": 1980.667,
116
  "eval_steps_per_second": 30.951,
117
  "step": 492
118
+ },
119
+ {
120
+ "epoch": 1.0224948875255624,
121
+ "grad_norm": 0.2750433385372162,
122
+ "learning_rate": 1.8455893306060422e-05,
123
+ "loss": 0.1016,
124
+ "step": 500
125
+ },
126
+ {
127
+ "epoch": 1.1247443762781186,
128
+ "grad_norm": 0.29885414242744446,
129
+ "learning_rate": 1.8083934841122383e-05,
130
+ "loss": 0.0981,
131
+ "step": 550
132
+ },
133
+ {
134
+ "epoch": 1.2269938650306749,
135
+ "grad_norm": 0.23177891969680786,
136
+ "learning_rate": 1.7676508057876326e-05,
137
+ "loss": 0.0969,
138
+ "step": 600
139
+ },
140
+ {
141
+ "epoch": 1.2576687116564418,
142
+ "eval_entity_f1": 0.7210088680783326,
143
+ "eval_entity_precision": 0.6396341157410339,
144
+ "eval_entity_recall": 0.8732237602792275,
145
+ "eval_loss": 0.09714934974908829,
146
+ "eval_runtime": 75.4582,
147
+ "eval_samples_per_second": 1987.855,
148
+ "eval_steps_per_second": 31.064,
149
+ "step": 615
150
  }
151
  ],
152
  "logging_steps": 50,
 
166
  "attributes": {}
167
  }
168
  },
169
+ "total_flos": 7.797577516843008e+16,
170
  "train_batch_size": 256,
171
  "trial_name": null,
172
  "trial_params": null