Token Classification
Safetensors
English
deberta-v2
shawnrushefsky commited on
Commit
4005d62
·
verified ·
1 Parent(s): 6270181

Training in progress, step 369, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deccd61b451daa0ec0591ba75811e98693197f579b3769e921d384f095639eaa
3
  size 735396724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00617d127e52142db152f51035e6d793d67b79698553c806fb0c755de24a4944
3
  size 735396724
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6d709a2808f31d7f80a489592591a3ffebc1ac60d07a88e2117fd1b0b7d533c
3
  size 1470915147
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aec0adc0e2b4b305e7a1da493a55e418ee0ee5d560c8ff61b106568d5b0bac5
3
  size 1470915147
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2ef1ca8487f499ee246a6113560b7f98d9d93e7a706ea836514dfafc61cfdb5
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5daafd01e11a8e378bee423bfc78dc889b9595f52486fe44309d6cfdb1e8f39e
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21c4b2ee8ed61c350591897b0b738377e05c54710201e50796437303db65a96e
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd946779b63f36a745f76b3dea5b82fdda34f69f71b29d30bc33c6469ec6efc7
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c06b390f1bd2771723650e23106e03eb20a882a147c8eeb18edef13ad6207240
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfb52d3d9719c3a23780cb0726fd105f13eb3795bef452c37de45847245f48c4
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dcb6e19f15460381db97701e91d75ab838347177e3f2d1544b748fa16006b01
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3ea23a163b521be4be62f809b70e9314364da460e9093ec7e246193690284e2
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8743d9d1a5e67d62041cb18bb187837f62a466292a1d2ac5f934a15c6a7fca90
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3aa0ea2b718d2e995605d91b46b0095c0770bb817cc0b7bb955f992dbfd3445
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a22727b8d71b2e104aef2ff07e14810a7d56876a87b890223c43269fe5f4923b
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:274b05ab7fd4442bba200371ded2027858747aed543ac8bcea81c8efc8330216
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:942f6be4d2b8c51bc27a9a3b98cde9faca68e74d52c980f100a02872c02e2cfd
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70ecf73265273c79012c58e718f348331144177d9b2caba269cbcb34dabd37a
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b22c706cf368a787f58060e17e765a61fd8c9d0f22d86b6d62f9e450996e25
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c8994ebdec0308f7fb2f8c656db7abc240ad44611736b9d1866dfbb4e908d2
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:494887c1ddf3eca615797e6fee4eb1a99b2d33a061b56590e8489792ee772caa
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4b81c99c7d092f3e4d54713b045255676277930acb48938b544aa14bde54b79
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 246,
3
- "best_metric": 0.6956311106980313,
4
- "best_model_checkpoint": "model/checkpoint-246",
5
- "epoch": 0.5030674846625767,
6
  "eval_steps": 123,
7
- "global_step": 246,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -58,6 +58,38 @@
58
  "eval_samples_per_second": 1993.05,
59
  "eval_steps_per_second": 31.145,
60
  "step": 246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  ],
63
  "logging_steps": 50,
@@ -77,7 +109,7 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 3.1134071362945024e+16,
81
  "train_batch_size": 256,
82
  "trial_name": null,
83
  "trial_params": null
 
1
  {
2
+ "best_global_step": 369,
3
+ "best_metric": 0.7108936942946532,
4
+ "best_model_checkpoint": "model/checkpoint-369",
5
+ "epoch": 0.754601226993865,
6
  "eval_steps": 123,
7
+ "global_step": 369,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
58
  "eval_samples_per_second": 1993.05,
59
  "eval_steps_per_second": 31.145,
60
  "step": 246
61
+ },
62
+ {
63
+ "epoch": 0.5112474437627812,
64
+ "grad_norm": 0.3463546633720398,
65
+ "learning_rate": 1.9732369264463184e-05,
66
+ "loss": 0.1163,
67
+ "step": 250
68
+ },
69
+ {
70
+ "epoch": 0.6134969325153374,
71
+ "grad_norm": 0.31447625160217285,
72
+ "learning_rate": 1.9558884178116507e-05,
73
+ "loss": 0.1118,
74
+ "step": 300
75
+ },
76
+ {
77
+ "epoch": 0.7157464212678937,
78
+ "grad_norm": 0.32359057664871216,
79
+ "learning_rate": 1.934345942335807e-05,
80
+ "loss": 0.1105,
81
+ "step": 350
82
+ },
83
+ {
84
+ "epoch": 0.754601226993865,
85
+ "eval_entity_f1": 0.7108936942946532,
86
+ "eval_entity_precision": 0.6230731442494926,
87
+ "eval_entity_recall": 0.8653623799180904,
88
+ "eval_loss": 0.10435672849416733,
89
+ "eval_runtime": 75.8105,
90
+ "eval_samples_per_second": 1978.617,
91
+ "eval_steps_per_second": 30.919,
92
+ "step": 369
93
  }
94
  ],
95
  "logging_steps": 50,
 
109
  "attributes": {}
110
  }
111
  },
112
+ "total_flos": 4.673116000432947e+16,
113
  "train_batch_size": 256,
114
  "trial_name": null,
115
  "trial_params": null