jaime-epoch-metrics committed on
Commit
3c5c868
·
verified ·
1 Parent(s): 5e2ef39

epochmetrics/task-embedder

Browse files
README.md CHANGED
@@ -13,14 +13,14 @@ model-index:
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/epoch-metrics/fine-tuning/runs/lyr2lbb4)
17
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/epoch-metrics/fine-tuning/runs/lyr2lbb4)
18
  # task-embedder
19
 
20
  This model is a fine-tuned version of [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 9.8114
23
- - Accuracy: 0.0068
24
 
25
  ## Model description
26
 
@@ -45,13 +45,27 @@ The following hyperparameters were used during training:
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
- - num_epochs: 1
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
54
- | 12.3606 | 1.0 | 2 | 10.2947 | 0.0083 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  ### Framework versions
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/epoch-metrics/fine-tuning/runs/cpytiehg)
17
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/epoch-metrics/fine-tuning/runs/cpytiehg)
18
  # task-embedder
19
 
20
  This model is a fine-tuned version of [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 2.0565
23
+ - Accuracy: 0.6332
24
 
25
  ## Model description
26
 
 
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
+ - num_epochs: 15
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
54
+ | 5.6287 | 1.0 | 171 | 4.0112 | 0.3787 |
55
+ | 3.7578 | 2.0 | 342 | 3.2693 | 0.4648 |
56
+ | 3.2266 | 3.0 | 513 | 2.9233 | 0.5083 |
57
+ | 2.9062 | 4.0 | 684 | 2.6422 | 0.5454 |
58
+ | 2.7046 | 5.0 | 855 | 2.5057 | 0.5657 |
59
+ | 2.5462 | 6.0 | 1026 | 2.3794 | 0.5850 |
60
+ | 2.4348 | 7.0 | 1197 | 2.2906 | 0.5981 |
61
+ | 2.3406 | 8.0 | 1368 | 2.2580 | 0.6043 |
62
+ | 2.2544 | 9.0 | 1539 | 2.1751 | 0.6137 |
63
+ | 2.2031 | 10.0 | 1710 | 2.1368 | 0.6225 |
64
+ | 2.1693 | 11.0 | 1881 | 2.1410 | 0.6185 |
65
+ | 2.1243 | 12.0 | 2052 | 2.0609 | 0.6291 |
66
+ | 2.086 | 13.0 | 2223 | 2.0226 | 0.6354 |
67
+ | 2.0771 | 14.0 | 2394 | 2.0461 | 0.6358 |
68
+ | 2.0692 | 15.0 | 2565 | 2.0071 | 0.6430 |
69
 
70
 
71
  ### Framework versions
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.006756756756756757,
4
- "eval_loss": 9.811368942260742,
5
- "eval_runtime": 0.0832,
6
- "eval_samples": 2,
7
- "eval_samples_per_second": 24.028,
8
- "eval_steps_per_second": 12.014,
9
- "perplexity": 18239.939418355945,
10
- "total_flos": 3158457937920.0,
11
- "train_loss": 12.360578536987305,
12
- "train_runtime": 15.0775,
13
- "train_samples": 12,
14
- "train_samples_per_second": 0.796,
15
- "train_steps_per_second": 0.133
16
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.6331559235166568,
4
+ "eval_loss": 2.056515693664551,
5
+ "eval_runtime": 8.1808,
6
+ "eval_samples": 350,
7
+ "eval_samples_per_second": 42.783,
8
+ "eval_steps_per_second": 5.378,
9
+ "perplexity": 7.818679619302798,
10
+ "total_flos": 5397015001420800.0,
11
+ "train_loss": 2.701929186845151,
12
+ "train_runtime": 4016.0696,
13
+ "train_samples": 1367,
14
+ "train_samples_per_second": 5.106,
15
+ "train_steps_per_second": 0.639
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.006756756756756757,
4
- "eval_loss": 9.811368942260742,
5
- "eval_runtime": 0.0832,
6
- "eval_samples": 2,
7
- "eval_samples_per_second": 24.028,
8
- "eval_steps_per_second": 12.014,
9
- "perplexity": 18239.939418355945
10
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.6331559235166568,
4
+ "eval_loss": 2.056515693664551,
5
+ "eval_runtime": 8.1808,
6
+ "eval_samples": 350,
7
+ "eval_samples_per_second": 42.783,
8
+ "eval_steps_per_second": 5.378,
9
+ "perplexity": 7.818679619302798
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc9adc08401ae19f43369e8858f4255c0dd7cc93b1bef6612e1be3f3c225912d
3
  size 438097372
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dcd47ba207a3f860f8ae2d2805dcf6163dae73d4d41393602f3b1985f1fa65a
3
  size 438097372
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 3158457937920.0,
4
- "train_loss": 12.360578536987305,
5
- "train_runtime": 15.0775,
6
- "train_samples": 12,
7
- "train_samples_per_second": 0.796,
8
- "train_steps_per_second": 0.133
9
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "total_flos": 5397015001420800.0,
4
+ "train_loss": 2.701929186845151,
5
+ "train_runtime": 4016.0696,
6
+ "train_samples": 1367,
7
+ "train_samples_per_second": 5.106,
8
+ "train_steps_per_second": 0.639
9
  }
trainer_state.json CHANGED
@@ -1,43 +1,267 @@
1
  {
2
- "best_metric": 10.294657707214355,
3
- "best_model_checkpoint": "epochmetrics/task-embedder/checkpoint-2",
4
- "epoch": 1.0,
5
  "eval_steps": 1,
6
- "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 26.22691535949707,
14
- "learning_rate": 0.0,
15
- "loss": 12.3606,
16
- "step": 2
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.008333333333333333,
21
- "eval_loss": 10.294657707214355,
22
- "eval_runtime": 0.0948,
23
- "eval_samples_per_second": 21.096,
24
- "eval_steps_per_second": 10.548,
25
- "step": 2
26
  },
27
  {
28
- "epoch": 1.0,
29
- "step": 2,
30
- "total_flos": 3158457937920.0,
31
- "train_loss": 12.360578536987305,
32
- "train_runtime": 15.0775,
33
- "train_samples_per_second": 0.796,
34
- "train_steps_per_second": 0.133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
  ],
37
  "logging_steps": 1,
38
- "max_steps": 2,
39
  "num_input_tokens_seen": 0,
40
- "num_train_epochs": 1,
41
  "save_steps": 1,
42
  "stateful_callbacks": {
43
  "TrainerControl": {
@@ -51,7 +275,7 @@
51
  "attributes": {}
52
  }
53
  },
54
- "total_flos": 3158457937920.0,
55
  "train_batch_size": 8,
56
  "trial_name": null,
57
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.0070760250091553,
3
+ "best_model_checkpoint": "epochmetrics/task-embedder/checkpoint-2565",
4
+ "epoch": 15.0,
5
  "eval_steps": 1,
6
+ "global_step": 2565,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 6.0819196701049805,
14
+ "learning_rate": 4.666666666666667e-05,
15
+ "loss": 5.6287,
16
+ "step": 171
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.3786586320237909,
21
+ "eval_loss": 4.011183738708496,
22
+ "eval_runtime": 13.9675,
23
+ "eval_samples_per_second": 25.058,
24
+ "eval_steps_per_second": 3.15,
25
+ "step": 171
26
  },
27
  {
28
+ "epoch": 2.0,
29
+ "grad_norm": 5.902393341064453,
30
+ "learning_rate": 4.3333333333333334e-05,
31
+ "loss": 3.7578,
32
+ "step": 342
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.4647582296424634,
37
+ "eval_loss": 3.269301414489746,
38
+ "eval_runtime": 14.2546,
39
+ "eval_samples_per_second": 24.554,
40
+ "eval_steps_per_second": 3.087,
41
+ "step": 342
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "grad_norm": 6.104726314544678,
46
+ "learning_rate": 4e-05,
47
+ "loss": 3.2266,
48
+ "step": 513
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.508273029532564,
53
+ "eval_loss": 2.9233286380767822,
54
+ "eval_runtime": 8.5864,
55
+ "eval_samples_per_second": 40.762,
56
+ "eval_steps_per_second": 5.124,
57
+ "step": 513
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "grad_norm": 5.560727119445801,
62
+ "learning_rate": 3.6666666666666666e-05,
63
+ "loss": 2.9062,
64
+ "step": 684
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_accuracy": 0.5453717994380466,
69
+ "eval_loss": 2.642239809036255,
70
+ "eval_runtime": 8.5093,
71
+ "eval_samples_per_second": 41.131,
72
+ "eval_steps_per_second": 5.171,
73
+ "step": 684
74
+ },
75
+ {
76
+ "epoch": 5.0,
77
+ "grad_norm": 5.642611980438232,
78
+ "learning_rate": 3.3333333333333335e-05,
79
+ "loss": 2.7046,
80
+ "step": 855
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_accuracy": 0.5657459867799811,
85
+ "eval_loss": 2.505657434463501,
86
+ "eval_runtime": 8.4686,
87
+ "eval_samples_per_second": 41.329,
88
+ "eval_steps_per_second": 5.196,
89
+ "step": 855
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "grad_norm": 5.382541656494141,
94
+ "learning_rate": 3e-05,
95
+ "loss": 2.5462,
96
+ "step": 1026
97
+ },
98
+ {
99
+ "epoch": 6.0,
100
+ "eval_accuracy": 0.5850446167634338,
101
+ "eval_loss": 2.3794305324554443,
102
+ "eval_runtime": 8.6097,
103
+ "eval_samples_per_second": 40.652,
104
+ "eval_steps_per_second": 5.111,
105
+ "step": 1026
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "grad_norm": 5.883482933044434,
110
+ "learning_rate": 2.6666666666666667e-05,
111
+ "loss": 2.4348,
112
+ "step": 1197
113
+ },
114
+ {
115
+ "epoch": 7.0,
116
+ "eval_accuracy": 0.5981030022732617,
117
+ "eval_loss": 2.290560722351074,
118
+ "eval_runtime": 14.2401,
119
+ "eval_samples_per_second": 24.579,
120
+ "eval_steps_per_second": 3.09,
121
+ "step": 1197
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "grad_norm": 4.892796993255615,
126
+ "learning_rate": 2.3333333333333336e-05,
127
+ "loss": 2.3406,
128
+ "step": 1368
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.6042614409580466,
133
+ "eval_loss": 2.2579710483551025,
134
+ "eval_runtime": 14.1291,
135
+ "eval_samples_per_second": 24.772,
136
+ "eval_steps_per_second": 3.114,
137
+ "step": 1368
138
+ },
139
+ {
140
+ "epoch": 9.0,
141
+ "grad_norm": 5.444692611694336,
142
+ "learning_rate": 2e-05,
143
+ "loss": 2.2544,
144
+ "step": 1539
145
+ },
146
+ {
147
+ "epoch": 9.0,
148
+ "eval_accuracy": 0.6137461398368833,
149
+ "eval_loss": 2.1750903129577637,
150
+ "eval_runtime": 18.0466,
151
+ "eval_samples_per_second": 19.394,
152
+ "eval_steps_per_second": 2.438,
153
+ "step": 1539
154
+ },
155
+ {
156
+ "epoch": 10.0,
157
+ "grad_norm": 5.572258949279785,
158
+ "learning_rate": 1.6666666666666667e-05,
159
+ "loss": 2.2031,
160
+ "step": 1710
161
+ },
162
+ {
163
+ "epoch": 10.0,
164
+ "eval_accuracy": 0.6225209429183713,
165
+ "eval_loss": 2.136831045150757,
166
+ "eval_runtime": 8.6198,
167
+ "eval_samples_per_second": 40.604,
168
+ "eval_steps_per_second": 5.105,
169
+ "step": 1710
170
+ },
171
+ {
172
+ "epoch": 11.0,
173
+ "grad_norm": 5.637876510620117,
174
+ "learning_rate": 1.3333333333333333e-05,
175
+ "loss": 2.1693,
176
+ "step": 1881
177
+ },
178
+ {
179
+ "epoch": 11.0,
180
+ "eval_accuracy": 0.6184971098265896,
181
+ "eval_loss": 2.140977144241333,
182
+ "eval_runtime": 8.6333,
183
+ "eval_samples_per_second": 40.541,
184
+ "eval_steps_per_second": 5.097,
185
+ "step": 1881
186
+ },
187
+ {
188
+ "epoch": 12.0,
189
+ "grad_norm": 5.16227388381958,
190
+ "learning_rate": 1e-05,
191
+ "loss": 2.1243,
192
+ "step": 2052
193
+ },
194
+ {
195
+ "epoch": 12.0,
196
+ "eval_accuracy": 0.6290763561437572,
197
+ "eval_loss": 2.0609424114227295,
198
+ "eval_runtime": 8.8768,
199
+ "eval_samples_per_second": 39.429,
200
+ "eval_steps_per_second": 4.957,
201
+ "step": 2052
202
+ },
203
+ {
204
+ "epoch": 13.0,
205
+ "grad_norm": 5.17201566696167,
206
+ "learning_rate": 6.666666666666667e-06,
207
+ "loss": 2.086,
208
+ "step": 2223
209
+ },
210
+ {
211
+ "epoch": 13.0,
212
+ "eval_accuracy": 0.6354386788761055,
213
+ "eval_loss": 2.0226352214813232,
214
+ "eval_runtime": 9.1353,
215
+ "eval_samples_per_second": 38.313,
216
+ "eval_steps_per_second": 4.817,
217
+ "step": 2223
218
+ },
219
+ {
220
+ "epoch": 14.0,
221
+ "grad_norm": 5.382483959197998,
222
+ "learning_rate": 3.3333333333333333e-06,
223
+ "loss": 2.0771,
224
+ "step": 2394
225
+ },
226
+ {
227
+ "epoch": 14.0,
228
+ "eval_accuracy": 0.6357628841792445,
229
+ "eval_loss": 2.046103000640869,
230
+ "eval_runtime": 15.2839,
231
+ "eval_samples_per_second": 22.9,
232
+ "eval_steps_per_second": 2.879,
233
+ "step": 2394
234
+ },
235
+ {
236
+ "epoch": 15.0,
237
+ "grad_norm": 5.431705474853516,
238
+ "learning_rate": 0.0,
239
+ "loss": 2.0692,
240
+ "step": 2565
241
+ },
242
+ {
243
+ "epoch": 15.0,
244
+ "eval_accuracy": 0.6430424528301887,
245
+ "eval_loss": 2.0070760250091553,
246
+ "eval_runtime": 13.9029,
247
+ "eval_samples_per_second": 25.175,
248
+ "eval_steps_per_second": 3.165,
249
+ "step": 2565
250
+ },
251
+ {
252
+ "epoch": 15.0,
253
+ "step": 2565,
254
+ "total_flos": 5397015001420800.0,
255
+ "train_loss": 2.701929186845151,
256
+ "train_runtime": 4016.0696,
257
+ "train_samples_per_second": 5.106,
258
+ "train_steps_per_second": 0.639
259
  }
260
  ],
261
  "logging_steps": 1,
262
+ "max_steps": 2565,
263
  "num_input_tokens_seen": 0,
264
+ "num_train_epochs": 15,
265
  "save_steps": 1,
266
  "stateful_callbacks": {
267
  "TrainerControl": {
 
275
  "attributes": {}
276
  }
277
  },
278
+ "total_flos": 5397015001420800.0,
279
  "train_batch_size": 8,
280
  "trial_name": null,
281
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae20a66062df2f9870b9fab34dde38bf56aa6321b6fcb8b3642f23f85b750b47
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4fb11e754eb9b0cbb02a9fe5c043c7d02b1a27c6cb263e621b2865fa7ba734c
3
  size 5176