CNR-ILC committed on
Commit
0e162b6
·
verified ·
1 Parent(s): 42e8801

ILC-CNR/gs-Logion

Browse files
README.md CHANGED
@@ -15,13 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [cabrooks/LOGION-50k_wordpiece](https://huggingface.co/cabrooks/LOGION-50k_wordpiece) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 2.4955
19
- - Top1 Acc: 0.4969
20
- - Top5 Acc: 0.6832
21
- - Top10 Acc: 0.7329
22
- - Top15 Acc: 0.7702
23
- - Top20 Acc: 0.8137
24
- - Top25 Acc: 0.8447
25
 
26
  ## Model description
27
 
@@ -40,7 +34,7 @@ More information needed
40
  ### Training hyperparameters
41
 
42
  The following hyperparameters were used during training:
43
- - learning_rate: 5e-05
44
  - train_batch_size: 16
45
  - eval_batch_size: 8
46
  - seed: 42
@@ -51,23 +45,23 @@ The following hyperparameters were used during training:
51
 
52
  ### Training results
53
 
54
- | Training Loss | Epoch | Step | Validation Loss | Top1 Acc | Top5 Acc | Top10 Acc | Top15 Acc | Top20 Acc | Top25 Acc |
55
- |:-------------:|:-----:|:-----:|:---------------:|:--------:|:--------:|:---------:|:---------:|:---------:|:---------:|
56
- | 3.682 | 1.0 | 1945 | 3.2188 | 0.4809 | 0.6412 | 0.7023 | 0.7481 | 0.7634 | 0.7939 |
57
- | 3.1512 | 2.0 | 3890 | 2.9742 | 0.5223 | 0.7070 | 0.7389 | 0.7962 | 0.8089 | 0.8153 |
58
- | 2.9401 | 3.0 | 5835 | 2.8273 | 0.5796 | 0.6815 | 0.7325 | 0.7834 | 0.8025 | 0.8025 |
59
- | 2.8102 | 4.0 | 7780 | 2.7434 | 0.6051 | 0.7898 | 0.8089 | 0.8408 | 0.8471 | 0.8535 |
60
- | 2.6986 | 5.0 | 9725 | 2.6706 | 0.5973 | 0.7248 | 0.7584 | 0.7785 | 0.8054 | 0.8188 |
61
- | 2.6151 | 6.0 | 11670 | 2.6058 | 0.5484 | 0.6516 | 0.7290 | 0.7548 | 0.7677 | 0.7935 |
62
- | 2.5517 | 7.0 | 13615 | 2.5683 | 0.5906 | 0.7047 | 0.7651 | 0.8054 | 0.8188 | 0.8188 |
63
- | 2.4911 | 8.0 | 15560 | 2.5127 | 0.6644 | 0.7808 | 0.8288 | 0.8425 | 0.8493 | 0.8767 |
64
- | 2.4587 | 9.0 | 17505 | 2.5157 | 0.5886 | 0.6899 | 0.7342 | 0.7532 | 0.7722 | 0.7975 |
65
- | 2.4275 | 10.0 | 19450 | 2.4786 | 0.5608 | 0.7095 | 0.7365 | 0.7568 | 0.7770 | 0.7973 |
66
 
67
 
68
  ### Framework versions
69
 
70
  - Transformers 4.51.3
71
  - Pytorch 2.7.0+cu126
72
- - Datasets 3.5.1
73
  - Tokenizers 0.21.1
 
15
 
16
  This model is a fine-tuned version of [cabrooks/LOGION-50k_wordpiece](https://huggingface.co/cabrooks/LOGION-50k_wordpiece) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 2.5408
 
 
 
 
 
 
19
 
20
  ## Model description
21
 
 
34
  ### Training hyperparameters
35
 
36
  The following hyperparameters were used during training:
37
+ - learning_rate: 4e-05
38
  - train_batch_size: 16
39
  - eval_batch_size: 8
40
  - seed: 42
 
45
 
46
  ### Training results
47
 
48
+ | Training Loss | Epoch | Step | Validation Loss |
49
+ |:-------------:|:-----:|:-----:|:---------------:|
50
+ | 3.7164 | 1.0 | 1945 | 3.2547 |
51
+ | 3.1887 | 2.0 | 3890 | 3.0060 |
52
+ | 2.9816 | 3.0 | 5835 | 2.8557 |
53
+ | 2.8566 | 4.0 | 7780 | 2.7777 |
54
+ | 2.7497 | 5.0 | 9725 | 2.7062 |
55
+ | 2.6705 | 6.0 | 11670 | 2.6446 |
56
+ | 2.6134 | 7.0 | 13615 | 2.6067 |
57
+ | 2.5566 | 8.0 | 15560 | 2.5568 |
58
+ | 2.5294 | 9.0 | 17505 | 2.5612 |
59
+ | 2.5018 | 10.0 | 19450 | 2.5244 |
60
 
61
 
62
  ### Framework versions
63
 
64
  - Transformers 4.51.3
65
  - Pytorch 2.7.0+cu126
66
+ - Datasets 3.6.0
67
  - Tokenizers 0.21.1
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_loss": 2.4954521656036377,
4
- "eval_runtime": 1447.5251,
5
- "eval_samples_per_second": 2.394,
6
- "eval_steps_per_second": 0.3,
7
  "eval_top10_acc": 0.7329192546583851,
8
  "eval_top15_acc": 0.7701863354037267,
9
  "eval_top1_acc": 0.4968944099378882,
@@ -12,8 +12,8 @@
12
  "eval_top5_acc": 0.6832298136645962,
13
  "step": 19450,
14
  "total_flos": 2.04819897090048e+16,
15
- "train_loss": 2.78261650968027,
16
- "train_runtime": 17591.3374,
17
- "train_samples_per_second": 17.698,
18
- "train_steps_per_second": 1.106
19
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_loss": 2.5408458709716797,
4
+ "eval_runtime": 26.4975,
5
+ "eval_samples_per_second": 130.805,
6
+ "eval_steps_per_second": 16.379,
7
  "eval_top10_acc": 0.7329192546583851,
8
  "eval_top15_acc": 0.7701863354037267,
9
  "eval_top1_acc": 0.4968944099378882,
 
12
  "eval_top5_acc": 0.6832298136645962,
13
  "step": 19450,
14
  "total_flos": 2.04819897090048e+16,
15
+ "train_loss": 2.836465446256427,
16
+ "train_runtime": 5611.6836,
17
+ "train_samples_per_second": 55.481,
18
+ "train_steps_per_second": 3.466
19
  }
eval_results.json CHANGED
@@ -1,13 +1,7 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_loss": 2.4954521656036377,
4
- "eval_runtime": 1447.5251,
5
- "eval_samples_per_second": 2.394,
6
- "eval_steps_per_second": 0.3,
7
- "eval_top10_acc": 0.7329192546583851,
8
- "eval_top15_acc": 0.7701863354037267,
9
- "eval_top1_acc": 0.4968944099378882,
10
- "eval_top20_acc": 0.8136645962732919,
11
- "eval_top25_acc": 0.84472049689441,
12
- "eval_top5_acc": 0.6832298136645962
13
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_loss": 2.5408458709716797,
4
+ "eval_runtime": 26.4975,
5
+ "eval_samples_per_second": 130.805,
6
+ "eval_steps_per_second": 16.379
 
 
 
 
 
 
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3af1daa73536dd2971cd777ee89413ad3ce946c4c5a72af8298794d7f09febed
3
  size 497995232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcb886e9e5e381ea97f2b5e4a06d1b6f0751a5ed19f347709138f0ae5ba3aa41
3
  size 497995232
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 10.0,
3
  "step": 19450,
4
  "total_flos": 2.04819897090048e+16,
5
- "train_loss": 2.78261650968027,
6
- "train_runtime": 17591.3374,
7
- "train_samples_per_second": 17.698,
8
- "train_steps_per_second": 1.106
9
  }
 
2
  "epoch": 10.0,
3
  "step": 19450,
4
  "total_flos": 2.04819897090048e+16,
5
+ "train_loss": 2.836465446256427,
6
+ "train_runtime": 5611.6836,
7
+ "train_samples_per_second": 55.481,
8
+ "train_steps_per_second": 3.466
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": null,
4
  "best_model_checkpoint": null,
5
  "epoch": 10.0,
6
  "eval_steps": 500,
@@ -11,235 +11,169 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 7.5098795890808105,
15
- "learning_rate": 4.500771208226221e-05,
16
- "loss": 3.682,
17
  "step": 1945
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_loss": 3.2188210487365723,
22
- "eval_runtime": 1468.3006,
23
- "eval_samples_per_second": 2.361,
24
- "eval_steps_per_second": 0.296,
25
- "eval_top10_acc": 0.7022900763358778,
26
- "eval_top15_acc": 0.7480916030534351,
27
- "eval_top1_acc": 0.48091603053435117,
28
- "eval_top20_acc": 0.7633587786259542,
29
- "eval_top25_acc": 0.7938931297709924,
30
- "eval_top5_acc": 0.6412213740458015,
31
  "step": 1945
32
  },
33
  {
34
  "epoch": 2.0,
35
- "grad_norm": 7.310127258300781,
36
- "learning_rate": 4.000771208226221e-05,
37
- "loss": 3.1512,
38
  "step": 3890
39
  },
40
  {
41
  "epoch": 2.0,
42
- "eval_loss": 2.9741616249084473,
43
- "eval_runtime": 1459.0041,
44
- "eval_samples_per_second": 2.376,
45
- "eval_steps_per_second": 0.297,
46
- "eval_top10_acc": 0.7388535031847133,
47
- "eval_top15_acc": 0.7961783439490446,
48
- "eval_top1_acc": 0.5222929936305732,
49
- "eval_top20_acc": 0.8089171974522293,
50
- "eval_top25_acc": 0.8152866242038217,
51
- "eval_top5_acc": 0.7070063694267515,
52
  "step": 3890
53
  },
54
  {
55
  "epoch": 3.0,
56
- "grad_norm": 7.220669269561768,
57
- "learning_rate": 3.5010282776349616e-05,
58
- "loss": 2.9401,
59
  "step": 5835
60
  },
61
  {
62
  "epoch": 3.0,
63
- "eval_loss": 2.8273277282714844,
64
- "eval_runtime": 1449.4875,
65
- "eval_samples_per_second": 2.391,
66
- "eval_steps_per_second": 0.299,
67
- "eval_top10_acc": 0.732484076433121,
68
- "eval_top15_acc": 0.7834394904458599,
69
- "eval_top1_acc": 0.5796178343949044,
70
- "eval_top20_acc": 0.802547770700637,
71
- "eval_top25_acc": 0.802547770700637,
72
- "eval_top5_acc": 0.6815286624203821,
73
  "step": 5835
74
  },
75
  {
76
  "epoch": 4.0,
77
- "grad_norm": 6.662809371948242,
78
- "learning_rate": 3.0010282776349617e-05,
79
- "loss": 2.8102,
80
  "step": 7780
81
  },
82
  {
83
  "epoch": 4.0,
84
- "eval_loss": 2.7433667182922363,
85
- "eval_runtime": 1464.6527,
86
- "eval_samples_per_second": 2.366,
87
- "eval_steps_per_second": 0.296,
88
- "eval_top10_acc": 0.8089171974522293,
89
- "eval_top15_acc": 0.8407643312101911,
90
- "eval_top1_acc": 0.6050955414012739,
91
- "eval_top20_acc": 0.8471337579617835,
92
- "eval_top25_acc": 0.8535031847133758,
93
- "eval_top5_acc": 0.7898089171974523,
94
  "step": 7780
95
  },
96
  {
97
  "epoch": 5.0,
98
- "grad_norm": 6.250679969787598,
99
- "learning_rate": 2.501285347043702e-05,
100
- "loss": 2.6986,
101
  "step": 9725
102
  },
103
  {
104
  "epoch": 5.0,
105
- "eval_loss": 2.670605182647705,
106
- "eval_runtime": 1457.7258,
107
- "eval_samples_per_second": 2.378,
108
- "eval_steps_per_second": 0.298,
109
- "eval_top10_acc": 0.7583892617449665,
110
- "eval_top15_acc": 0.7785234899328859,
111
- "eval_top1_acc": 0.5973154362416108,
112
- "eval_top20_acc": 0.8053691275167785,
113
- "eval_top25_acc": 0.8187919463087249,
114
- "eval_top5_acc": 0.7248322147651006,
115
  "step": 9725
116
  },
117
  {
118
  "epoch": 6.0,
119
- "grad_norm": 7.041776657104492,
120
- "learning_rate": 2.0012853470437018e-05,
121
- "loss": 2.6151,
122
  "step": 11670
123
  },
124
  {
125
  "epoch": 6.0,
126
- "eval_loss": 2.605792999267578,
127
- "eval_runtime": 1462.3053,
128
- "eval_samples_per_second": 2.37,
129
- "eval_steps_per_second": 0.297,
130
- "eval_top10_acc": 0.7290322580645161,
131
- "eval_top15_acc": 0.7548387096774194,
132
- "eval_top1_acc": 0.5483870967741935,
133
- "eval_top20_acc": 0.7677419354838709,
134
- "eval_top25_acc": 0.7935483870967742,
135
- "eval_top5_acc": 0.6516129032258065,
136
  "step": 11670
137
  },
138
  {
139
  "epoch": 7.0,
140
- "grad_norm": 7.394732475280762,
141
- "learning_rate": 1.5015424164524421e-05,
142
- "loss": 2.5517,
143
  "step": 13615
144
  },
145
  {
146
  "epoch": 7.0,
147
- "eval_loss": 2.5682945251464844,
148
- "eval_runtime": 1457.23,
149
- "eval_samples_per_second": 2.378,
150
- "eval_steps_per_second": 0.298,
151
- "eval_top10_acc": 0.7651006711409396,
152
- "eval_top15_acc": 0.8053691275167785,
153
- "eval_top1_acc": 0.5906040268456376,
154
- "eval_top20_acc": 0.8187919463087249,
155
- "eval_top25_acc": 0.8187919463087249,
156
- "eval_top5_acc": 0.7046979865771812,
157
  "step": 13615
158
  },
159
  {
160
  "epoch": 8.0,
161
- "grad_norm": 6.788048267364502,
162
- "learning_rate": 1.0017994858611827e-05,
163
- "loss": 2.4911,
164
  "step": 15560
165
  },
166
  {
167
  "epoch": 8.0,
168
- "eval_loss": 2.5127227306365967,
169
- "eval_runtime": 1453.7592,
170
- "eval_samples_per_second": 2.384,
171
- "eval_steps_per_second": 0.299,
172
- "eval_top10_acc": 0.8287671232876712,
173
- "eval_top15_acc": 0.8424657534246576,
174
- "eval_top1_acc": 0.6643835616438356,
175
- "eval_top20_acc": 0.8493150684931506,
176
- "eval_top25_acc": 0.8767123287671232,
177
- "eval_top5_acc": 0.7808219178082192,
178
  "step": 15560
179
  },
180
  {
181
  "epoch": 9.0,
182
- "grad_norm": 6.64603328704834,
183
- "learning_rate": 5.017994858611825e-06,
184
- "loss": 2.4587,
185
  "step": 17505
186
  },
187
  {
188
  "epoch": 9.0,
189
- "eval_loss": 2.5156757831573486,
190
- "eval_runtime": 1453.9459,
191
- "eval_samples_per_second": 2.384,
192
- "eval_steps_per_second": 0.298,
193
- "eval_top10_acc": 0.7341772151898734,
194
- "eval_top15_acc": 0.7531645569620253,
195
- "eval_top1_acc": 0.5886075949367089,
196
- "eval_top20_acc": 0.7721518987341772,
197
- "eval_top25_acc": 0.7974683544303798,
198
- "eval_top5_acc": 0.689873417721519,
199
  "step": 17505
200
  },
201
  {
202
  "epoch": 10.0,
203
- "grad_norm": 7.522444248199463,
204
- "learning_rate": 2.0565552699228795e-08,
205
- "loss": 2.4275,
206
  "step": 19450
207
  },
208
  {
209
  "epoch": 10.0,
210
- "eval_loss": 2.478581666946411,
211
- "eval_runtime": 1450.9422,
212
- "eval_samples_per_second": 2.389,
213
- "eval_steps_per_second": 0.299,
214
- "eval_top10_acc": 0.7364864864864865,
215
- "eval_top15_acc": 0.7567567567567568,
216
- "eval_top1_acc": 0.5608108108108109,
217
- "eval_top20_acc": 0.777027027027027,
218
- "eval_top25_acc": 0.7972972972972973,
219
- "eval_top5_acc": 0.7094594594594594,
220
  "step": 19450
221
  },
222
  {
223
  "epoch": 10.0,
224
  "step": 19450,
225
  "total_flos": 2.04819897090048e+16,
226
- "train_loss": 2.78261650968027,
227
- "train_runtime": 17591.3374,
228
- "train_samples_per_second": 17.698,
229
- "train_steps_per_second": 1.106
230
  },
231
  {
232
  "epoch": 10.0,
233
- "eval_loss": 2.4954521656036377,
234
- "eval_runtime": 1447.5251,
235
- "eval_samples_per_second": 2.394,
236
- "eval_steps_per_second": 0.3,
237
- "eval_top10_acc": 0.7329192546583851,
238
- "eval_top15_acc": 0.7701863354037267,
239
- "eval_top1_acc": 0.4968944099378882,
240
- "eval_top20_acc": 0.8136645962732919,
241
- "eval_top25_acc": 0.84472049689441,
242
- "eval_top5_acc": 0.6832298136645962,
243
  "step": 19450
244
  }
245
  ],
@@ -249,6 +183,15 @@
249
  "num_train_epochs": 10,
250
  "save_steps": 500,
251
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
252
  "TrainerControl": {
253
  "args": {
254
  "should_epoch_stop": false,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 2.5244338512420654,
4
  "best_model_checkpoint": null,
5
  "epoch": 10.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 7.744540691375732,
15
+ "learning_rate": 3.600616966580978e-05,
16
+ "loss": 3.7164,
17
  "step": 1945
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_loss": 3.2547335624694824,
22
+ "eval_runtime": 24.9102,
23
+ "eval_samples_per_second": 139.14,
24
+ "eval_steps_per_second": 17.423,
 
 
 
 
 
 
25
  "step": 1945
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 7.693874359130859,
30
+ "learning_rate": 3.200822622107969e-05,
31
+ "loss": 3.1887,
32
  "step": 3890
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_loss": 3.0059654712677,
37
+ "eval_runtime": 24.8889,
38
+ "eval_samples_per_second": 139.259,
39
+ "eval_steps_per_second": 17.438,
 
 
 
 
 
 
40
  "step": 3890
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "grad_norm": 7.287598609924316,
45
+ "learning_rate": 2.8010282776349616e-05,
46
+ "loss": 2.9816,
47
  "step": 5835
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "eval_loss": 2.8556885719299316,
52
+ "eval_runtime": 24.9739,
53
+ "eval_samples_per_second": 138.785,
54
+ "eval_steps_per_second": 17.378,
 
 
 
 
 
 
55
  "step": 5835
56
  },
57
  {
58
  "epoch": 4.0,
59
+ "grad_norm": 6.844884872436523,
60
+ "learning_rate": 2.401439588688946e-05,
61
+ "loss": 2.8566,
62
  "step": 7780
63
  },
64
  {
65
  "epoch": 4.0,
66
+ "eval_loss": 2.7777276039123535,
67
+ "eval_runtime": 24.9528,
68
+ "eval_samples_per_second": 138.902,
69
+ "eval_steps_per_second": 17.393,
 
 
 
 
 
 
70
  "step": 7780
71
  },
72
  {
73
  "epoch": 5.0,
74
+ "grad_norm": 6.421507835388184,
75
+ "learning_rate": 2.001439588688946e-05,
76
+ "loss": 2.7497,
77
  "step": 9725
78
  },
79
  {
80
  "epoch": 5.0,
81
+ "eval_loss": 2.706150531768799,
82
+ "eval_runtime": 25.4567,
83
+ "eval_samples_per_second": 136.153,
84
+ "eval_steps_per_second": 17.049,
 
 
 
 
 
 
85
  "step": 9725
86
  },
87
  {
88
  "epoch": 6.0,
89
+ "grad_norm": 7.025661945343018,
90
+ "learning_rate": 1.6014395886889463e-05,
91
+ "loss": 2.6705,
92
  "step": 11670
93
  },
94
  {
95
  "epoch": 6.0,
96
+ "eval_loss": 2.644594430923462,
97
+ "eval_runtime": 24.4453,
98
+ "eval_samples_per_second": 141.786,
99
+ "eval_steps_per_second": 17.754,
 
 
 
 
 
 
100
  "step": 11670
101
  },
102
  {
103
  "epoch": 7.0,
104
+ "grad_norm": 7.462997913360596,
105
+ "learning_rate": 1.201439588688946e-05,
106
+ "loss": 2.6134,
107
  "step": 13615
108
  },
109
  {
110
  "epoch": 7.0,
111
+ "eval_loss": 2.6067004203796387,
112
+ "eval_runtime": 25.7293,
113
+ "eval_samples_per_second": 134.71,
114
+ "eval_steps_per_second": 16.868,
 
 
 
 
 
 
115
  "step": 13615
116
  },
117
  {
118
  "epoch": 8.0,
119
+ "grad_norm": 6.885842800140381,
120
+ "learning_rate": 8.016452442159383e-06,
121
+ "loss": 2.5566,
122
  "step": 15560
123
  },
124
  {
125
  "epoch": 8.0,
126
+ "eval_loss": 2.5567843914031982,
127
+ "eval_runtime": 26.1325,
128
+ "eval_samples_per_second": 132.632,
129
+ "eval_steps_per_second": 16.608,
 
 
 
 
 
 
130
  "step": 15560
131
  },
132
  {
133
  "epoch": 9.0,
134
+ "grad_norm": 6.753671646118164,
135
+ "learning_rate": 4.02056555269923e-06,
136
+ "loss": 2.5294,
137
  "step": 17505
138
  },
139
  {
140
  "epoch": 9.0,
141
+ "eval_loss": 2.5612432956695557,
142
+ "eval_runtime": 23.421,
143
+ "eval_samples_per_second": 147.987,
144
+ "eval_steps_per_second": 18.53,
 
 
 
 
 
 
145
  "step": 17505
146
  },
147
  {
148
  "epoch": 10.0,
149
+ "grad_norm": 7.779834270477295,
150
+ "learning_rate": 2.056555269922879e-08,
151
+ "loss": 2.5018,
152
  "step": 19450
153
  },
154
  {
155
  "epoch": 10.0,
156
+ "eval_loss": 2.5244338512420654,
157
+ "eval_runtime": 26.3445,
158
+ "eval_samples_per_second": 131.564,
159
+ "eval_steps_per_second": 16.474,
 
 
 
 
 
 
160
  "step": 19450
161
  },
162
  {
163
  "epoch": 10.0,
164
  "step": 19450,
165
  "total_flos": 2.04819897090048e+16,
166
+ "train_loss": 2.836465446256427,
167
+ "train_runtime": 5611.6836,
168
+ "train_samples_per_second": 55.481,
169
+ "train_steps_per_second": 3.466
170
  },
171
  {
172
  "epoch": 10.0,
173
+ "eval_loss": 2.5408458709716797,
174
+ "eval_runtime": 26.4975,
175
+ "eval_samples_per_second": 130.805,
176
+ "eval_steps_per_second": 16.379,
 
 
 
 
 
 
177
  "step": 19450
178
  }
179
  ],
 
183
  "num_train_epochs": 10,
184
  "save_steps": 500,
185
  "stateful_callbacks": {
186
+ "EarlyStoppingCallback": {
187
+ "args": {
188
+ "early_stopping_patience": 2,
189
+ "early_stopping_threshold": 0.0
190
+ },
191
+ "attributes": {
192
+ "early_stopping_patience_counter": 0
193
+ }
194
+ },
195
  "TrainerControl": {
196
  "args": {
197
  "should_epoch_stop": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56d990e78a87875a305cb75d68d2e66e4c4d1896a4e05bd3d7dde0c1e1ebd2d3
3
  size 5649
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3edd9f667d26281ac19fcd505b40032fd8864a7754c4e6e1c5f47d379f91c4
3
  size 5649