ShengdingHu commited on
Commit
ebef2ad
·
1 Parent(s): 45b98b5

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 94.89999999999999,
4
- "eval_average_metrics": 94.05452729693741,
5
- "eval_f1": 93.20905459387484,
6
- "eval_loss": 0.057457663118839264,
7
- "eval_runtime": 2.733,
8
- "eval_samples_per_second": 365.895,
9
- "test_accuracy": 90.31659658669305,
10
- "test_average_metrics": 88.64653887083897,
11
- "test_f1": 86.97648115498488,
12
- "test_loss": 0.09569031745195389,
13
- "test_runtime": 106.5499,
14
- "test_samples_per_second": 379.447,
15
- "train_loss": 0.1380685100413512,
16
- "train_runtime": 3830.7041,
17
  "train_samples": 362846,
18
- "train_samples_per_second": 284.161,
19
- "train_steps_per_second": 2.842
20
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 95.0,
4
+ "eval_average_metrics": 94.22774869109946,
5
+ "eval_f1": 93.45549738219894,
6
+ "eval_loss": 0.055720701813697815,
7
+ "eval_runtime": 4.796,
8
+ "eval_samples_per_second": 208.506,
9
+ "test_accuracy": 90.3141231758595,
10
+ "test_average_metrics": 88.72727044120705,
11
+ "test_f1": 87.14041770655459,
12
+ "test_loss": 0.09215616434812546,
13
+ "test_runtime": 180.597,
14
+ "test_samples_per_second": 223.869,
15
+ "train_loss": 0.07169761398949699,
16
+ "train_runtime": 13428.6442,
17
  "train_samples": 362846,
18
+ "train_samples_per_second": 81.061,
19
+ "train_steps_per_second": 2.533
20
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 94.89999999999999,
4
- "eval_average_metrics": 94.05452729693741,
5
- "eval_f1": 93.20905459387484,
6
- "eval_loss": 0.057457663118839264,
7
- "eval_runtime": 2.733,
8
- "eval_samples_per_second": 365.895
9
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 95.0,
4
+ "eval_average_metrics": 94.22774869109946,
5
+ "eval_f1": 93.45549738219894,
6
+ "eval_loss": 0.055720701813697815,
7
+ "eval_runtime": 4.796,
8
+ "eval_samples_per_second": 208.506
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea96955615a89323a03f3ac639c8f8faec3279834d6865a774163badb1602ec0
3
- size 7551621
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408f9f69d04647ca9fc02f4fa700ca2a77bdf482d046051f409a9a93fdd80734
3
+ size 2631685
runs/Feb01_09-54-08_node1/events.out.tfevents.1643680621.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c45f8985a1ec937cc88667ae35b6aa55f5c9b4e54c337cd0ae827313458a205e
3
- size 77888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f282ed9800579880d127ed781c6242319b0476e61a79e8f0d3b752afba87760a
3
+ size 78623
runs/Feb01_09-54-08_node1/events.out.tfevents.1643694057.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b3dcfdd25f487c2cedc8f5528fa291645116206e9fe02731aa1aa743afdc1a1
3
+ size 790
runs/Feb02_21-25-57_node1/1643808618.960355/events.out.tfevents.1643808618.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db4374f8c0dacd11d450cec2ad2b6516845e7b69d5077dfed97bc745676d76c
3
+ size 5008
runs/Feb02_21-25-57_node1/events.out.tfevents.1643808618.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e060981f177e86172998af1f56d4555bac0e739cc33f88dd0a60aa187f6f4e
3
+ size 4296
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 3.0,
3
- "test_accuracy": 90.31659658669305,
4
- "test_average_metrics": 88.64653887083897,
5
- "test_f1": 86.97648115498488,
6
- "test_loss": 0.09569031745195389,
7
- "test_runtime": 106.5499,
8
- "test_samples_per_second": 379.447
9
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "test_accuracy": 90.3141231758595,
4
+ "test_average_metrics": 88.72727044120705,
5
+ "test_f1": 87.14041770655459,
6
+ "test_loss": 0.09215616434812546,
7
+ "test_runtime": 180.597,
8
+ "test_samples_per_second": 223.869
9
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.1380685100413512,
4
- "train_runtime": 3830.7041,
5
  "train_samples": 362846,
6
- "train_samples_per_second": 284.161,
7
- "train_steps_per_second": 2.842
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.07169761398949699,
4
+ "train_runtime": 13428.6442,
5
  "train_samples": 362846,
6
+ "train_samples_per_second": 81.061,
7
+ "train_steps_per_second": 2.533
8
  }
trainer_state.json CHANGED
@@ -1,361 +1,2133 @@
1
  {
2
- "best_metric": 94.05452729693741,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/qqp/checkpoint-4000",
4
  "epoch": 3.0,
5
- "global_step": 10887,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.14,
12
- "learning_rate": 0.0003,
13
- "loss": 1.4376,
14
- "step": 500
 
 
 
 
15
  },
16
  {
17
- "epoch": 0.14,
18
- "eval_accuracy": 92.0,
19
- "eval_average_metrics": 90.92385786802029,
20
- "eval_f1": 89.8477157360406,
21
- "eval_loss": 0.07532218098640442,
22
- "eval_runtime": 2.8074,
23
- "eval_samples_per_second": 356.2,
 
 
 
 
 
 
24
  "step": 500
25
  },
26
  {
27
- "epoch": 0.28,
28
- "learning_rate": 0.0002855588716665062,
29
- "loss": 0.0863,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  "step": 1000
31
  },
32
  {
33
- "epoch": 0.28,
34
- "eval_accuracy": 92.5,
35
- "eval_average_metrics": 91.48506988564168,
36
- "eval_f1": 90.47013977128336,
37
- "eval_loss": 0.07053044438362122,
38
- "eval_runtime": 2.7207,
39
- "eval_samples_per_second": 367.549,
40
  "step": 1000
41
  },
42
  {
43
- "epoch": 0.41,
44
- "learning_rate": 0.00027111774333301236,
45
- "loss": 0.0819,
46
- "step": 1500
 
 
 
 
47
  },
48
  {
49
- "epoch": 0.41,
50
- "eval_accuracy": 93.7,
51
- "eval_average_metrics": 92.67781456953642,
52
- "eval_f1": 91.65562913907283,
53
- "eval_loss": 0.06254870444536209,
54
- "eval_runtime": 2.7947,
55
- "eval_samples_per_second": 357.814,
 
 
 
 
 
 
56
  "step": 1500
57
  },
58
  {
59
- "epoch": 0.55,
60
- "learning_rate": 0.0002566766149995186,
61
- "loss": 0.0781,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "step": 2000
63
  },
64
  {
65
- "epoch": 0.55,
66
- "eval_accuracy": 93.4,
67
- "eval_average_metrics": 92.35789473684211,
68
- "eval_f1": 91.31578947368422,
69
- "eval_loss": 0.06308286637067795,
70
- "eval_runtime": 2.7948,
71
- "eval_samples_per_second": 357.804,
72
  "step": 2000
73
  },
74
  {
75
- "epoch": 0.69,
76
- "learning_rate": 0.00024223548666602482,
77
- "loss": 0.0808,
78
- "step": 2500
 
 
 
 
79
  },
80
  {
81
- "epoch": 0.69,
82
- "eval_accuracy": 94.39999999999999,
83
- "eval_average_metrics": 93.50606860158311,
84
- "eval_f1": 92.61213720316623,
85
- "eval_loss": 0.06003529205918312,
86
- "eval_runtime": 2.7959,
87
- "eval_samples_per_second": 357.67,
 
 
 
 
 
 
88
  "step": 2500
89
  },
90
  {
91
- "epoch": 0.83,
92
- "learning_rate": 0.00022779435833253101,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  "loss": 0.0764,
94
  "step": 3000
95
  },
96
  {
97
- "epoch": 0.83,
98
- "eval_accuracy": 94.3,
99
- "eval_average_metrics": 93.41474442988203,
100
- "eval_f1": 92.52948885976409,
101
- "eval_loss": 0.058581382036209106,
102
- "eval_runtime": 2.8618,
103
- "eval_samples_per_second": 349.436,
104
  "step": 3000
105
  },
106
  {
107
- "epoch": 0.96,
108
- "learning_rate": 0.00021335322999903723,
109
- "loss": 0.0777,
110
- "step": 3500
 
 
 
 
111
  },
112
  {
113
- "epoch": 0.96,
114
- "eval_accuracy": 93.89999999999999,
115
- "eval_average_metrics": 93.03472400513479,
116
- "eval_f1": 92.16944801026958,
117
- "eval_loss": 0.06309271603822708,
118
- "eval_runtime": 2.7979,
119
- "eval_samples_per_second": 357.412,
 
 
 
 
 
 
120
  "step": 3500
121
  },
122
  {
123
- "epoch": 1.1,
124
- "learning_rate": 0.00019891210166554345,
125
- "loss": 0.0751,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  "step": 4000
127
  },
128
  {
129
- "epoch": 1.1,
130
- "eval_accuracy": 94.89999999999999,
131
- "eval_average_metrics": 94.05452729693741,
132
- "eval_f1": 93.20905459387484,
133
- "eval_loss": 0.057457663118839264,
134
- "eval_runtime": 2.8329,
135
- "eval_samples_per_second": 352.998,
136
  "step": 4000
137
  },
138
  {
139
- "epoch": 1.24,
140
- "learning_rate": 0.00018447097333204967,
141
- "loss": 0.0753,
142
- "step": 4500
 
 
 
 
143
  },
144
  {
145
- "epoch": 1.24,
146
- "eval_accuracy": 93.8,
147
- "eval_average_metrics": 92.82105263157894,
148
- "eval_f1": 91.84210526315789,
149
- "eval_loss": 0.06179063394665718,
150
- "eval_runtime": 2.8497,
151
- "eval_samples_per_second": 350.919,
 
 
 
 
 
 
152
  "step": 4500
153
  },
154
  {
155
- "epoch": 1.38,
156
- "learning_rate": 0.00017002984499855586,
157
- "loss": 0.0756,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  "step": 5000
159
  },
160
  {
161
- "epoch": 1.38,
162
- "eval_accuracy": 94.3,
163
- "eval_average_metrics": 93.3751655629139,
164
- "eval_f1": 92.45033112582782,
165
- "eval_loss": 0.059713296592235565,
166
- "eval_runtime": 2.8157,
167
- "eval_samples_per_second": 355.153,
168
  "step": 5000
169
  },
170
  {
171
- "epoch": 1.52,
172
- "learning_rate": 0.00015558871666506208,
173
- "loss": 0.0745,
174
- "step": 5500
 
 
 
 
175
  },
176
  {
177
- "epoch": 1.52,
178
- "eval_accuracy": 94.6,
179
- "eval_average_metrics": 93.78437500000001,
180
- "eval_f1": 92.96875000000001,
181
- "eval_loss": 0.05805646628141403,
182
- "eval_runtime": 2.8217,
183
- "eval_samples_per_second": 354.401,
 
 
 
 
 
 
184
  "step": 5500
185
  },
186
  {
187
- "epoch": 1.65,
188
- "learning_rate": 0.0001411475883315683,
189
- "loss": 0.0738,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  "step": 6000
191
  },
192
  {
193
- "epoch": 1.65,
194
- "eval_accuracy": 94.19999999999999,
195
- "eval_average_metrics": 93.32395833333332,
196
- "eval_f1": 92.44791666666666,
197
- "eval_loss": 0.06040719524025917,
198
- "eval_runtime": 2.8379,
199
- "eval_samples_per_second": 352.375,
200
  "step": 6000
201
  },
202
  {
203
- "epoch": 1.79,
204
- "learning_rate": 0.00012670645999807452,
205
- "loss": 0.0736,
206
- "step": 6500
 
 
 
 
207
  },
208
  {
209
- "epoch": 1.79,
210
- "eval_accuracy": 94.5,
211
- "eval_average_metrics": 93.59794156706508,
212
- "eval_f1": 92.69588313413014,
213
- "eval_loss": 0.05985904857516289,
214
- "eval_runtime": 2.7565,
215
- "eval_samples_per_second": 362.781,
 
 
 
 
 
 
216
  "step": 6500
217
  },
218
  {
219
- "epoch": 1.93,
220
- "learning_rate": 0.00011226533166458071,
221
- "loss": 0.0741,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  "step": 7000
223
  },
224
  {
225
- "epoch": 1.93,
226
- "eval_accuracy": 94.39999999999999,
227
- "eval_average_metrics": 93.5254593175853,
228
- "eval_f1": 92.6509186351706,
229
- "eval_loss": 0.058951567858457565,
230
- "eval_runtime": 2.7279,
231
- "eval_samples_per_second": 366.586,
232
  "step": 7000
233
  },
234
  {
235
- "epoch": 2.07,
236
- "learning_rate": 9.782420333108692e-05,
237
- "loss": 0.0726,
238
- "step": 7500
 
 
 
 
239
  },
240
  {
241
- "epoch": 2.07,
242
- "eval_accuracy": 94.19999999999999,
243
- "eval_average_metrics": 93.22299465240641,
244
- "eval_f1": 92.24598930481282,
245
- "eval_loss": 0.05785064399242401,
246
- "eval_runtime": 2.8767,
247
- "eval_samples_per_second": 347.617,
 
 
 
 
 
 
248
  "step": 7500
249
  },
250
  {
251
- "epoch": 2.2,
252
- "learning_rate": 8.338307499759314e-05,
253
- "loss": 0.0738,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  "step": 8000
255
  },
256
  {
257
- "epoch": 2.2,
258
- "eval_accuracy": 94.5,
259
- "eval_average_metrics": 93.65522875816993,
260
- "eval_f1": 92.81045751633987,
261
- "eval_loss": 0.05999515578150749,
262
- "eval_runtime": 2.7717,
263
- "eval_samples_per_second": 360.784,
264
  "step": 8000
265
  },
266
  {
267
- "epoch": 2.34,
268
- "learning_rate": 6.894194666409934e-05,
269
- "loss": 0.0742,
270
- "step": 8500
 
 
 
 
271
  },
272
  {
273
- "epoch": 2.34,
274
- "eval_accuracy": 94.6,
275
- "eval_average_metrics": 93.75669291338582,
276
- "eval_f1": 92.91338582677164,
277
- "eval_loss": 0.05879341810941696,
278
- "eval_runtime": 2.8642,
279
- "eval_samples_per_second": 349.137,
 
 
 
 
 
 
280
  "step": 8500
281
  },
282
  {
283
- "epoch": 2.48,
284
- "learning_rate": 5.450081833060556e-05,
285
- "loss": 0.0721,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  "step": 9000
287
  },
288
  {
289
- "epoch": 2.48,
290
- "eval_accuracy": 94.8,
291
- "eval_average_metrics": 93.96084656084656,
292
- "eval_f1": 93.12169312169313,
293
- "eval_loss": 0.05719929561018944,
294
- "eval_runtime": 2.8448,
295
- "eval_samples_per_second": 351.524,
296
  "step": 9000
297
  },
298
  {
299
- "epoch": 2.62,
300
- "learning_rate": 4.005968999711177e-05,
301
- "loss": 0.0719,
302
- "step": 9500
 
 
 
 
303
  },
304
  {
305
- "epoch": 2.62,
306
  "eval_accuracy": 94.39999999999999,
307
- "eval_average_metrics": 93.48647214854111,
308
- "eval_f1": 92.57294429708222,
309
- "eval_loss": 0.058148905634880066,
310
- "eval_runtime": 2.7609,
311
- "eval_samples_per_second": 362.202,
 
 
 
 
 
 
312
  "step": 9500
313
  },
314
  {
315
- "epoch": 2.76,
316
- "learning_rate": 2.561856166361798e-05,
317
- "loss": 0.073,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  "step": 10000
319
  },
320
  {
321
- "epoch": 2.76,
322
- "eval_accuracy": 94.3,
323
- "eval_average_metrics": 93.40492772667542,
324
- "eval_f1": 92.50985545335085,
325
- "eval_loss": 0.059017885476350784,
326
- "eval_runtime": 2.8496,
327
- "eval_samples_per_second": 350.923,
328
  "step": 10000
329
  },
330
  {
331
- "epoch": 2.89,
332
- "learning_rate": 1.1177433330124192e-05,
333
- "loss": 0.0727,
334
- "step": 10500
 
 
 
 
335
  },
336
  {
337
- "epoch": 2.89,
338
- "eval_accuracy": 94.5,
339
- "eval_average_metrics": 93.63633377135348,
340
- "eval_f1": 92.77266754270696,
341
- "eval_loss": 0.058137666434049606,
342
- "eval_runtime": 2.7834,
343
- "eval_samples_per_second": 359.279,
 
 
 
 
 
 
344
  "step": 10500
345
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  {
347
  "epoch": 3.0,
348
- "step": 10887,
349
- "total_flos": 1.2488641630509658e+17,
350
- "train_loss": 0.1380685100413512,
351
- "train_runtime": 3830.7041,
352
- "train_samples_per_second": 284.161,
353
- "train_steps_per_second": 2.842
354
  }
355
  ],
356
- "max_steps": 10887,
357
  "num_train_epochs": 3,
358
- "total_flos": 1.2488641630509658e+17,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
 
1
  {
2
+ "best_metric": 94.22774869109946,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/qqp/checkpoint-8600",
4
  "epoch": 3.0,
5
+ "global_step": 34017,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.02,
12
+ "eval_accuracy": 93.60000000000001,
13
+ "eval_average_metrics": 92.41643835616439,
14
+ "eval_f1": 91.23287671232877,
15
+ "eval_loss": 0.06451932340860367,
16
+ "eval_runtime": 4.5218,
17
+ "eval_samples_per_second": 221.15,
18
+ "step": 200
19
  },
20
  {
21
+ "epoch": 0.04,
22
+ "eval_accuracy": 90.2,
23
+ "eval_average_metrics": 89.15339805825244,
24
+ "eval_f1": 88.10679611650487,
25
+ "eval_loss": 0.08826350420713425,
26
+ "eval_runtime": 4.5364,
27
+ "eval_samples_per_second": 220.438,
28
+ "step": 400
29
+ },
30
+ {
31
+ "epoch": 0.04,
32
+ "learning_rate": 0.00029559044007408056,
33
+ "loss": 0.1448,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 0.05,
38
+ "eval_accuracy": 93.7,
39
+ "eval_average_metrics": 92.76439688715953,
40
+ "eval_f1": 91.82879377431907,
41
+ "eval_loss": 0.061929114162921906,
42
+ "eval_runtime": 4.5766,
43
+ "eval_samples_per_second": 218.503,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 0.07,
48
+ "eval_accuracy": 93.30000000000001,
49
+ "eval_average_metrics": 92.3385456885457,
50
+ "eval_f1": 91.37709137709138,
51
+ "eval_loss": 0.0669504851102829,
52
+ "eval_runtime": 4.5237,
53
+ "eval_samples_per_second": 221.059,
54
+ "step": 800
55
+ },
56
+ {
57
+ "epoch": 0.09,
58
+ "learning_rate": 0.0002911808801481612,
59
+ "loss": 0.0782,
60
  "step": 1000
61
  },
62
  {
63
+ "epoch": 0.09,
64
+ "eval_accuracy": 93.7,
65
+ "eval_average_metrics": 92.74308996088658,
66
+ "eval_f1": 91.78617992177314,
67
+ "eval_loss": 0.06603064388036728,
68
+ "eval_runtime": 4.727,
69
+ "eval_samples_per_second": 211.549,
70
  "step": 1000
71
  },
72
  {
73
+ "epoch": 0.11,
74
+ "eval_accuracy": 93.30000000000001,
75
+ "eval_average_metrics": 92.14125168236878,
76
+ "eval_f1": 90.98250336473754,
77
+ "eval_loss": 0.06394415348768234,
78
+ "eval_runtime": 4.6179,
79
+ "eval_samples_per_second": 216.548,
80
+ "step": 1200
81
  },
82
  {
83
+ "epoch": 0.12,
84
+ "eval_accuracy": 93.30000000000001,
85
+ "eval_average_metrics": 92.07974079126876,
86
+ "eval_f1": 90.85948158253751,
87
+ "eval_loss": 0.059915054589509964,
88
+ "eval_runtime": 4.4912,
89
+ "eval_samples_per_second": 222.655,
90
+ "step": 1400
91
+ },
92
+ {
93
+ "epoch": 0.13,
94
+ "learning_rate": 0.0002867713202222418,
95
+ "loss": 0.0773,
96
  "step": 1500
97
  },
98
  {
99
+ "epoch": 0.14,
100
+ "eval_accuracy": 93.0,
101
+ "eval_average_metrics": 92.0242966751918,
102
+ "eval_f1": 91.04859335038363,
103
+ "eval_loss": 0.06896140426397324,
104
+ "eval_runtime": 4.614,
105
+ "eval_samples_per_second": 216.732,
106
+ "step": 1600
107
+ },
108
+ {
109
+ "epoch": 0.16,
110
+ "eval_accuracy": 93.60000000000001,
111
+ "eval_average_metrics": 92.61151832460735,
112
+ "eval_f1": 91.62303664921467,
113
+ "eval_loss": 0.06234096363186836,
114
+ "eval_runtime": 4.6818,
115
+ "eval_samples_per_second": 213.593,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 0.18,
120
+ "learning_rate": 0.0002823617602963224,
121
+ "loss": 0.0746,
122
  "step": 2000
123
  },
124
  {
125
+ "epoch": 0.18,
126
+ "eval_accuracy": 94.19999999999999,
127
+ "eval_average_metrics": 93.23333333333332,
128
+ "eval_f1": 92.26666666666667,
129
+ "eval_loss": 0.0571160614490509,
130
+ "eval_runtime": 4.7897,
131
+ "eval_samples_per_second": 208.783,
132
  "step": 2000
133
  },
134
  {
135
+ "epoch": 0.19,
136
+ "eval_accuracy": 93.60000000000001,
137
+ "eval_average_metrics": 92.54468085106384,
138
+ "eval_f1": 91.48936170212767,
139
+ "eval_loss": 0.059621669352054596,
140
+ "eval_runtime": 4.6087,
141
+ "eval_samples_per_second": 216.979,
142
+ "step": 2200
143
  },
144
  {
145
+ "epoch": 0.21,
146
+ "eval_accuracy": 93.60000000000001,
147
+ "eval_average_metrics": 92.36786703601109,
148
+ "eval_f1": 91.13573407202216,
149
+ "eval_loss": 0.06162749230861664,
150
+ "eval_runtime": 4.7312,
151
+ "eval_samples_per_second": 211.361,
152
+ "step": 2400
153
+ },
154
+ {
155
+ "epoch": 0.22,
156
+ "learning_rate": 0.00027795220037040303,
157
+ "loss": 0.0771,
158
  "step": 2500
159
  },
160
  {
161
+ "epoch": 0.23,
162
+ "eval_accuracy": 93.10000000000001,
163
+ "eval_average_metrics": 91.91912751677853,
164
+ "eval_f1": 90.73825503355705,
165
+ "eval_loss": 0.06196223199367523,
166
+ "eval_runtime": 4.4778,
167
+ "eval_samples_per_second": 223.322,
168
+ "step": 2600
169
+ },
170
+ {
171
+ "epoch": 0.25,
172
+ "eval_accuracy": 93.7,
173
+ "eval_average_metrics": 92.72155963302754,
174
+ "eval_f1": 91.74311926605506,
175
+ "eval_loss": 0.059104129672050476,
176
+ "eval_runtime": 4.462,
177
+ "eval_samples_per_second": 224.113,
178
+ "step": 2800
179
+ },
180
+ {
181
+ "epoch": 0.26,
182
+ "learning_rate": 0.0002735426404444836,
183
  "loss": 0.0764,
184
  "step": 3000
185
  },
186
  {
187
+ "epoch": 0.26,
188
+ "eval_accuracy": 94.1,
189
+ "eval_average_metrics": 93.1633069828722,
190
+ "eval_f1": 92.2266139657444,
191
+ "eval_loss": 0.059123676270246506,
192
+ "eval_runtime": 4.5787,
193
+ "eval_samples_per_second": 218.4,
194
  "step": 3000
195
  },
196
  {
197
+ "epoch": 0.28,
198
+ "eval_accuracy": 91.5,
199
+ "eval_average_metrics": 90.49660074165637,
200
+ "eval_f1": 89.49320148331273,
201
+ "eval_loss": 0.07883985340595245,
202
+ "eval_runtime": 4.5148,
203
+ "eval_samples_per_second": 221.496,
204
+ "step": 3200
205
  },
206
  {
207
+ "epoch": 0.3,
208
+ "eval_accuracy": 93.10000000000001,
209
+ "eval_average_metrics": 92.02837483617301,
210
+ "eval_f1": 90.956749672346,
211
+ "eval_loss": 0.06356123834848404,
212
+ "eval_runtime": 4.5836,
213
+ "eval_samples_per_second": 218.171,
214
+ "step": 3400
215
+ },
216
+ {
217
+ "epoch": 0.31,
218
+ "learning_rate": 0.0002691330805185642,
219
+ "loss": 0.0732,
220
  "step": 3500
221
  },
222
  {
223
+ "epoch": 0.32,
224
+ "eval_accuracy": 93.7,
225
+ "eval_average_metrics": 92.598987854251,
226
+ "eval_f1": 91.49797570850201,
227
+ "eval_loss": 0.058661118149757385,
228
+ "eval_runtime": 4.581,
229
+ "eval_samples_per_second": 218.292,
230
+ "step": 3600
231
+ },
232
+ {
233
+ "epoch": 0.34,
234
+ "eval_accuracy": 93.10000000000001,
235
+ "eval_average_metrics": 92.00454545454545,
236
+ "eval_f1": 90.90909090909089,
237
+ "eval_loss": 0.06345341354608536,
238
+ "eval_runtime": 4.6337,
239
+ "eval_samples_per_second": 215.808,
240
+ "step": 3800
241
+ },
242
+ {
243
+ "epoch": 0.35,
244
+ "learning_rate": 0.00026472352059264486,
245
+ "loss": 0.0745,
246
  "step": 4000
247
  },
248
  {
249
+ "epoch": 0.35,
250
+ "eval_accuracy": 92.7,
251
+ "eval_average_metrics": 91.72389100126742,
252
+ "eval_f1": 90.74778200253485,
253
+ "eval_loss": 0.07112478464841843,
254
+ "eval_runtime": 4.5897,
255
+ "eval_samples_per_second": 217.881,
256
  "step": 4000
257
  },
258
  {
259
+ "epoch": 0.37,
260
+ "eval_accuracy": 92.4,
261
+ "eval_average_metrics": 91.38987341772153,
262
+ "eval_f1": 90.37974683544304,
263
+ "eval_loss": 0.06926184892654419,
264
+ "eval_runtime": 4.5334,
265
+ "eval_samples_per_second": 220.585,
266
+ "step": 4200
267
  },
268
  {
269
+ "epoch": 0.39,
270
+ "eval_accuracy": 94.19999999999999,
271
+ "eval_average_metrics": 93.2021505376344,
272
+ "eval_f1": 92.20430107526882,
273
+ "eval_loss": 0.06072888895869255,
274
+ "eval_runtime": 4.7545,
275
+ "eval_samples_per_second": 210.329,
276
+ "step": 4400
277
+ },
278
+ {
279
+ "epoch": 0.4,
280
+ "learning_rate": 0.00026031396066672545,
281
+ "loss": 0.0766,
282
  "step": 4500
283
  },
284
  {
285
+ "epoch": 0.41,
286
+ "eval_accuracy": 93.89999999999999,
287
+ "eval_average_metrics": 92.8779038718291,
288
+ "eval_f1": 91.85580774365822,
289
+ "eval_loss": 0.06117413192987442,
290
+ "eval_runtime": 4.5554,
291
+ "eval_samples_per_second": 219.52,
292
+ "step": 4600
293
+ },
294
+ {
295
+ "epoch": 0.42,
296
+ "eval_accuracy": 94.0,
297
+ "eval_average_metrics": 93.13402061855669,
298
+ "eval_f1": 92.2680412371134,
299
+ "eval_loss": 0.060421667993068695,
300
+ "eval_runtime": 4.5444,
301
+ "eval_samples_per_second": 220.049,
302
+ "step": 4800
303
+ },
304
+ {
305
+ "epoch": 0.44,
306
+ "learning_rate": 0.00025590440074080604,
307
+ "loss": 0.0729,
308
  "step": 5000
309
  },
310
  {
311
+ "epoch": 0.44,
312
+ "eval_accuracy": 93.0,
313
+ "eval_average_metrics": 91.95454545454547,
314
+ "eval_f1": 90.90909090909092,
315
+ "eval_loss": 0.06169410049915314,
316
+ "eval_runtime": 4.6688,
317
+ "eval_samples_per_second": 214.188,
318
  "step": 5000
319
  },
320
  {
321
+ "epoch": 0.46,
322
+ "eval_accuracy": 94.3,
323
+ "eval_average_metrics": 93.3244966442953,
324
+ "eval_f1": 92.34899328859059,
325
+ "eval_loss": 0.0613214485347271,
326
+ "eval_runtime": 4.5049,
327
+ "eval_samples_per_second": 221.979,
328
+ "step": 5200
329
  },
330
  {
331
+ "epoch": 0.48,
332
+ "eval_accuracy": 93.7,
333
+ "eval_average_metrics": 92.71070959264125,
334
+ "eval_f1": 91.72141918528251,
335
+ "eval_loss": 0.06161003187298775,
336
+ "eval_runtime": 4.5685,
337
+ "eval_samples_per_second": 218.891,
338
+ "step": 5400
339
+ },
340
+ {
341
+ "epoch": 0.49,
342
+ "learning_rate": 0.0002514948408148867,
343
+ "loss": 0.071,
344
  "step": 5500
345
  },
346
  {
347
+ "epoch": 0.49,
348
+ "eval_accuracy": 93.89999999999999,
349
+ "eval_average_metrics": 92.95262123197902,
350
+ "eval_f1": 92.00524246395806,
351
+ "eval_loss": 0.06118384748697281,
352
+ "eval_runtime": 4.564,
353
+ "eval_samples_per_second": 219.107,
354
+ "step": 5600
355
+ },
356
+ {
357
+ "epoch": 0.51,
358
+ "eval_accuracy": 93.0,
359
+ "eval_average_metrics": 92.0470737913486,
360
+ "eval_f1": 91.09414758269719,
361
+ "eval_loss": 0.06946446746587753,
362
+ "eval_runtime": 4.5787,
363
+ "eval_samples_per_second": 218.4,
364
+ "step": 5800
365
+ },
366
+ {
367
+ "epoch": 0.53,
368
+ "learning_rate": 0.0002470852808889673,
369
+ "loss": 0.0748,
370
  "step": 6000
371
  },
372
  {
373
+ "epoch": 0.53,
374
+ "eval_accuracy": 94.6,
375
+ "eval_average_metrics": 93.7095744680851,
376
+ "eval_f1": 92.81914893617021,
377
+ "eval_loss": 0.05765092372894287,
378
+ "eval_runtime": 4.5272,
379
+ "eval_samples_per_second": 220.889,
380
  "step": 6000
381
  },
382
  {
383
+ "epoch": 0.55,
384
+ "eval_accuracy": 94.3,
385
+ "eval_average_metrics": 93.3244966442953,
386
+ "eval_f1": 92.34899328859059,
387
+ "eval_loss": 0.05894589051604271,
388
+ "eval_runtime": 4.6099,
389
+ "eval_samples_per_second": 216.924,
390
+ "step": 6200
391
  },
392
  {
393
+ "epoch": 0.56,
394
+ "eval_accuracy": 93.7,
395
+ "eval_average_metrics": 92.598987854251,
396
+ "eval_f1": 91.49797570850201,
397
+ "eval_loss": 0.061102479696273804,
398
+ "eval_runtime": 4.6948,
399
+ "eval_samples_per_second": 213.001,
400
+ "step": 6400
401
+ },
402
+ {
403
+ "epoch": 0.57,
404
+ "learning_rate": 0.00024267572096304786,
405
+ "loss": 0.074,
406
  "step": 6500
407
  },
408
  {
409
+ "epoch": 0.58,
410
+ "eval_accuracy": 93.0,
411
+ "eval_average_metrics": 92.03571428571429,
412
+ "eval_f1": 91.07142857142858,
413
+ "eval_loss": 0.06452207267284393,
414
+ "eval_runtime": 4.6106,
415
+ "eval_samples_per_second": 216.891,
416
+ "step": 6600
417
+ },
418
+ {
419
+ "epoch": 0.6,
420
+ "eval_accuracy": 93.5,
421
+ "eval_average_metrics": 92.55645161290323,
422
+ "eval_f1": 91.61290322580645,
423
+ "eval_loss": 0.05938281863927841,
424
+ "eval_runtime": 4.5228,
425
+ "eval_samples_per_second": 221.102,
426
+ "step": 6800
427
+ },
428
+ {
429
+ "epoch": 0.62,
430
+ "learning_rate": 0.00023826616103712848,
431
+ "loss": 0.0738,
432
  "step": 7000
433
  },
434
  {
435
+ "epoch": 0.62,
436
+ "eval_accuracy": 94.6,
437
+ "eval_average_metrics": 93.77519582245431,
438
+ "eval_f1": 92.95039164490862,
439
+ "eval_loss": 0.057858582586050034,
440
+ "eval_runtime": 4.5704,
441
+ "eval_samples_per_second": 218.797,
442
  "step": 7000
443
  },
444
  {
445
+ "epoch": 0.63,
446
+ "eval_accuracy": 94.6,
447
+ "eval_average_metrics": 93.71909814323607,
448
+ "eval_f1": 92.83819628647215,
449
+ "eval_loss": 0.05671229586005211,
450
+ "eval_runtime": 4.4966,
451
+ "eval_samples_per_second": 222.39,
452
+ "step": 7200
453
  },
454
  {
455
+ "epoch": 0.65,
456
+ "eval_accuracy": 94.5,
457
+ "eval_average_metrics": 93.64580602883355,
458
+ "eval_f1": 92.7916120576671,
459
+ "eval_loss": 0.059491805732250214,
460
+ "eval_runtime": 4.5973,
461
+ "eval_samples_per_second": 217.521,
462
+ "step": 7400
463
+ },
464
+ {
465
+ "epoch": 0.66,
466
+ "learning_rate": 0.00023385660111120907,
467
+ "loss": 0.0746,
468
  "step": 7500
469
  },
470
  {
471
+ "epoch": 0.67,
472
+ "eval_accuracy": 94.5,
473
+ "eval_average_metrics": 93.53879892037787,
474
+ "eval_f1": 92.57759784075573,
475
+ "eval_loss": 0.057486891746520996,
476
+ "eval_runtime": 4.6372,
477
+ "eval_samples_per_second": 215.649,
478
+ "step": 7600
479
+ },
480
+ {
481
+ "epoch": 0.69,
482
+ "eval_accuracy": 94.69999999999999,
483
+ "eval_average_metrics": 93.81194926568757,
484
+ "eval_f1": 92.92389853137516,
485
+ "eval_loss": 0.05628298968076706,
486
+ "eval_runtime": 4.6937,
487
+ "eval_samples_per_second": 213.051,
488
+ "step": 7800
489
+ },
490
+ {
491
+ "epoch": 0.71,
492
+ "learning_rate": 0.0002294470411852897,
493
+ "loss": 0.0762,
494
  "step": 8000
495
  },
496
  {
497
+ "epoch": 0.71,
498
+ "eval_accuracy": 94.39999999999999,
499
+ "eval_average_metrics": 93.4566844919786,
500
+ "eval_f1": 92.51336898395722,
501
+ "eval_loss": 0.05849047377705574,
502
+ "eval_runtime": 4.6139,
503
+ "eval_samples_per_second": 216.737,
504
  "step": 8000
505
  },
506
  {
507
+ "epoch": 0.72,
508
+ "eval_accuracy": 94.69999999999999,
509
+ "eval_average_metrics": 93.86773981603153,
510
+ "eval_f1": 93.03547963206307,
511
+ "eval_loss": 0.056792281568050385,
512
+ "eval_runtime": 4.6916,
513
+ "eval_samples_per_second": 213.147,
514
+ "step": 8200
515
  },
516
  {
517
+ "epoch": 0.74,
518
+ "eval_accuracy": 94.5,
519
+ "eval_average_metrics": 93.59794156706508,
520
+ "eval_f1": 92.69588313413014,
521
+ "eval_loss": 0.05638590082526207,
522
+ "eval_runtime": 4.6952,
523
+ "eval_samples_per_second": 212.982,
524
+ "step": 8400
525
+ },
526
+ {
527
+ "epoch": 0.75,
528
+ "learning_rate": 0.0002250374812593703,
529
+ "loss": 0.0726,
530
  "step": 8500
531
  },
532
  {
533
+ "epoch": 0.76,
534
+ "eval_accuracy": 95.0,
535
+ "eval_average_metrics": 94.22774869109946,
536
+ "eval_f1": 93.45549738219894,
537
+ "eval_loss": 0.055720701813697815,
538
+ "eval_runtime": 4.5004,
539
+ "eval_samples_per_second": 222.204,
540
+ "step": 8600
541
+ },
542
+ {
543
+ "epoch": 0.78,
544
+ "eval_accuracy": 94.0,
545
+ "eval_average_metrics": 93.08355091383812,
546
+ "eval_f1": 92.16710182767625,
547
+ "eval_loss": 0.06084197014570236,
548
+ "eval_runtime": 4.5822,
549
+ "eval_samples_per_second": 218.238,
550
+ "step": 8800
551
+ },
552
+ {
553
+ "epoch": 0.79,
554
+ "learning_rate": 0.0002206279213334509,
555
+ "loss": 0.0734,
556
  "step": 9000
557
  },
558
  {
559
+ "epoch": 0.79,
560
+ "eval_accuracy": 93.10000000000001,
561
+ "eval_average_metrics": 92.14386973180078,
562
+ "eval_f1": 91.18773946360155,
563
+ "eval_loss": 0.06530317664146423,
564
+ "eval_runtime": 4.5035,
565
+ "eval_samples_per_second": 222.05,
566
  "step": 9000
567
  },
568
  {
569
+ "epoch": 0.81,
570
+ "eval_accuracy": 93.8,
571
+ "eval_average_metrics": 92.76666666666665,
572
+ "eval_f1": 91.73333333333332,
573
+ "eval_loss": 0.05946441367268562,
574
+ "eval_runtime": 4.8109,
575
+ "eval_samples_per_second": 207.861,
576
+ "step": 9200
577
  },
578
  {
579
+ "epoch": 0.83,
580
  "eval_accuracy": 94.39999999999999,
581
+ "eval_average_metrics": 93.4566844919786,
582
+ "eval_f1": 92.51336898395722,
583
+ "eval_loss": 0.059339020401239395,
584
+ "eval_runtime": 4.5265,
585
+ "eval_samples_per_second": 220.922,
586
+ "step": 9400
587
+ },
588
+ {
589
+ "epoch": 0.84,
590
+ "learning_rate": 0.00021621836140753152,
591
+ "loss": 0.0731,
592
  "step": 9500
593
  },
594
  {
595
+ "epoch": 0.85,
596
+ "eval_accuracy": 92.60000000000001,
597
+ "eval_average_metrics": 91.64005037783375,
598
+ "eval_f1": 90.6801007556675,
599
+ "eval_loss": 0.07186109572649002,
600
+ "eval_runtime": 4.5431,
601
+ "eval_samples_per_second": 220.114,
602
+ "step": 9600
603
+ },
604
+ {
605
+ "epoch": 0.86,
606
+ "eval_accuracy": 94.1,
607
+ "eval_average_metrics": 93.1633069828722,
608
+ "eval_f1": 92.2266139657444,
609
+ "eval_loss": 0.05946135148406029,
610
+ "eval_runtime": 4.5655,
611
+ "eval_samples_per_second": 219.036,
612
+ "step": 9800
613
+ },
614
+ {
615
+ "epoch": 0.88,
616
+ "learning_rate": 0.0002118088014816121,
617
+ "loss": 0.0733,
618
  "step": 10000
619
  },
620
  {
621
+ "epoch": 0.88,
622
+ "eval_accuracy": 93.89999999999999,
623
+ "eval_average_metrics": 92.95262123197902,
624
+ "eval_f1": 92.00524246395806,
625
+ "eval_loss": 0.06076710671186447,
626
+ "eval_runtime": 4.5407,
627
+ "eval_samples_per_second": 220.229,
628
  "step": 10000
629
  },
630
  {
631
+ "epoch": 0.9,
632
+ "eval_accuracy": 94.3,
633
+ "eval_average_metrics": 93.34492656875835,
634
+ "eval_f1": 92.3898531375167,
635
+ "eval_loss": 0.055939000099897385,
636
+ "eval_runtime": 4.4996,
637
+ "eval_samples_per_second": 222.24,
638
+ "step": 10200
639
  },
640
  {
641
+ "epoch": 0.92,
642
+ "eval_accuracy": 94.8,
643
+ "eval_average_metrics": 93.96084656084656,
644
+ "eval_f1": 93.12169312169313,
645
+ "eval_loss": 0.05636580288410187,
646
+ "eval_runtime": 4.4643,
647
+ "eval_samples_per_second": 223.998,
648
+ "step": 10400
649
+ },
650
+ {
651
+ "epoch": 0.93,
652
+ "learning_rate": 0.00020739924155569272,
653
+ "loss": 0.0738,
654
  "step": 10500
655
  },
656
+ {
657
+ "epoch": 0.93,
658
+ "eval_accuracy": 93.8,
659
+ "eval_average_metrics": 92.71081081081081,
660
+ "eval_f1": 91.62162162162161,
661
+ "eval_loss": 0.059175312519073486,
662
+ "eval_runtime": 4.7877,
663
+ "eval_samples_per_second": 208.867,
664
+ "step": 10600
665
+ },
666
+ {
667
+ "epoch": 0.95,
668
+ "eval_accuracy": 93.89999999999999,
669
+ "eval_average_metrics": 92.91026490066224,
670
+ "eval_f1": 91.9205298013245,
671
+ "eval_loss": 0.059644319117069244,
672
+ "eval_runtime": 4.6781,
673
+ "eval_samples_per_second": 213.761,
674
+ "step": 10800
675
+ },
676
+ {
677
+ "epoch": 0.97,
678
+ "learning_rate": 0.0002029896816297733,
679
+ "loss": 0.0752,
680
+ "step": 11000
681
+ },
682
+ {
683
+ "epoch": 0.97,
684
+ "eval_accuracy": 93.60000000000001,
685
+ "eval_average_metrics": 92.622454308094,
686
+ "eval_f1": 91.644908616188,
687
+ "eval_loss": 0.061212606728076935,
688
+ "eval_runtime": 4.4874,
689
+ "eval_samples_per_second": 222.845,
690
+ "step": 11000
691
+ },
692
+ {
693
+ "epoch": 0.99,
694
+ "eval_accuracy": 94.3,
695
+ "eval_average_metrics": 93.41474442988203,
696
+ "eval_f1": 92.52948885976409,
697
+ "eval_loss": 0.059587035328149796,
698
+ "eval_runtime": 4.5784,
699
+ "eval_samples_per_second": 218.418,
700
+ "step": 11200
701
+ },
702
+ {
703
+ "epoch": 1.01,
704
+ "eval_accuracy": 94.1,
705
+ "eval_average_metrics": 93.2533462033462,
706
+ "eval_f1": 92.4066924066924,
707
+ "eval_loss": 0.060919877141714096,
708
+ "eval_runtime": 4.5416,
709
+ "eval_samples_per_second": 220.185,
710
+ "step": 11400
711
+ },
712
+ {
713
+ "epoch": 1.01,
714
+ "learning_rate": 0.00019858012170385393,
715
+ "loss": 0.0716,
716
+ "step": 11500
717
+ },
718
+ {
719
+ "epoch": 1.02,
720
+ "eval_accuracy": 94.1,
721
+ "eval_average_metrics": 93.11141522029372,
722
+ "eval_f1": 92.12283044058745,
723
+ "eval_loss": 0.05961688980460167,
724
+ "eval_runtime": 4.5998,
725
+ "eval_samples_per_second": 217.402,
726
+ "step": 11600
727
+ },
728
+ {
729
+ "epoch": 1.04,
730
+ "eval_accuracy": 94.0,
731
+ "eval_average_metrics": 93.0212201591512,
732
+ "eval_f1": 92.04244031830238,
733
+ "eval_loss": 0.06122226640582085,
734
+ "eval_runtime": 4.6213,
735
+ "eval_samples_per_second": 216.391,
736
+ "step": 11800
737
+ },
738
+ {
739
+ "epoch": 1.06,
740
+ "learning_rate": 0.00019417056177793455,
741
+ "loss": 0.0713,
742
+ "step": 12000
743
+ },
744
+ {
745
+ "epoch": 1.06,
746
+ "eval_accuracy": 94.0,
747
+ "eval_average_metrics": 92.96774193548387,
748
+ "eval_f1": 91.93548387096774,
749
+ "eval_loss": 0.06119931861758232,
750
+ "eval_runtime": 4.5888,
751
+ "eval_samples_per_second": 217.92,
752
+ "step": 12000
753
+ },
754
+ {
755
+ "epoch": 1.08,
756
+ "eval_accuracy": 94.3,
757
+ "eval_average_metrics": 93.31419919246298,
758
+ "eval_f1": 92.32839838492598,
759
+ "eval_loss": 0.05847727879881859,
760
+ "eval_runtime": 4.4633,
761
+ "eval_samples_per_second": 224.05,
762
+ "step": 12200
763
+ },
764
+ {
765
+ "epoch": 1.09,
766
+ "eval_accuracy": 93.4,
767
+ "eval_average_metrics": 92.39190600522193,
768
+ "eval_f1": 91.38381201044386,
769
+ "eval_loss": 0.06247144192457199,
770
+ "eval_runtime": 4.5667,
771
+ "eval_samples_per_second": 218.978,
772
+ "step": 12400
773
+ },
774
+ {
775
+ "epoch": 1.1,
776
+ "learning_rate": 0.00018976100185201514,
777
+ "loss": 0.0687,
778
+ "step": 12500
779
+ },
780
+ {
781
+ "epoch": 1.11,
782
+ "eval_accuracy": 93.89999999999999,
783
+ "eval_average_metrics": 93.04475032010242,
784
+ "eval_f1": 92.18950064020484,
785
+ "eval_loss": 0.0635332465171814,
786
+ "eval_runtime": 4.5944,
787
+ "eval_samples_per_second": 217.654,
788
+ "step": 12600
789
+ },
790
+ {
791
+ "epoch": 1.13,
792
+ "eval_accuracy": 94.0,
793
+ "eval_average_metrics": 93.05263157894737,
794
+ "eval_f1": 92.10526315789474,
795
+ "eval_loss": 0.06063272804021835,
796
+ "eval_runtime": 4.5058,
797
+ "eval_samples_per_second": 221.934,
798
+ "step": 12800
799
+ },
800
+ {
801
+ "epoch": 1.15,
802
+ "learning_rate": 0.00018535144192609576,
803
+ "loss": 0.0711,
804
+ "step": 13000
805
+ },
806
+ {
807
+ "epoch": 1.15,
808
+ "eval_accuracy": 94.0,
809
+ "eval_average_metrics": 93.12403100775194,
810
+ "eval_f1": 92.24806201550389,
811
+ "eval_loss": 0.06045162305235863,
812
+ "eval_runtime": 4.6598,
813
+ "eval_samples_per_second": 214.601,
814
+ "step": 13000
815
+ },
816
+ {
817
+ "epoch": 1.16,
818
+ "eval_accuracy": 94.39999999999999,
819
+ "eval_average_metrics": 93.50606860158311,
820
+ "eval_f1": 92.61213720316623,
821
+ "eval_loss": 0.06117108836770058,
822
+ "eval_runtime": 4.501,
823
+ "eval_samples_per_second": 222.172,
824
+ "step": 13200
825
+ },
826
+ {
827
+ "epoch": 1.18,
828
+ "eval_accuracy": 94.39999999999999,
829
+ "eval_average_metrics": 93.4962962962963,
830
+ "eval_f1": 92.5925925925926,
831
+ "eval_loss": 0.05846463143825531,
832
+ "eval_runtime": 4.4931,
833
+ "eval_samples_per_second": 222.561,
834
+ "step": 13400
835
+ },
836
+ {
837
+ "epoch": 1.19,
838
+ "learning_rate": 0.00018094188200017637,
839
+ "loss": 0.0694,
840
+ "step": 13500
841
+ },
842
+ {
843
+ "epoch": 1.2,
844
+ "eval_accuracy": 94.1,
845
+ "eval_average_metrics": 93.06889338731443,
846
+ "eval_f1": 92.03778677462888,
847
+ "eval_loss": 0.05917409434914589,
848
+ "eval_runtime": 4.6082,
849
+ "eval_samples_per_second": 217.003,
850
+ "step": 13600
851
+ },
852
+ {
853
+ "epoch": 1.22,
854
+ "eval_accuracy": 93.8,
855
+ "eval_average_metrics": 92.85300261096606,
856
+ "eval_f1": 91.9060052219321,
857
+ "eval_loss": 0.06280769407749176,
858
+ "eval_runtime": 4.5282,
859
+ "eval_samples_per_second": 220.836,
860
+ "step": 13800
861
+ },
862
+ {
863
+ "epoch": 1.23,
864
+ "learning_rate": 0.00017653232207425696,
865
+ "loss": 0.0741,
866
+ "step": 14000
867
+ },
868
+ {
869
+ "epoch": 1.23,
870
+ "eval_accuracy": 93.60000000000001,
871
+ "eval_average_metrics": 92.65492227979274,
872
+ "eval_f1": 91.70984455958549,
873
+ "eval_loss": 0.06333824247121811,
874
+ "eval_runtime": 4.4743,
875
+ "eval_samples_per_second": 223.497,
876
+ "step": 14000
877
+ },
878
+ {
879
+ "epoch": 1.25,
880
+ "eval_accuracy": 93.89999999999999,
881
+ "eval_average_metrics": 92.94211563731932,
882
+ "eval_f1": 91.98423127463865,
883
+ "eval_loss": 0.06064913421869278,
884
+ "eval_runtime": 4.6765,
885
+ "eval_samples_per_second": 213.836,
886
+ "step": 14200
887
+ },
888
+ {
889
+ "epoch": 1.27,
890
+ "eval_accuracy": 92.60000000000001,
891
+ "eval_average_metrics": 91.62828282828283,
892
+ "eval_f1": 90.65656565656566,
893
+ "eval_loss": 0.07161322236061096,
894
+ "eval_runtime": 4.5138,
895
+ "eval_samples_per_second": 221.545,
896
+ "step": 14400
897
+ },
898
+ {
899
+ "epoch": 1.28,
900
+ "learning_rate": 0.00017212276214833758,
901
+ "loss": 0.0715,
902
+ "step": 14500
903
+ },
904
+ {
905
+ "epoch": 1.29,
906
+ "eval_accuracy": 93.7,
907
+ "eval_average_metrics": 92.6998023715415,
908
+ "eval_f1": 91.699604743083,
909
+ "eval_loss": 0.06242042034864426,
910
+ "eval_runtime": 4.764,
911
+ "eval_samples_per_second": 209.909,
912
+ "step": 14600
913
+ },
914
+ {
915
+ "epoch": 1.31,
916
+ "eval_accuracy": 93.7,
917
+ "eval_average_metrics": 92.73235294117647,
918
+ "eval_f1": 91.76470588235294,
919
+ "eval_loss": 0.0626644566655159,
920
+ "eval_runtime": 4.4732,
921
+ "eval_samples_per_second": 223.555,
922
+ "step": 14800
923
+ },
924
+ {
925
+ "epoch": 1.32,
926
+ "learning_rate": 0.0001677132022224182,
927
+ "loss": 0.0714,
928
+ "step": 15000
929
+ },
930
+ {
931
+ "epoch": 1.32,
932
+ "eval_accuracy": 94.39999999999999,
933
+ "eval_average_metrics": 93.54464751958224,
934
+ "eval_f1": 92.68929503916449,
935
+ "eval_loss": 0.05990656465291977,
936
+ "eval_runtime": 4.5922,
937
+ "eval_samples_per_second": 217.762,
938
+ "step": 15000
939
+ },
940
+ {
941
+ "epoch": 1.34,
942
+ "eval_accuracy": 94.6,
943
+ "eval_average_metrics": 93.73799472295514,
944
+ "eval_f1": 92.87598944591029,
945
+ "eval_loss": 0.060957495123147964,
946
+ "eval_runtime": 4.4536,
947
+ "eval_samples_per_second": 224.537,
948
+ "step": 15200
949
+ },
950
+ {
951
+ "epoch": 1.36,
952
+ "eval_accuracy": 94.39999999999999,
953
+ "eval_average_metrics": 93.51578947368421,
954
+ "eval_f1": 92.63157894736842,
955
+ "eval_loss": 0.06167261675000191,
956
+ "eval_runtime": 4.4865,
957
+ "eval_samples_per_second": 222.89,
958
+ "step": 15400
959
+ },
960
+ {
961
+ "epoch": 1.37,
962
+ "learning_rate": 0.0001633036422964988,
963
+ "loss": 0.0707,
964
+ "step": 15500
965
+ },
966
+ {
967
+ "epoch": 1.38,
968
+ "eval_accuracy": 94.39999999999999,
969
+ "eval_average_metrics": 93.51578947368421,
970
+ "eval_f1": 92.63157894736842,
971
+ "eval_loss": 0.061066027730703354,
972
+ "eval_runtime": 4.5716,
973
+ "eval_samples_per_second": 218.74,
974
+ "step": 15600
975
+ },
976
+ {
977
+ "epoch": 1.39,
978
+ "eval_accuracy": 94.1,
979
+ "eval_average_metrics": 93.1633069828722,
980
+ "eval_f1": 92.2266139657444,
981
+ "eval_loss": 0.06235107034444809,
982
+ "eval_runtime": 4.499,
983
+ "eval_samples_per_second": 222.27,
984
+ "step": 15800
985
+ },
986
+ {
987
+ "epoch": 1.41,
988
+ "learning_rate": 0.00015889408237057938,
989
+ "loss": 0.0709,
990
+ "step": 16000
991
+ },
992
+ {
993
+ "epoch": 1.41,
994
+ "eval_accuracy": 94.39999999999999,
995
+ "eval_average_metrics": 93.47659574468085,
996
+ "eval_f1": 92.55319148936171,
997
+ "eval_loss": 0.06194847822189331,
998
+ "eval_runtime": 4.5231,
999
+ "eval_samples_per_second": 221.086,
1000
+ "step": 16000
1001
+ },
1002
+ {
1003
+ "epoch": 1.43,
1004
+ "eval_accuracy": 94.6,
1005
+ "eval_average_metrics": 93.76596858638743,
1006
+ "eval_f1": 92.93193717277488,
1007
+ "eval_loss": 0.05966123938560486,
1008
+ "eval_runtime": 4.637,
1009
+ "eval_samples_per_second": 215.656,
1010
+ "step": 16200
1011
+ },
1012
+ {
1013
+ "epoch": 1.45,
1014
+ "eval_accuracy": 94.39999999999999,
1015
+ "eval_average_metrics": 93.44664879356569,
1016
+ "eval_f1": 92.49329758713138,
1017
+ "eval_loss": 0.06104936823248863,
1018
+ "eval_runtime": 4.5374,
1019
+ "eval_samples_per_second": 220.391,
1020
+ "step": 16400
1021
+ },
1022
+ {
1023
+ "epoch": 1.46,
1024
+ "learning_rate": 0.00015448452244466002,
1025
+ "loss": 0.0729,
1026
+ "step": 16500
1027
+ },
1028
+ {
1029
+ "epoch": 1.46,
1030
+ "eval_accuracy": 94.69999999999999,
1031
+ "eval_average_metrics": 93.87686762778506,
1032
+ "eval_f1": 93.05373525557013,
1033
+ "eval_loss": 0.06205834820866585,
1034
+ "eval_runtime": 4.4838,
1035
+ "eval_samples_per_second": 223.023,
1036
+ "step": 16600
1037
+ },
1038
+ {
1039
+ "epoch": 1.48,
1040
+ "eval_accuracy": 94.6,
1041
+ "eval_average_metrics": 93.78437500000001,
1042
+ "eval_f1": 92.96875000000001,
1043
+ "eval_loss": 0.06089754402637482,
1044
+ "eval_runtime": 4.5038,
1045
+ "eval_samples_per_second": 222.035,
1046
+ "step": 16800
1047
+ },
1048
+ {
1049
+ "epoch": 1.5,
1050
+ "learning_rate": 0.00015007496251874061,
1051
+ "loss": 0.07,
1052
+ "step": 17000
1053
+ },
1054
+ {
1055
+ "epoch": 1.5,
1056
+ "eval_accuracy": 94.19999999999999,
1057
+ "eval_average_metrics": 93.36288659793814,
1058
+ "eval_f1": 92.52577319587628,
1059
+ "eval_loss": 0.06112566590309143,
1060
+ "eval_runtime": 4.6026,
1061
+ "eval_samples_per_second": 217.269,
1062
+ "step": 17000
1063
+ },
1064
+ {
1065
+ "epoch": 1.52,
1066
+ "eval_accuracy": 94.39999999999999,
1067
+ "eval_average_metrics": 93.48647214854111,
1068
+ "eval_f1": 92.57294429708222,
1069
+ "eval_loss": 0.06089947372674942,
1070
+ "eval_runtime": 4.5389,
1071
+ "eval_samples_per_second": 220.318,
1072
+ "step": 17200
1073
+ },
1074
+ {
1075
+ "epoch": 1.53,
1076
+ "eval_accuracy": 94.1,
1077
+ "eval_average_metrics": 93.24354838709678,
1078
+ "eval_f1": 92.38709677419355,
1079
+ "eval_loss": 0.06110972911119461,
1080
+ "eval_runtime": 4.6859,
1081
+ "eval_samples_per_second": 213.405,
1082
+ "step": 17400
1083
+ },
1084
+ {
1085
+ "epoch": 1.54,
1086
+ "learning_rate": 0.00014566540259282123,
1087
+ "loss": 0.0669,
1088
+ "step": 17500
1089
+ },
1090
+ {
1091
+ "epoch": 1.55,
1092
+ "eval_accuracy": 94.3,
1093
+ "eval_average_metrics": 93.38513870541613,
1094
+ "eval_f1": 92.47027741083225,
1095
+ "eval_loss": 0.06174538657069206,
1096
+ "eval_runtime": 4.6674,
1097
+ "eval_samples_per_second": 214.254,
1098
+ "step": 17600
1099
+ },
1100
+ {
1101
+ "epoch": 1.57,
1102
+ "eval_accuracy": 94.6,
1103
+ "eval_average_metrics": 93.7095744680851,
1104
+ "eval_f1": 92.81914893617021,
1105
+ "eval_loss": 0.059681929647922516,
1106
+ "eval_runtime": 4.6196,
1107
+ "eval_samples_per_second": 216.471,
1108
+ "step": 17800
1109
+ },
1110
+ {
1111
+ "epoch": 1.59,
1112
+ "learning_rate": 0.00014125584266690182,
1113
+ "loss": 0.07,
1114
+ "step": 18000
1115
+ },
1116
+ {
1117
+ "epoch": 1.59,
1118
+ "eval_accuracy": 94.19999999999999,
1119
+ "eval_average_metrics": 93.29422572178477,
1120
+ "eval_f1": 92.38845144356955,
1121
+ "eval_loss": 0.061346184462308884,
1122
+ "eval_runtime": 4.4984,
1123
+ "eval_samples_per_second": 222.301,
1124
+ "step": 18000
1125
+ },
1126
+ {
1127
+ "epoch": 1.61,
1128
+ "eval_accuracy": 94.19999999999999,
1129
+ "eval_average_metrics": 93.28421052631577,
1130
+ "eval_f1": 92.36842105263158,
1131
+ "eval_loss": 0.06077203154563904,
1132
+ "eval_runtime": 4.454,
1133
+ "eval_samples_per_second": 224.518,
1134
+ "step": 18200
1135
+ },
1136
+ {
1137
+ "epoch": 1.62,
1138
+ "eval_accuracy": 94.1,
1139
+ "eval_average_metrics": 93.13233731739707,
1140
+ "eval_f1": 92.16467463479415,
1141
+ "eval_loss": 0.05959217995405197,
1142
+ "eval_runtime": 4.588,
1143
+ "eval_samples_per_second": 217.959,
1144
+ "step": 18400
1145
+ },
1146
+ {
1147
+ "epoch": 1.63,
1148
+ "learning_rate": 0.00013684628274098244,
1149
+ "loss": 0.069,
1150
+ "step": 18500
1151
+ },
1152
+ {
1153
+ "epoch": 1.64,
1154
+ "eval_accuracy": 94.39999999999999,
1155
+ "eval_average_metrics": 93.46666666666665,
1156
+ "eval_f1": 92.53333333333332,
1157
+ "eval_loss": 0.06017257645726204,
1158
+ "eval_runtime": 4.483,
1159
+ "eval_samples_per_second": 223.066,
1160
+ "step": 18600
1161
+ },
1162
+ {
1163
+ "epoch": 1.66,
1164
+ "eval_accuracy": 94.5,
1165
+ "eval_average_metrics": 93.58821571238349,
1166
+ "eval_f1": 92.67643142476697,
1167
+ "eval_loss": 0.058851905167102814,
1168
+ "eval_runtime": 4.5048,
1169
+ "eval_samples_per_second": 221.985,
1170
+ "step": 18800
1171
+ },
1172
+ {
1173
+ "epoch": 1.68,
1174
+ "learning_rate": 0.00013243672281506306,
1175
+ "loss": 0.0713,
1176
+ "step": 19000
1177
+ },
1178
+ {
1179
+ "epoch": 1.68,
1180
+ "eval_accuracy": 93.89999999999999,
1181
+ "eval_average_metrics": 92.97346805736636,
1182
+ "eval_f1": 92.04693611473273,
1183
+ "eval_loss": 0.06167756766080856,
1184
+ "eval_runtime": 4.4859,
1185
+ "eval_samples_per_second": 222.921,
1186
+ "step": 19000
1187
+ },
1188
+ {
1189
+ "epoch": 1.69,
1190
+ "eval_accuracy": 93.7,
1191
+ "eval_average_metrics": 92.6998023715415,
1192
+ "eval_f1": 91.699604743083,
1193
+ "eval_loss": 0.06253690272569656,
1194
+ "eval_runtime": 4.582,
1195
+ "eval_samples_per_second": 218.244,
1196
+ "step": 19200
1197
+ },
1198
+ {
1199
+ "epoch": 1.71,
1200
+ "eval_accuracy": 93.89999999999999,
1201
+ "eval_average_metrics": 93.00433376455368,
1202
+ "eval_f1": 92.10866752910736,
1203
+ "eval_loss": 0.06255872547626495,
1204
+ "eval_runtime": 4.5188,
1205
+ "eval_samples_per_second": 221.296,
1206
+ "step": 19400
1207
+ },
1208
+ {
1209
+ "epoch": 1.72,
1210
+ "learning_rate": 0.00012802716288914365,
1211
+ "loss": 0.0699,
1212
+ "step": 19500
1213
+ },
1214
+ {
1215
+ "epoch": 1.73,
1216
+ "eval_accuracy": 94.3,
1217
+ "eval_average_metrics": 93.40492772667542,
1218
+ "eval_f1": 92.50985545335085,
1219
+ "eval_loss": 0.062451381236314774,
1220
+ "eval_runtime": 4.5919,
1221
+ "eval_samples_per_second": 217.773,
1222
+ "step": 19600
1223
+ },
1224
+ {
1225
+ "epoch": 1.75,
1226
+ "eval_accuracy": 94.0,
1227
+ "eval_average_metrics": 93.01063829787235,
1228
+ "eval_f1": 92.0212765957447,
1229
+ "eval_loss": 0.06319490820169449,
1230
+ "eval_runtime": 4.591,
1231
+ "eval_samples_per_second": 217.817,
1232
+ "step": 19800
1233
+ },
1234
+ {
1235
+ "epoch": 1.76,
1236
+ "learning_rate": 0.00012361760296322426,
1237
+ "loss": 0.0698,
1238
+ "step": 20000
1239
+ },
1240
+ {
1241
+ "epoch": 1.76,
1242
+ "eval_accuracy": 93.5,
1243
+ "eval_average_metrics": 92.51271186440678,
1244
+ "eval_f1": 91.52542372881356,
1245
+ "eval_loss": 0.06364640593528748,
1246
+ "eval_runtime": 4.5171,
1247
+ "eval_samples_per_second": 221.382,
1248
+ "step": 20000
1249
+ },
1250
+ {
1251
+ "epoch": 1.78,
1252
+ "eval_accuracy": 93.89999999999999,
1253
+ "eval_average_metrics": 92.99409857328145,
1254
+ "eval_f1": 92.0881971465629,
1255
+ "eval_loss": 0.06635148823261261,
1256
+ "eval_runtime": 4.6206,
1257
+ "eval_samples_per_second": 216.422,
1258
+ "step": 20200
1259
+ },
1260
+ {
1261
+ "epoch": 1.8,
1262
+ "eval_accuracy": 94.1,
1263
+ "eval_average_metrics": 93.11141522029372,
1264
+ "eval_f1": 92.12283044058745,
1265
+ "eval_loss": 0.0606299452483654,
1266
+ "eval_runtime": 4.4605,
1267
+ "eval_samples_per_second": 224.19,
1268
+ "step": 20400
1269
+ },
1270
+ {
1271
+ "epoch": 1.81,
1272
+ "learning_rate": 0.00011920804303730487,
1273
+ "loss": 0.0703,
1274
+ "step": 20500
1275
+ },
1276
+ {
1277
+ "epoch": 1.82,
1278
+ "eval_accuracy": 94.19999999999999,
1279
+ "eval_average_metrics": 93.32395833333332,
1280
+ "eval_f1": 92.44791666666666,
1281
+ "eval_loss": 0.060722097754478455,
1282
+ "eval_runtime": 4.5249,
1283
+ "eval_samples_per_second": 221.001,
1284
+ "step": 20600
1285
+ },
1286
+ {
1287
+ "epoch": 1.83,
1288
+ "eval_accuracy": 93.8,
1289
+ "eval_average_metrics": 92.76666666666665,
1290
+ "eval_f1": 91.73333333333332,
1291
+ "eval_loss": 0.05862819775938988,
1292
+ "eval_runtime": 4.5187,
1293
+ "eval_samples_per_second": 221.304,
1294
+ "step": 20800
1295
+ },
1296
+ {
1297
+ "epoch": 1.85,
1298
+ "learning_rate": 0.00011479848311138547,
1299
+ "loss": 0.0698,
1300
+ "step": 21000
1301
+ },
1302
+ {
1303
+ "epoch": 1.85,
1304
+ "eval_accuracy": 93.8,
1305
+ "eval_average_metrics": 92.79947089947089,
1306
+ "eval_f1": 91.7989417989418,
1307
+ "eval_loss": 0.06128830835223198,
1308
+ "eval_runtime": 4.521,
1309
+ "eval_samples_per_second": 221.188,
1310
+ "step": 21000
1311
+ },
1312
+ {
1313
+ "epoch": 1.87,
1314
+ "eval_accuracy": 93.8,
1315
+ "eval_average_metrics": 92.87402597402597,
1316
+ "eval_f1": 91.94805194805194,
1317
+ "eval_loss": 0.06374780088663101,
1318
+ "eval_runtime": 4.4879,
1319
+ "eval_samples_per_second": 222.822,
1320
+ "step": 21200
1321
+ },
1322
+ {
1323
+ "epoch": 1.89,
1324
+ "eval_accuracy": 93.89999999999999,
1325
+ "eval_average_metrics": 92.88874833555259,
1326
+ "eval_f1": 91.87749667110519,
1327
+ "eval_loss": 0.06154455617070198,
1328
+ "eval_runtime": 4.6011,
1329
+ "eval_samples_per_second": 217.337,
1330
+ "step": 21400
1331
+ },
1332
+ {
1333
+ "epoch": 1.9,
1334
+ "learning_rate": 0.00011038892318546609,
1335
+ "loss": 0.0709,
1336
+ "step": 21500
1337
+ },
1338
+ {
1339
+ "epoch": 1.9,
1340
+ "eval_accuracy": 94.5,
1341
+ "eval_average_metrics": 93.57843791722297,
1342
+ "eval_f1": 92.65687583444593,
1343
+ "eval_loss": 0.060043178498744965,
1344
+ "eval_runtime": 4.46,
1345
+ "eval_samples_per_second": 224.215,
1346
+ "step": 21600
1347
+ },
1348
+ {
1349
+ "epoch": 1.92,
1350
+ "eval_accuracy": 93.89999999999999,
1351
+ "eval_average_metrics": 92.93155467720685,
1352
+ "eval_f1": 91.9631093544137,
1353
+ "eval_loss": 0.061132512986660004,
1354
+ "eval_runtime": 4.4987,
1355
+ "eval_samples_per_second": 222.287,
1356
+ "step": 21800
1357
+ },
1358
+ {
1359
+ "epoch": 1.94,
1360
+ "learning_rate": 0.00010597936325954669,
1361
+ "loss": 0.0695,
1362
+ "step": 22000
1363
+ },
1364
+ {
1365
+ "epoch": 1.94,
1366
+ "eval_accuracy": 93.5,
1367
+ "eval_average_metrics": 92.56724581724582,
1368
+ "eval_f1": 91.63449163449164,
1369
+ "eval_loss": 0.06395059078931808,
1370
+ "eval_runtime": 4.6548,
1371
+ "eval_samples_per_second": 214.832,
1372
+ "step": 22000
1373
+ },
1374
+ {
1375
+ "epoch": 1.96,
1376
+ "eval_accuracy": 94.19999999999999,
1377
+ "eval_average_metrics": 93.29422572178477,
1378
+ "eval_f1": 92.38845144356955,
1379
+ "eval_loss": 0.06141780689358711,
1380
+ "eval_runtime": 4.4836,
1381
+ "eval_samples_per_second": 223.034,
1382
+ "step": 22200
1383
+ },
1384
+ {
1385
+ "epoch": 1.98,
1386
+ "eval_accuracy": 94.5,
1387
+ "eval_average_metrics": 93.65522875816993,
1388
+ "eval_f1": 92.81045751633987,
1389
+ "eval_loss": 0.058759015053510666,
1390
+ "eval_runtime": 4.5162,
1391
+ "eval_samples_per_second": 221.426,
1392
+ "step": 22400
1393
+ },
1394
+ {
1395
+ "epoch": 1.98,
1396
+ "learning_rate": 0.0001015698033336273,
1397
+ "loss": 0.0715,
1398
+ "step": 22500
1399
+ },
1400
+ {
1401
+ "epoch": 1.99,
1402
+ "eval_accuracy": 93.89999999999999,
1403
+ "eval_average_metrics": 92.97346805736636,
1404
+ "eval_f1": 92.04693611473273,
1405
+ "eval_loss": 0.06228160858154297,
1406
+ "eval_runtime": 4.4726,
1407
+ "eval_samples_per_second": 223.582,
1408
+ "step": 22600
1409
+ },
1410
+ {
1411
+ "epoch": 2.01,
1412
+ "eval_accuracy": 94.6,
1413
+ "eval_average_metrics": 93.73799472295514,
1414
+ "eval_f1": 92.87598944591029,
1415
+ "eval_loss": 0.05991463363170624,
1416
+ "eval_runtime": 4.5003,
1417
+ "eval_samples_per_second": 222.206,
1418
+ "step": 22800
1419
+ },
1420
+ {
1421
+ "epoch": 2.03,
1422
+ "learning_rate": 9.71602434077079e-05,
1423
+ "loss": 0.0682,
1424
+ "step": 23000
1425
+ },
1426
+ {
1427
+ "epoch": 2.03,
1428
+ "eval_accuracy": 94.0,
1429
+ "eval_average_metrics": 93.1038961038961,
1430
+ "eval_f1": 92.20779220779221,
1431
+ "eval_loss": 0.061682794243097305,
1432
+ "eval_runtime": 4.611,
1433
+ "eval_samples_per_second": 216.874,
1434
+ "step": 23000
1435
+ },
1436
+ {
1437
+ "epoch": 2.05,
1438
+ "eval_accuracy": 93.5,
1439
+ "eval_average_metrics": 92.55645161290323,
1440
+ "eval_f1": 91.61290322580645,
1441
+ "eval_loss": 0.06373216211795807,
1442
+ "eval_runtime": 4.6044,
1443
+ "eval_samples_per_second": 217.186,
1444
+ "step": 23200
1445
+ },
1446
+ {
1447
+ "epoch": 2.06,
1448
+ "eval_accuracy": 94.3,
1449
+ "eval_average_metrics": 93.34492656875835,
1450
+ "eval_f1": 92.3898531375167,
1451
+ "eval_loss": 0.05869932472705841,
1452
+ "eval_runtime": 4.4706,
1453
+ "eval_samples_per_second": 223.684,
1454
+ "step": 23400
1455
+ },
1456
+ {
1457
+ "epoch": 2.07,
1458
+ "learning_rate": 9.27506834817885e-05,
1459
+ "loss": 0.0652,
1460
+ "step": 23500
1461
+ },
1462
+ {
1463
+ "epoch": 2.08,
1464
+ "eval_accuracy": 94.39999999999999,
1465
+ "eval_average_metrics": 93.50606860158311,
1466
+ "eval_f1": 92.61213720316623,
1467
+ "eval_loss": 0.06166525185108185,
1468
+ "eval_runtime": 4.5725,
1469
+ "eval_samples_per_second": 218.699,
1470
+ "step": 23600
1471
+ },
1472
+ {
1473
+ "epoch": 2.1,
1474
+ "eval_accuracy": 94.19999999999999,
1475
+ "eval_average_metrics": 93.33376623376623,
1476
+ "eval_f1": 92.46753246753246,
1477
+ "eval_loss": 0.06055561453104019,
1478
+ "eval_runtime": 4.5492,
1479
+ "eval_samples_per_second": 219.818,
1480
+ "step": 23800
1481
+ },
1482
+ {
1483
+ "epoch": 2.12,
1484
+ "learning_rate": 8.834112355586911e-05,
1485
+ "loss": 0.0691,
1486
+ "step": 24000
1487
+ },
1488
+ {
1489
+ "epoch": 2.12,
1490
+ "eval_accuracy": 93.7,
1491
+ "eval_average_metrics": 92.75377113133939,
1492
+ "eval_f1": 91.8075422626788,
1493
+ "eval_loss": 0.06339309364557266,
1494
+ "eval_runtime": 4.6365,
1495
+ "eval_samples_per_second": 215.678,
1496
+ "step": 24000
1497
+ },
1498
+ {
1499
+ "epoch": 2.13,
1500
+ "eval_accuracy": 94.1,
1501
+ "eval_average_metrics": 93.1633069828722,
1502
+ "eval_f1": 92.2266139657444,
1503
+ "eval_loss": 0.06319531798362732,
1504
+ "eval_runtime": 4.5722,
1505
+ "eval_samples_per_second": 218.712,
1506
+ "step": 24200
1507
+ },
1508
+ {
1509
+ "epoch": 2.15,
1510
+ "eval_accuracy": 94.1,
1511
+ "eval_average_metrics": 93.14271523178809,
1512
+ "eval_f1": 92.18543046357617,
1513
+ "eval_loss": 0.060979247093200684,
1514
+ "eval_runtime": 4.4363,
1515
+ "eval_samples_per_second": 225.412,
1516
+ "step": 24400
1517
+ },
1518
+ {
1519
+ "epoch": 2.16,
1520
+ "learning_rate": 8.393156362994973e-05,
1521
+ "loss": 0.0679,
1522
+ "step": 24500
1523
+ },
1524
+ {
1525
+ "epoch": 2.17,
1526
+ "eval_accuracy": 94.3,
1527
+ "eval_average_metrics": 93.38513870541613,
1528
+ "eval_f1": 92.47027741083225,
1529
+ "eval_loss": 0.061841148883104324,
1530
+ "eval_runtime": 4.4945,
1531
+ "eval_samples_per_second": 222.493,
1532
+ "step": 24600
1533
+ },
1534
+ {
1535
+ "epoch": 2.19,
1536
+ "eval_accuracy": 94.39999999999999,
1537
+ "eval_average_metrics": 93.51578947368421,
1538
+ "eval_f1": 92.63157894736842,
1539
+ "eval_loss": 0.06020021066069603,
1540
+ "eval_runtime": 4.6482,
1541
+ "eval_samples_per_second": 215.136,
1542
+ "step": 24800
1543
+ },
1544
+ {
1545
+ "epoch": 2.2,
1546
+ "learning_rate": 7.952200370403033e-05,
1547
+ "loss": 0.0678,
1548
+ "step": 25000
1549
+ },
1550
+ {
1551
+ "epoch": 2.2,
1552
+ "eval_accuracy": 94.69999999999999,
1553
+ "eval_average_metrics": 93.86773981603153,
1554
+ "eval_f1": 93.03547963206307,
1555
+ "eval_loss": 0.061626460403203964,
1556
+ "eval_runtime": 4.4847,
1557
+ "eval_samples_per_second": 222.982,
1558
+ "step": 25000
1559
+ },
1560
+ {
1561
+ "epoch": 2.22,
1562
+ "eval_accuracy": 94.3,
1563
+ "eval_average_metrics": 93.35505992010653,
1564
+ "eval_f1": 92.41011984021304,
1565
+ "eval_loss": 0.05932234972715378,
1566
+ "eval_runtime": 4.444,
1567
+ "eval_samples_per_second": 225.02,
1568
+ "step": 25200
1569
+ },
1570
+ {
1571
+ "epoch": 2.24,
1572
+ "eval_accuracy": 94.3,
1573
+ "eval_average_metrics": 93.35505992010653,
1574
+ "eval_f1": 92.41011984021304,
1575
+ "eval_loss": 0.05860959738492966,
1576
+ "eval_runtime": 4.4729,
1577
+ "eval_samples_per_second": 223.568,
1578
+ "step": 25400
1579
+ },
1580
+ {
1581
+ "epoch": 2.25,
1582
+ "learning_rate": 7.511244377811093e-05,
1583
+ "loss": 0.0687,
1584
+ "step": 25500
1585
+ },
1586
+ {
1587
+ "epoch": 2.26,
1588
+ "eval_accuracy": 94.6,
1589
+ "eval_average_metrics": 93.74736842105261,
1590
+ "eval_f1": 92.89473684210525,
1591
+ "eval_loss": 0.05995591729879379,
1592
+ "eval_runtime": 4.6311,
1593
+ "eval_samples_per_second": 215.933,
1594
+ "step": 25600
1595
+ },
1596
+ {
1597
+ "epoch": 2.28,
1598
+ "eval_accuracy": 94.39999999999999,
1599
+ "eval_average_metrics": 93.51578947368421,
1600
+ "eval_f1": 92.63157894736842,
1601
+ "eval_loss": 0.06067919358611107,
1602
+ "eval_runtime": 4.4705,
1603
+ "eval_samples_per_second": 223.69,
1604
+ "step": 25800
1605
+ },
1606
+ {
1607
+ "epoch": 2.29,
1608
+ "learning_rate": 7.070288385219154e-05,
1609
+ "loss": 0.0665,
1610
+ "step": 26000
1611
+ },
1612
+ {
1613
+ "epoch": 2.29,
1614
+ "eval_accuracy": 94.6,
1615
+ "eval_average_metrics": 93.74736842105261,
1616
+ "eval_f1": 92.89473684210525,
1617
+ "eval_loss": 0.06090604141354561,
1618
+ "eval_runtime": 4.4777,
1619
+ "eval_samples_per_second": 223.33,
1620
+ "step": 26000
1621
+ },
1622
+ {
1623
+ "epoch": 2.31,
1624
+ "eval_accuracy": 94.5,
1625
+ "eval_average_metrics": 93.63633377135348,
1626
+ "eval_f1": 92.77266754270696,
1627
+ "eval_loss": 0.06175965070724487,
1628
+ "eval_runtime": 4.5456,
1629
+ "eval_samples_per_second": 219.993,
1630
+ "step": 26200
1631
+ },
1632
+ {
1633
+ "epoch": 2.33,
1634
+ "eval_accuracy": 94.1,
1635
+ "eval_average_metrics": 93.1937908496732,
1636
+ "eval_f1": 92.2875816993464,
1637
+ "eval_loss": 0.062108419835567474,
1638
+ "eval_runtime": 4.5414,
1639
+ "eval_samples_per_second": 220.196,
1640
+ "step": 26400
1641
+ },
1642
+ {
1643
+ "epoch": 2.34,
1644
+ "learning_rate": 6.629332392627216e-05,
1645
+ "loss": 0.0681,
1646
+ "step": 26500
1647
+ },
1648
+ {
1649
+ "epoch": 2.35,
1650
+ "eval_accuracy": 94.39999999999999,
1651
+ "eval_average_metrics": 93.48647214854111,
1652
+ "eval_f1": 92.57294429708222,
1653
+ "eval_loss": 0.060741446912288666,
1654
+ "eval_runtime": 4.4624,
1655
+ "eval_samples_per_second": 224.096,
1656
+ "step": 26600
1657
+ },
1658
+ {
1659
+ "epoch": 2.36,
1660
+ "eval_accuracy": 94.39999999999999,
1661
+ "eval_average_metrics": 93.4962962962963,
1662
+ "eval_f1": 92.5925925925926,
1663
+ "eval_loss": 0.06029416620731354,
1664
+ "eval_runtime": 4.5256,
1665
+ "eval_samples_per_second": 220.966,
1666
+ "step": 26800
1667
+ },
1668
+ {
1669
+ "epoch": 2.38,
1670
+ "learning_rate": 6.188376400035276e-05,
1671
+ "loss": 0.0667,
1672
+ "step": 27000
1673
+ },
1674
+ {
1675
+ "epoch": 2.38,
1676
+ "eval_accuracy": 94.69999999999999,
1677
+ "eval_average_metrics": 93.84933949801848,
1678
+ "eval_f1": 92.99867899603699,
1679
+ "eval_loss": 0.059210509061813354,
1680
+ "eval_runtime": 4.8741,
1681
+ "eval_samples_per_second": 205.167,
1682
+ "step": 27000
1683
+ },
1684
+ {
1685
+ "epoch": 2.4,
1686
+ "eval_accuracy": 94.3,
1687
+ "eval_average_metrics": 93.41474442988203,
1688
+ "eval_f1": 92.52948885976409,
1689
+ "eval_loss": 0.0605180561542511,
1690
+ "eval_runtime": 4.5293,
1691
+ "eval_samples_per_second": 220.783,
1692
+ "step": 27200
1693
+ },
1694
+ {
1695
+ "epoch": 2.42,
1696
+ "eval_accuracy": 94.5,
1697
+ "eval_average_metrics": 93.64580602883355,
1698
+ "eval_f1": 92.7916120576671,
1699
+ "eval_loss": 0.060811493545770645,
1700
+ "eval_runtime": 4.5424,
1701
+ "eval_samples_per_second": 220.147,
1702
+ "step": 27400
1703
+ },
1704
+ {
1705
+ "epoch": 2.43,
1706
+ "learning_rate": 5.747420407443336e-05,
1707
+ "loss": 0.0685,
1708
+ "step": 27500
1709
+ },
1710
+ {
1711
+ "epoch": 2.43,
1712
+ "eval_accuracy": 94.1,
1713
+ "eval_average_metrics": 93.11141522029372,
1714
+ "eval_f1": 92.12283044058745,
1715
+ "eval_loss": 0.05978462100028992,
1716
+ "eval_runtime": 4.4831,
1717
+ "eval_samples_per_second": 223.06,
1718
+ "step": 27600
1719
+ },
1720
+ {
1721
+ "epoch": 2.45,
1722
+ "eval_accuracy": 93.8,
1723
+ "eval_average_metrics": 92.87402597402597,
1724
+ "eval_f1": 91.94805194805194,
1725
+ "eval_loss": 0.06267183274030685,
1726
+ "eval_runtime": 4.4576,
1727
+ "eval_samples_per_second": 224.334,
1728
+ "step": 27800
1729
+ },
1730
+ {
1731
+ "epoch": 2.47,
1732
+ "learning_rate": 5.3064644148513973e-05,
1733
+ "loss": 0.0672,
1734
+ "step": 28000
1735
+ },
1736
+ {
1737
+ "epoch": 2.47,
1738
+ "eval_accuracy": 94.0,
1739
+ "eval_average_metrics": 93.06299212598425,
1740
+ "eval_f1": 92.1259842519685,
1741
+ "eval_loss": 0.061355073004961014,
1742
+ "eval_runtime": 4.5194,
1743
+ "eval_samples_per_second": 221.27,
1744
+ "step": 28000
1745
+ },
1746
+ {
1747
+ "epoch": 2.49,
1748
+ "eval_accuracy": 94.19999999999999,
1749
+ "eval_average_metrics": 93.29422572178477,
1750
+ "eval_f1": 92.38845144356955,
1751
+ "eval_loss": 0.06131287291646004,
1752
+ "eval_runtime": 4.5837,
1753
+ "eval_samples_per_second": 218.165,
1754
+ "step": 28200
1755
+ },
1756
+ {
1757
+ "epoch": 2.5,
1758
+ "eval_accuracy": 94.0,
1759
+ "eval_average_metrics": 93.04221635883906,
1760
+ "eval_f1": 92.0844327176781,
1761
+ "eval_loss": 0.06105473265051842,
1762
+ "eval_runtime": 4.5101,
1763
+ "eval_samples_per_second": 221.726,
1764
+ "step": 28400
1765
+ },
1766
+ {
1767
+ "epoch": 2.51,
1768
+ "learning_rate": 4.8655084222594584e-05,
1769
+ "loss": 0.0656,
1770
+ "step": 28500
1771
+ },
1772
+ {
1773
+ "epoch": 2.52,
1774
+ "eval_accuracy": 94.19999999999999,
1775
+ "eval_average_metrics": 93.25384615384615,
1776
+ "eval_f1": 92.3076923076923,
1777
+ "eval_loss": 0.06093791127204895,
1778
+ "eval_runtime": 4.6588,
1779
+ "eval_samples_per_second": 214.647,
1780
+ "step": 28600
1781
+ },
1782
+ {
1783
+ "epoch": 2.54,
1784
+ "eval_accuracy": 94.3,
1785
+ "eval_average_metrics": 93.40492772667542,
1786
+ "eval_f1": 92.50985545335085,
1787
+ "eval_loss": 0.061501096934080124,
1788
+ "eval_runtime": 4.5262,
1789
+ "eval_samples_per_second": 220.936,
1790
+ "step": 28800
1791
+ },
1792
+ {
1793
+ "epoch": 2.56,
1794
+ "learning_rate": 4.424552429667519e-05,
1795
+ "loss": 0.067,
1796
+ "step": 29000
1797
+ },
1798
+ {
1799
+ "epoch": 2.56,
1800
+ "eval_accuracy": 94.19999999999999,
1801
+ "eval_average_metrics": 93.27414248021108,
1802
+ "eval_f1": 92.34828496042218,
1803
+ "eval_loss": 0.05971948057413101,
1804
+ "eval_runtime": 4.5243,
1805
+ "eval_samples_per_second": 221.027,
1806
+ "step": 29000
1807
+ },
1808
+ {
1809
+ "epoch": 2.58,
1810
+ "eval_accuracy": 93.60000000000001,
1811
+ "eval_average_metrics": 92.65492227979274,
1812
+ "eval_f1": 91.70984455958549,
1813
+ "eval_loss": 0.063376285135746,
1814
+ "eval_runtime": 4.6334,
1815
+ "eval_samples_per_second": 215.825,
1816
+ "step": 29200
1817
+ },
1818
+ {
1819
+ "epoch": 2.59,
1820
+ "eval_accuracy": 94.19999999999999,
1821
+ "eval_average_metrics": 93.26402116402116,
1822
+ "eval_f1": 92.32804232804234,
1823
+ "eval_loss": 0.06081530451774597,
1824
+ "eval_runtime": 4.7045,
1825
+ "eval_samples_per_second": 212.561,
1826
+ "step": 29400
1827
+ },
1828
+ {
1829
+ "epoch": 2.6,
1830
+ "learning_rate": 3.98359643707558e-05,
1831
+ "loss": 0.0675,
1832
+ "step": 29500
1833
+ },
1834
+ {
1835
+ "epoch": 2.61,
1836
+ "eval_accuracy": 94.1,
1837
+ "eval_average_metrics": 93.18368283093054,
1838
+ "eval_f1": 92.26736566186108,
1839
+ "eval_loss": 0.062262628227472305,
1840
+ "eval_runtime": 4.6273,
1841
+ "eval_samples_per_second": 216.108,
1842
+ "step": 29600
1843
+ },
1844
+ {
1845
+ "epoch": 2.63,
1846
+ "eval_accuracy": 94.3,
1847
+ "eval_average_metrics": 93.3751655629139,
1848
+ "eval_f1": 92.45033112582782,
1849
+ "eval_loss": 0.06007382273674011,
1850
+ "eval_runtime": 4.5698,
1851
+ "eval_samples_per_second": 218.83,
1852
+ "step": 29800
1853
+ },
1854
+ {
1855
+ "epoch": 2.65,
1856
+ "learning_rate": 3.54264044448364e-05,
1857
+ "loss": 0.0682,
1858
+ "step": 30000
1859
+ },
1860
+ {
1861
+ "epoch": 2.65,
1862
+ "eval_accuracy": 94.1,
1863
+ "eval_average_metrics": 93.1633069828722,
1864
+ "eval_f1": 92.2266139657444,
1865
+ "eval_loss": 0.0607917495071888,
1866
+ "eval_runtime": 4.6423,
1867
+ "eval_samples_per_second": 215.411,
1868
+ "step": 30000
1869
+ },
1870
+ {
1871
+ "epoch": 2.66,
1872
+ "eval_accuracy": 94.39999999999999,
1873
+ "eval_average_metrics": 93.5254593175853,
1874
+ "eval_f1": 92.6509186351706,
1875
+ "eval_loss": 0.06171978637576103,
1876
+ "eval_runtime": 4.4956,
1877
+ "eval_samples_per_second": 222.439,
1878
+ "step": 30200
1879
+ },
1880
+ {
1881
+ "epoch": 2.68,
1882
+ "eval_accuracy": 94.39999999999999,
1883
+ "eval_average_metrics": 93.4566844919786,
1884
+ "eval_f1": 92.51336898395722,
1885
+ "eval_loss": 0.05954898148775101,
1886
+ "eval_runtime": 4.5069,
1887
+ "eval_samples_per_second": 221.881,
1888
+ "step": 30400
1889
+ },
1890
+ {
1891
+ "epoch": 2.69,
1892
+ "learning_rate": 3.1016844518917006e-05,
1893
+ "loss": 0.0684,
1894
+ "step": 30500
1895
+ },
1896
+ {
1897
+ "epoch": 2.7,
1898
+ "eval_accuracy": 94.5,
1899
+ "eval_average_metrics": 93.64580602883355,
1900
+ "eval_f1": 92.7916120576671,
1901
+ "eval_loss": 0.06073066592216492,
1902
+ "eval_runtime": 4.5568,
1903
+ "eval_samples_per_second": 219.452,
1904
+ "step": 30600
1905
+ },
1906
+ {
1907
+ "epoch": 2.72,
1908
+ "eval_accuracy": 94.5,
1909
+ "eval_average_metrics": 93.64580602883355,
1910
+ "eval_f1": 92.7916120576671,
1911
+ "eval_loss": 0.06212097778916359,
1912
+ "eval_runtime": 4.4991,
1913
+ "eval_samples_per_second": 222.265,
1914
+ "step": 30800
1915
+ },
1916
+ {
1917
+ "epoch": 2.73,
1918
+ "learning_rate": 2.6607284592997617e-05,
1919
+ "loss": 0.0644,
1920
+ "step": 31000
1921
+ },
1922
+ {
1923
+ "epoch": 2.73,
1924
+ "eval_accuracy": 94.5,
1925
+ "eval_average_metrics": 93.64580602883355,
1926
+ "eval_f1": 92.7916120576671,
1927
+ "eval_loss": 0.061464857310056686,
1928
+ "eval_runtime": 4.6313,
1929
+ "eval_samples_per_second": 215.924,
1930
+ "step": 31000
1931
+ },
1932
+ {
1933
+ "epoch": 2.75,
1934
+ "eval_accuracy": 94.19999999999999,
1935
+ "eval_average_metrics": 93.28421052631577,
1936
+ "eval_f1": 92.36842105263158,
1937
+ "eval_loss": 0.06165764480829239,
1938
+ "eval_runtime": 4.4772,
1939
+ "eval_samples_per_second": 223.356,
1940
+ "step": 31200
1941
+ },
1942
+ {
1943
+ "epoch": 2.77,
1944
+ "eval_accuracy": 94.19999999999999,
1945
+ "eval_average_metrics": 93.27414248021108,
1946
+ "eval_f1": 92.34828496042218,
1947
+ "eval_loss": 0.061222758144140244,
1948
+ "eval_runtime": 4.485,
1949
+ "eval_samples_per_second": 222.965,
1950
+ "step": 31400
1951
+ },
1952
+ {
1953
+ "epoch": 2.78,
1954
+ "learning_rate": 2.219772466707822e-05,
1955
+ "loss": 0.0656,
1956
+ "step": 31500
1957
+ },
1958
+ {
1959
+ "epoch": 2.79,
1960
+ "eval_accuracy": 94.3,
1961
+ "eval_average_metrics": 93.40492772667542,
1962
+ "eval_f1": 92.50985545335085,
1963
+ "eval_loss": 0.06175553798675537,
1964
+ "eval_runtime": 4.4473,
1965
+ "eval_samples_per_second": 224.857,
1966
+ "step": 31600
1967
+ },
1968
+ {
1969
+ "epoch": 2.8,
1970
+ "eval_accuracy": 94.0,
1971
+ "eval_average_metrics": 93.04221635883906,
1972
+ "eval_f1": 92.0844327176781,
1973
+ "eval_loss": 0.06141304597258568,
1974
+ "eval_runtime": 4.5384,
1975
+ "eval_samples_per_second": 220.341,
1976
+ "step": 31800
1977
+ },
1978
+ {
1979
+ "epoch": 2.82,
1980
+ "learning_rate": 1.778816474115883e-05,
1981
+ "loss": 0.0682,
1982
+ "step": 32000
1983
+ },
1984
+ {
1985
+ "epoch": 2.82,
1986
+ "eval_accuracy": 94.3,
1987
+ "eval_average_metrics": 93.39505928853755,
1988
+ "eval_f1": 92.49011857707511,
1989
+ "eval_loss": 0.06122256815433502,
1990
+ "eval_runtime": 4.532,
1991
+ "eval_samples_per_second": 220.652,
1992
+ "step": 32000
1993
+ },
1994
+ {
1995
+ "epoch": 2.84,
1996
+ "eval_accuracy": 94.39999999999999,
1997
+ "eval_average_metrics": 93.5254593175853,
1998
+ "eval_f1": 92.6509186351706,
1999
+ "eval_loss": 0.06179660186171532,
2000
+ "eval_runtime": 4.5432,
2001
+ "eval_samples_per_second": 220.11,
2002
+ "step": 32200
2003
+ },
2004
+ {
2005
+ "epoch": 2.86,
2006
+ "eval_accuracy": 94.19999999999999,
2007
+ "eval_average_metrics": 93.26402116402116,
2008
+ "eval_f1": 92.32804232804234,
2009
+ "eval_loss": 0.060935478657484055,
2010
+ "eval_runtime": 4.5308,
2011
+ "eval_samples_per_second": 220.712,
2012
+ "step": 32400
2013
+ },
2014
+ {
2015
+ "epoch": 2.87,
2016
+ "learning_rate": 1.3378604815239437e-05,
2017
+ "loss": 0.0628,
2018
+ "step": 32500
2019
+ },
2020
+ {
2021
+ "epoch": 2.88,
2022
+ "eval_accuracy": 94.39999999999999,
2023
+ "eval_average_metrics": 93.51578947368421,
2024
+ "eval_f1": 92.63157894736842,
2025
+ "eval_loss": 0.06167520210146904,
2026
+ "eval_runtime": 4.5363,
2027
+ "eval_samples_per_second": 220.442,
2028
+ "step": 32600
2029
+ },
2030
+ {
2031
+ "epoch": 2.89,
2032
+ "eval_accuracy": 94.39999999999999,
2033
+ "eval_average_metrics": 93.51578947368421,
2034
+ "eval_f1": 92.63157894736842,
2035
+ "eval_loss": 0.061225228011608124,
2036
+ "eval_runtime": 4.5208,
2037
+ "eval_samples_per_second": 221.199,
2038
+ "step": 32800
2039
+ },
2040
+ {
2041
+ "epoch": 2.91,
2042
+ "learning_rate": 8.969044889320046e-06,
2043
+ "loss": 0.0659,
2044
+ "step": 33000
2045
+ },
2046
+ {
2047
+ "epoch": 2.91,
2048
+ "eval_accuracy": 94.3,
2049
+ "eval_average_metrics": 93.3751655629139,
2050
+ "eval_f1": 92.45033112582782,
2051
+ "eval_loss": 0.06039771810173988,
2052
+ "eval_runtime": 4.5643,
2053
+ "eval_samples_per_second": 219.093,
2054
+ "step": 33000
2055
+ },
2056
+ {
2057
+ "epoch": 2.93,
2058
+ "eval_accuracy": 94.3,
2059
+ "eval_average_metrics": 93.40492772667542,
2060
+ "eval_f1": 92.50985545335085,
2061
+ "eval_loss": 0.06096240133047104,
2062
+ "eval_runtime": 4.5827,
2063
+ "eval_samples_per_second": 218.214,
2064
+ "step": 33200
2065
+ },
2066
+ {
2067
+ "epoch": 2.95,
2068
+ "eval_accuracy": 94.3,
2069
+ "eval_average_metrics": 93.38513870541613,
2070
+ "eval_f1": 92.47027741083225,
2071
+ "eval_loss": 0.060673393309116364,
2072
+ "eval_runtime": 4.9126,
2073
+ "eval_samples_per_second": 203.559,
2074
+ "step": 33400
2075
+ },
2076
+ {
2077
+ "epoch": 2.95,
2078
+ "learning_rate": 4.559484963400652e-06,
2079
+ "loss": 0.0692,
2080
+ "step": 33500
2081
+ },
2082
+ {
2083
+ "epoch": 2.96,
2084
+ "eval_accuracy": 94.19999999999999,
2085
+ "eval_average_metrics": 93.26402116402116,
2086
+ "eval_f1": 92.32804232804234,
2087
+ "eval_loss": 0.06072871759533882,
2088
+ "eval_runtime": 4.5081,
2089
+ "eval_samples_per_second": 221.824,
2090
+ "step": 33600
2091
+ },
2092
+ {
2093
+ "epoch": 2.98,
2094
+ "eval_accuracy": 94.19999999999999,
2095
+ "eval_average_metrics": 93.26402116402116,
2096
+ "eval_f1": 92.32804232804234,
2097
+ "eval_loss": 0.06088118627667427,
2098
+ "eval_runtime": 4.513,
2099
+ "eval_samples_per_second": 221.581,
2100
+ "step": 33800
2101
+ },
2102
+ {
2103
+ "epoch": 3.0,
2104
+ "learning_rate": 1.4992503748125936e-07,
2105
+ "loss": 0.0654,
2106
+ "step": 34000
2107
+ },
2108
+ {
2109
+ "epoch": 3.0,
2110
+ "eval_accuracy": 94.19999999999999,
2111
+ "eval_average_metrics": 93.26402116402116,
2112
+ "eval_f1": 92.32804232804234,
2113
+ "eval_loss": 0.060787323862314224,
2114
+ "eval_runtime": 4.526,
2115
+ "eval_samples_per_second": 220.947,
2116
+ "step": 34000
2117
+ },
2118
  {
2119
  "epoch": 3.0,
2120
+ "step": 34017,
2121
+ "total_flos": 1.0629344517601075e+17,
2122
+ "train_loss": 0.07169761398949699,
2123
+ "train_runtime": 13428.6442,
2124
+ "train_samples_per_second": 81.061,
2125
+ "train_steps_per_second": 2.533
2126
  }
2127
  ],
2128
+ "max_steps": 34017,
2129
  "num_train_epochs": 3,
2130
+ "total_flos": 1.0629344517601075e+17,
2131
  "trial_name": null,
2132
  "trial_params": null
2133
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61d24a33980bad97e5f675f768a1ff73bc9c32eeff971385a48f1f191aeeabfd
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7602f3c27193a11558b330a2f29b113c0d2a361010f05248a4c20f53c84c028c
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "qqp", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 3, "output_dir": "outputs/bitfit/t5-base/qqp", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "qqp", "test_dataset_config_name": ["en"], "test_dataset_name": "qqp", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}
 
1
+ {"dataset_config_name": ["en"], "delta_type": "lora", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "qqp", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "lora_r": 8, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 3, "output_dir": "outputs/bitfit/t5-base/qqp", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "qqp", "test_dataset_config_name": ["en"], "test_dataset_name": "qqp", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}