varcoder commited on
Commit
00aaf56
·
verified ·
1 Parent(s): 2768bc0

End of training

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [microsoft/resnet-101](https://huggingface.co/microsoft/resnet-101) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.6683
21
- - Accuracy: 0.7676
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [microsoft/resnet-101](https://huggingface.co/microsoft/resnet-101) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.6763
21
+ - Accuracy: 0.7701
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.94,
3
- "eval_accuracy": 0.7436274509803922,
4
- "eval_loss": 0.831763505935669,
5
- "eval_runtime": 25.9823,
6
- "eval_samples_per_second": 78.515,
7
- "eval_steps_per_second": 2.463,
8
- "total_flos": 1.5485654902849044e+18,
9
- "train_loss": 1.3921774001348586,
10
- "train_runtime": 1093.5355,
11
- "train_samples_per_second": 37.306,
12
- "train_steps_per_second": 0.288
13
  }
 
1
  {
2
+ "epoch": 9.8,
3
+ "eval_accuracy": 0.7700980392156863,
4
+ "eval_loss": 0.6762834191322327,
5
+ "eval_runtime": 27.2812,
6
+ "eval_samples_per_second": 74.777,
7
+ "eval_steps_per_second": 2.346,
8
+ "total_flos": 3.072509726495183e+18,
9
+ "train_loss": 1.3191132202148437,
10
+ "train_runtime": 2261.7516,
11
+ "train_samples_per_second": 36.074,
12
+ "train_steps_per_second": 0.111
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.94,
3
- "eval_accuracy": 0.7436274509803922,
4
- "eval_loss": 0.831763505935669,
5
- "eval_runtime": 25.9823,
6
- "eval_samples_per_second": 78.515,
7
- "eval_steps_per_second": 2.463
8
  }
 
1
  {
2
+ "epoch": 9.8,
3
+ "eval_accuracy": 0.7700980392156863,
4
+ "eval_loss": 0.6762834191322327,
5
+ "eval_runtime": 27.2812,
6
+ "eval_samples_per_second": 74.777,
7
+ "eval_steps_per_second": 2.346
8
  }
runs/Jan13_19-54-30_a732f801c5be/events.out.tfevents.1705177969.a732f801c5be.827.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebd8aaffa626a56c6d41efef37fe5ae608ebdc9d39b31f5edd35f950ed4c1527
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.94,
3
- "total_flos": 1.5485654902849044e+18,
4
- "train_loss": 1.3921774001348586,
5
- "train_runtime": 1093.5355,
6
- "train_samples_per_second": 37.306,
7
- "train_steps_per_second": 0.288
8
  }
 
1
  {
2
+ "epoch": 9.8,
3
+ "total_flos": 3.072509726495183e+18,
4
+ "train_loss": 1.3191132202148437,
5
+ "train_runtime": 2261.7516,
6
+ "train_samples_per_second": 36.074,
7
+ "train_steps_per_second": 0.111
8
  }
trainer_state.json CHANGED
@@ -1,451 +1,268 @@
1
  {
2
- "best_metric": 0.7436274509803922,
3
- "best_model_checkpoint": "resnet-101-finetuned-CivilEng11k/checkpoint-315",
4
- "epoch": 4.9411764705882355,
5
  "eval_steps": 500,
6
- "global_step": 315,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.08,
13
- "learning_rate": 1.5625e-05,
14
- "loss": 2.619,
15
- "step": 5
16
- },
17
- {
18
- "epoch": 0.16,
19
- "learning_rate": 3.125e-05,
20
- "loss": 2.6155,
21
  "step": 10
22
  },
23
  {
24
- "epoch": 0.24,
25
- "learning_rate": 4.6875e-05,
26
- "loss": 2.5999,
27
- "step": 15
28
- },
29
- {
30
- "epoch": 0.31,
31
- "learning_rate": 6.25e-05,
32
- "loss": 2.577,
33
  "step": 20
34
  },
35
  {
36
- "epoch": 0.39,
37
- "learning_rate": 7.8125e-05,
38
- "loss": 2.5468,
 
 
 
39
  "step": 25
40
  },
41
  {
42
- "epoch": 0.47,
43
- "learning_rate": 9.375e-05,
44
- "loss": 2.5146,
45
  "step": 30
46
  },
47
  {
48
- "epoch": 0.55,
49
- "learning_rate": 9.893992932862192e-05,
50
- "loss": 2.4682,
51
- "step": 35
52
- },
53
- {
54
- "epoch": 0.63,
55
- "learning_rate": 9.71731448763251e-05,
56
- "loss": 2.4167,
57
  "step": 40
58
  },
59
  {
60
- "epoch": 0.71,
61
- "learning_rate": 9.540636042402827e-05,
62
- "loss": 2.3337,
63
- "step": 45
64
- },
65
- {
66
- "epoch": 0.78,
67
- "learning_rate": 9.363957597173145e-05,
68
- "loss": 2.2503,
69
  "step": 50
70
  },
71
  {
72
- "epoch": 0.86,
73
- "learning_rate": 9.187279151943463e-05,
74
- "loss": 2.1624,
75
- "step": 55
 
 
 
76
  },
77
  {
78
- "epoch": 0.94,
79
- "learning_rate": 9.01060070671378e-05,
80
- "loss": 2.0986,
81
  "step": 60
82
  },
83
  {
84
- "epoch": 0.99,
85
- "eval_accuracy": 0.5431372549019607,
86
- "eval_loss": 1.9500516653060913,
87
- "eval_runtime": 32.3617,
88
- "eval_samples_per_second": 63.037,
89
- "eval_steps_per_second": 1.978,
90
- "step": 63
91
- },
92
- {
93
- "epoch": 1.02,
94
- "learning_rate": 8.833922261484099e-05,
95
- "loss": 1.9785,
96
- "step": 65
97
- },
98
- {
99
- "epoch": 1.1,
100
- "learning_rate": 8.657243816254418e-05,
101
- "loss": 1.8484,
102
  "step": 70
103
  },
104
  {
105
- "epoch": 1.18,
106
- "learning_rate": 8.480565371024736e-05,
107
- "loss": 1.7483,
108
- "step": 75
 
 
 
109
  },
110
  {
111
- "epoch": 1.25,
112
- "learning_rate": 8.303886925795053e-05,
113
- "loss": 1.6201,
114
  "step": 80
115
  },
116
  {
117
- "epoch": 1.33,
118
- "learning_rate": 8.127208480565371e-05,
119
- "loss": 1.5606,
120
- "step": 85
121
- },
122
- {
123
- "epoch": 1.41,
124
- "learning_rate": 7.95053003533569e-05,
125
- "loss": 1.4728,
126
  "step": 90
127
  },
128
  {
129
- "epoch": 1.49,
130
- "learning_rate": 7.773851590106007e-05,
131
- "loss": 1.5132,
132
- "step": 95
133
- },
134
- {
135
- "epoch": 1.57,
136
- "learning_rate": 7.597173144876325e-05,
137
- "loss": 1.4121,
138
  "step": 100
139
  },
140
  {
141
- "epoch": 1.65,
142
- "learning_rate": 7.420494699646644e-05,
143
- "loss": 1.4417,
144
- "step": 105
 
 
 
145
  },
146
  {
147
- "epoch": 1.73,
148
- "learning_rate": 7.243816254416963e-05,
149
- "loss": 1.4318,
150
  "step": 110
151
  },
152
  {
153
- "epoch": 1.8,
154
- "learning_rate": 7.06713780918728e-05,
155
- "loss": 1.3735,
156
- "step": 115
157
- },
158
- {
159
- "epoch": 1.88,
160
- "learning_rate": 6.890459363957598e-05,
161
- "loss": 1.3202,
162
  "step": 120
163
  },
164
  {
165
- "epoch": 1.96,
166
- "learning_rate": 6.713780918727916e-05,
167
- "loss": 1.299,
168
- "step": 125
169
- },
170
- {
171
- "epoch": 1.99,
172
- "eval_accuracy": 0.5779411764705882,
173
- "eval_loss": 1.253934621810913,
174
- "eval_runtime": 35.8869,
175
- "eval_samples_per_second": 56.845,
176
- "eval_steps_per_second": 1.783,
177
  "step": 127
178
  },
179
  {
180
- "epoch": 2.04,
181
- "learning_rate": 6.537102473498233e-05,
182
- "loss": 1.2606,
183
  "step": 130
184
  },
185
  {
186
- "epoch": 2.12,
187
- "learning_rate": 6.360424028268551e-05,
188
- "loss": 1.2339,
189
- "step": 135
190
- },
191
- {
192
- "epoch": 2.2,
193
- "learning_rate": 6.183745583038869e-05,
194
- "loss": 1.2239,
195
  "step": 140
196
  },
197
  {
198
- "epoch": 2.27,
199
- "learning_rate": 6.007067137809188e-05,
200
- "loss": 1.2909,
201
- "step": 145
202
- },
203
- {
204
- "epoch": 2.35,
205
- "learning_rate": 5.830388692579506e-05,
206
- "loss": 1.1971,
207
  "step": 150
208
  },
209
  {
210
- "epoch": 2.43,
211
- "learning_rate": 5.653710247349824e-05,
212
- "loss": 1.1964,
213
- "step": 155
 
 
 
214
  },
215
  {
216
- "epoch": 2.51,
217
- "learning_rate": 5.477031802120142e-05,
218
- "loss": 1.1352,
219
  "step": 160
220
  },
221
  {
222
- "epoch": 2.59,
223
- "learning_rate": 5.3003533568904595e-05,
224
- "loss": 1.1566,
225
- "step": 165
226
- },
227
- {
228
- "epoch": 2.67,
229
- "learning_rate": 5.123674911660777e-05,
230
- "loss": 1.1064,
231
  "step": 170
232
  },
233
  {
234
- "epoch": 2.75,
235
- "learning_rate": 4.946996466431096e-05,
236
- "loss": 1.0537,
237
- "step": 175
 
 
 
238
  },
239
  {
240
- "epoch": 2.82,
241
- "learning_rate": 4.7703180212014135e-05,
242
- "loss": 1.0977,
243
  "step": 180
244
  },
245
  {
246
- "epoch": 2.9,
247
- "learning_rate": 4.593639575971732e-05,
248
- "loss": 1.1238,
249
- "step": 185
250
- },
251
- {
252
- "epoch": 2.98,
253
- "learning_rate": 4.416961130742049e-05,
254
- "loss": 1.0809,
255
  "step": 190
256
  },
257
  {
258
- "epoch": 3.0,
259
- "eval_accuracy": 0.6622549019607843,
260
- "eval_loss": 1.0138132572174072,
261
- "eval_runtime": 38.0303,
262
- "eval_samples_per_second": 53.641,
263
- "eval_steps_per_second": 1.683,
264
- "step": 191
265
- },
266
- {
267
- "epoch": 3.06,
268
- "learning_rate": 4.240282685512368e-05,
269
- "loss": 0.9985,
270
- "step": 195
271
- },
272
- {
273
- "epoch": 3.14,
274
- "learning_rate": 4.063604240282686e-05,
275
- "loss": 1.0406,
276
  "step": 200
277
  },
278
  {
279
- "epoch": 3.22,
280
- "learning_rate": 3.886925795053003e-05,
281
- "loss": 1.0754,
282
- "step": 205
 
 
 
283
  },
284
  {
285
- "epoch": 3.29,
286
- "learning_rate": 3.710247349823322e-05,
287
- "loss": 0.9488,
288
  "step": 210
289
  },
290
  {
291
- "epoch": 3.37,
292
- "learning_rate": 3.53356890459364e-05,
293
- "loss": 1.0375,
294
- "step": 215
295
- },
296
- {
297
- "epoch": 3.45,
298
- "learning_rate": 3.356890459363958e-05,
299
- "loss": 0.9941,
300
  "step": 220
301
  },
302
  {
303
- "epoch": 3.53,
304
- "learning_rate": 3.1802120141342755e-05,
305
- "loss": 0.8968,
306
- "step": 225
 
 
 
307
  },
308
  {
309
- "epoch": 3.61,
310
- "learning_rate": 3.003533568904594e-05,
311
- "loss": 0.9441,
312
  "step": 230
313
  },
314
  {
315
- "epoch": 3.69,
316
- "learning_rate": 2.826855123674912e-05,
317
- "loss": 1.0055,
318
- "step": 235
319
- },
320
- {
321
- "epoch": 3.76,
322
- "learning_rate": 2.6501766784452298e-05,
323
- "loss": 0.9585,
324
  "step": 240
325
  },
326
  {
327
- "epoch": 3.84,
328
- "learning_rate": 2.473498233215548e-05,
329
- "loss": 0.9804,
330
- "step": 245
331
- },
332
- {
333
- "epoch": 3.92,
334
- "learning_rate": 2.296819787985866e-05,
335
- "loss": 0.9441,
336
  "step": 250
337
  },
338
  {
339
- "epoch": 4.0,
340
- "learning_rate": 2.120141342756184e-05,
341
- "loss": 0.9495,
342
- "step": 255
343
- },
344
- {
345
- "epoch": 4.0,
346
- "eval_accuracy": 0.7318627450980392,
347
- "eval_loss": 0.8730877041816711,
348
- "eval_runtime": 38.4534,
349
- "eval_samples_per_second": 53.051,
350
- "eval_steps_per_second": 1.664,
351
- "step": 255
352
- },
353
- {
354
- "epoch": 4.08,
355
- "learning_rate": 1.9434628975265016e-05,
356
- "loss": 0.9134,
357
- "step": 260
358
- },
359
- {
360
- "epoch": 4.16,
361
- "learning_rate": 1.76678445229682e-05,
362
- "loss": 0.8873,
363
- "step": 265
364
- },
365
- {
366
- "epoch": 4.24,
367
- "learning_rate": 1.5901060070671377e-05,
368
- "loss": 0.898,
369
- "step": 270
370
- },
371
- {
372
- "epoch": 4.31,
373
- "learning_rate": 1.413427561837456e-05,
374
- "loss": 0.8586,
375
- "step": 275
376
- },
377
- {
378
- "epoch": 4.39,
379
- "learning_rate": 1.236749116607774e-05,
380
- "loss": 0.8358,
381
- "step": 280
382
- },
383
- {
384
- "epoch": 4.47,
385
- "learning_rate": 1.060070671378092e-05,
386
- "loss": 0.9083,
387
- "step": 285
388
- },
389
- {
390
- "epoch": 4.55,
391
- "learning_rate": 8.8339222614841e-06,
392
- "loss": 0.9129,
393
- "step": 290
394
- },
395
- {
396
- "epoch": 4.63,
397
- "learning_rate": 7.06713780918728e-06,
398
- "loss": 0.8401,
399
- "step": 295
400
- },
401
- {
402
- "epoch": 4.71,
403
- "learning_rate": 5.30035335689046e-06,
404
- "loss": 0.9357,
405
- "step": 300
406
- },
407
- {
408
- "epoch": 4.78,
409
- "learning_rate": 3.53356890459364e-06,
410
- "loss": 0.82,
411
- "step": 305
412
- },
413
- {
414
- "epoch": 4.86,
415
- "learning_rate": 1.76678445229682e-06,
416
- "loss": 0.9039,
417
- "step": 310
418
  },
419
  {
420
- "epoch": 4.94,
421
- "learning_rate": 0.0,
422
- "loss": 0.8398,
423
- "step": 315
424
- },
425
- {
426
- "epoch": 4.94,
427
- "eval_accuracy": 0.7436274509803922,
428
- "eval_loss": 0.831763505935669,
429
- "eval_runtime": 39.256,
430
- "eval_samples_per_second": 51.967,
431
- "eval_steps_per_second": 1.63,
432
- "step": 315
433
- },
434
- {
435
- "epoch": 4.94,
436
- "step": 315,
437
- "total_flos": 1.5485654902849044e+18,
438
- "train_loss": 1.3921774001348586,
439
- "train_runtime": 1093.5355,
440
- "train_samples_per_second": 37.306,
441
- "train_steps_per_second": 0.288
442
  }
443
  ],
444
- "logging_steps": 5,
445
- "max_steps": 315,
446
- "num_train_epochs": 5,
447
  "save_steps": 500,
448
- "total_flos": 1.5485654902849044e+18,
449
  "trial_name": null,
450
  "trial_params": null
451
  }
 
1
  {
2
+ "best_metric": 0.7700980392156863,
3
+ "best_model_checkpoint": "resnet-101-finetuned-CivilEng11k/checkpoint-229",
4
+ "epoch": 9.803921568627452,
5
  "eval_steps": 500,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.39,
13
+ "learning_rate": 4e-05,
14
+ "loss": 2.6157,
 
 
 
 
 
 
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.78,
19
+ "learning_rate": 8e-05,
20
+ "loss": 2.5769,
 
 
 
 
 
 
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.98,
25
+ "eval_accuracy": 0.5269607843137255,
26
+ "eval_loss": 2.49281907081604,
27
+ "eval_runtime": 40.5904,
28
+ "eval_samples_per_second": 50.258,
29
+ "eval_steps_per_second": 1.577,
30
  "step": 25
31
  },
32
  {
33
+ "epoch": 1.18,
34
+ "learning_rate": 9.777777777777778e-05,
35
+ "loss": 2.4933,
36
  "step": 30
37
  },
38
  {
39
+ "epoch": 1.57,
40
+ "learning_rate": 9.333333333333334e-05,
41
+ "loss": 2.3726,
 
 
 
 
 
 
42
  "step": 40
43
  },
44
  {
45
+ "epoch": 1.96,
46
+ "learning_rate": 8.888888888888889e-05,
47
+ "loss": 2.2271,
 
 
 
 
 
 
48
  "step": 50
49
  },
50
  {
51
+ "epoch": 2.0,
52
+ "eval_accuracy": 0.5284313725490196,
53
+ "eval_loss": 2.184375047683716,
54
+ "eval_runtime": 41.3607,
55
+ "eval_samples_per_second": 49.322,
56
+ "eval_steps_per_second": 1.547,
57
+ "step": 51
58
  },
59
  {
60
+ "epoch": 2.35,
61
+ "learning_rate": 8.444444444444444e-05,
62
+ "loss": 2.004,
63
  "step": 60
64
  },
65
  {
66
+ "epoch": 2.75,
67
+ "learning_rate": 8e-05,
68
+ "loss": 1.6261,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  "step": 70
70
  },
71
  {
72
+ "epoch": 2.98,
73
+ "eval_accuracy": 0.5269607843137255,
74
+ "eval_loss": 1.4098289012908936,
75
+ "eval_runtime": 40.6734,
76
+ "eval_samples_per_second": 50.156,
77
+ "eval_steps_per_second": 1.574,
78
+ "step": 76
79
  },
80
  {
81
+ "epoch": 3.14,
82
+ "learning_rate": 7.555555555555556e-05,
83
+ "loss": 1.4818,
84
  "step": 80
85
  },
86
  {
87
+ "epoch": 3.53,
88
+ "learning_rate": 7.111111111111112e-05,
89
+ "loss": 1.3531,
 
 
 
 
 
 
90
  "step": 90
91
  },
92
  {
93
+ "epoch": 3.92,
94
+ "learning_rate": 6.666666666666667e-05,
95
+ "loss": 1.2715,
 
 
 
 
 
 
96
  "step": 100
97
  },
98
  {
99
+ "epoch": 4.0,
100
+ "eval_accuracy": 0.5799019607843138,
101
+ "eval_loss": 1.204033613204956,
102
+ "eval_runtime": 41.3797,
103
+ "eval_samples_per_second": 49.3,
104
+ "eval_steps_per_second": 1.547,
105
+ "step": 102
106
  },
107
  {
108
+ "epoch": 4.31,
109
+ "learning_rate": 6.222222222222222e-05,
110
+ "loss": 1.1843,
111
  "step": 110
112
  },
113
  {
114
+ "epoch": 4.71,
115
+ "learning_rate": 5.7777777777777776e-05,
116
+ "loss": 1.1368,
 
 
 
 
 
 
117
  "step": 120
118
  },
119
  {
120
+ "epoch": 4.98,
121
+ "eval_accuracy": 0.6852941176470588,
122
+ "eval_loss": 1.0043939352035522,
123
+ "eval_runtime": 40.9737,
124
+ "eval_samples_per_second": 49.788,
125
+ "eval_steps_per_second": 1.562,
 
 
 
 
 
 
126
  "step": 127
127
  },
128
  {
129
+ "epoch": 5.1,
130
+ "learning_rate": 5.333333333333333e-05,
131
+ "loss": 1.0742,
132
  "step": 130
133
  },
134
  {
135
+ "epoch": 5.49,
136
+ "learning_rate": 4.888888888888889e-05,
137
+ "loss": 0.9825,
 
 
 
 
 
 
138
  "step": 140
139
  },
140
  {
141
+ "epoch": 5.88,
142
+ "learning_rate": 4.4444444444444447e-05,
143
+ "loss": 0.9366,
 
 
 
 
 
 
144
  "step": 150
145
  },
146
  {
147
+ "epoch": 6.0,
148
+ "eval_accuracy": 0.7455882352941177,
149
+ "eval_loss": 0.8462627530097961,
150
+ "eval_runtime": 40.2496,
151
+ "eval_samples_per_second": 50.684,
152
+ "eval_steps_per_second": 1.59,
153
+ "step": 153
154
  },
155
  {
156
+ "epoch": 6.27,
157
+ "learning_rate": 4e-05,
158
+ "loss": 0.899,
159
  "step": 160
160
  },
161
  {
162
+ "epoch": 6.67,
163
+ "learning_rate": 3.555555555555556e-05,
164
+ "loss": 0.8249,
 
 
 
 
 
 
165
  "step": 170
166
  },
167
  {
168
+ "epoch": 6.98,
169
+ "eval_accuracy": 0.7686274509803922,
170
+ "eval_loss": 0.7511501908302307,
171
+ "eval_runtime": 47.4571,
172
+ "eval_samples_per_second": 42.986,
173
+ "eval_steps_per_second": 1.349,
174
+ "step": 178
175
  },
176
  {
177
+ "epoch": 7.06,
178
+ "learning_rate": 3.111111111111111e-05,
179
+ "loss": 0.8197,
180
  "step": 180
181
  },
182
  {
183
+ "epoch": 7.45,
184
+ "learning_rate": 2.6666666666666667e-05,
185
+ "loss": 0.7562,
 
 
 
 
 
 
186
  "step": 190
187
  },
188
  {
189
+ "epoch": 7.84,
190
+ "learning_rate": 2.2222222222222223e-05,
191
+ "loss": 0.7635,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  "step": 200
193
  },
194
  {
195
+ "epoch": 8.0,
196
+ "eval_accuracy": 0.7612745098039215,
197
+ "eval_loss": 0.7078944444656372,
198
+ "eval_runtime": 40.9903,
199
+ "eval_samples_per_second": 49.768,
200
+ "eval_steps_per_second": 1.561,
201
+ "step": 204
202
  },
203
  {
204
+ "epoch": 8.24,
205
+ "learning_rate": 1.777777777777778e-05,
206
+ "loss": 0.7415,
207
  "step": 210
208
  },
209
  {
210
+ "epoch": 8.63,
211
+ "learning_rate": 1.3333333333333333e-05,
212
+ "loss": 0.7213,
 
 
 
 
 
 
213
  "step": 220
214
  },
215
  {
216
+ "epoch": 8.98,
217
+ "eval_accuracy": 0.7700980392156863,
218
+ "eval_loss": 0.6762834191322327,
219
+ "eval_runtime": 40.8595,
220
+ "eval_samples_per_second": 49.927,
221
+ "eval_steps_per_second": 1.566,
222
+ "step": 229
223
  },
224
  {
225
+ "epoch": 9.02,
226
+ "learning_rate": 8.88888888888889e-06,
227
+ "loss": 0.6951,
228
  "step": 230
229
  },
230
  {
231
+ "epoch": 9.41,
232
+ "learning_rate": 4.444444444444445e-06,
233
+ "loss": 0.7295,
 
 
 
 
 
 
234
  "step": 240
235
  },
236
  {
237
+ "epoch": 9.8,
238
+ "learning_rate": 0.0,
239
+ "loss": 0.6905,
 
 
 
 
 
 
240
  "step": 250
241
  },
242
  {
243
+ "epoch": 9.8,
244
+ "eval_accuracy": 0.7676470588235295,
245
+ "eval_loss": 0.6683045625686646,
246
+ "eval_runtime": 39.5198,
247
+ "eval_samples_per_second": 51.62,
248
+ "eval_steps_per_second": 1.619,
249
+ "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  },
251
  {
252
+ "epoch": 9.8,
253
+ "step": 250,
254
+ "total_flos": 3.072509726495183e+18,
255
+ "train_loss": 1.3191132202148437,
256
+ "train_runtime": 2261.7516,
257
+ "train_samples_per_second": 36.074,
258
+ "train_steps_per_second": 0.111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  }
260
  ],
261
+ "logging_steps": 10,
262
+ "max_steps": 250,
263
+ "num_train_epochs": 10,
264
  "save_steps": 500,
265
+ "total_flos": 3.072509726495183e+18,
266
  "trial_name": null,
267
  "trial_params": null
268
  }