barten commited on
Commit
3e1cb09
·
1 Parent(s): ce32900

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +306 -120
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.97,
3
- "total_flos": 1.573510172037415e+18,
4
- "train_loss": 1.3174563893732034,
5
- "train_runtime": 880.088,
6
- "train_samples_per_second": 23.261,
7
- "train_steps_per_second": 0.181
8
  }
 
1
  {
2
+ "epoch": 9.91,
3
+ "total_flos": 5.240694044220752e+18,
4
+ "train_loss": 0.3209688276614783,
5
+ "train_runtime": 2757.2929,
6
+ "train_samples_per_second": 24.749,
7
+ "train_steps_per_second": 0.192
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.97,
3
- "total_flos": 1.573510172037415e+18,
4
- "train_loss": 1.3174563893732034,
5
- "train_runtime": 880.088,
6
- "train_samples_per_second": 23.261,
7
- "train_steps_per_second": 0.181
8
  }
 
1
  {
2
+ "epoch": 9.91,
3
+ "total_flos": 5.240694044220752e+18,
4
+ "train_loss": 0.3209688276614783,
5
+ "train_runtime": 2757.2929,
6
+ "train_samples_per_second": 24.749,
7
+ "train_steps_per_second": 0.192
8
  }
trainer_state.json CHANGED
@@ -1,304 +1,490 @@
1
  {
2
- "best_metric": 0.7335680751173709,
3
- "best_model_checkpoint": "vit-base-patch16-224-type/checkpoint-372",
4
- "epoch": 5.975903614457831,
5
  "eval_steps": 500,
6
- "global_step": 372,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.16,
13
- "learning_rate": 1.3157894736842106e-05,
14
- "loss": 2.2533,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.32,
19
- "learning_rate": 2.6315789473684212e-05,
20
- "loss": 2.0412,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.48,
25
- "learning_rate": 3.9473684210526316e-05,
26
- "loss": 1.8051,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.64,
31
- "learning_rate": 4.970059880239521e-05,
32
- "loss": 1.5407,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.8,
37
- "learning_rate": 4.820359281437126e-05,
38
- "loss": 1.3111,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.96,
43
- "learning_rate": 4.670658682634731e-05,
44
- "loss": 1.1891,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 1.0,
49
- "eval_accuracy": 0.6549295774647887,
50
- "eval_loss": 1.0236921310424805,
51
- "eval_runtime": 23.861,
52
- "eval_samples_per_second": 71.414,
53
- "eval_steps_per_second": 2.263,
54
  "step": 62
55
  },
56
  {
57
  "epoch": 1.12,
58
- "learning_rate": 4.5209580838323355e-05,
59
- "loss": 1.1132,
60
  "step": 70
61
  },
62
  {
63
  "epoch": 1.29,
64
- "learning_rate": 4.3712574850299406e-05,
65
- "loss": 1.0744,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 1.45,
70
- "learning_rate": 4.221556886227545e-05,
71
- "loss": 1.0471,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 1.61,
76
- "learning_rate": 4.07185628742515e-05,
77
- "loss": 1.0616,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.77,
82
- "learning_rate": 3.9221556886227544e-05,
83
- "loss": 0.9812,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.93,
88
- "learning_rate": 3.7724550898203595e-05,
89
- "loss": 0.9452,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.99,
94
- "eval_accuracy": 0.6948356807511737,
95
- "eval_loss": 0.8808437585830688,
96
- "eval_runtime": 23.7153,
97
- "eval_samples_per_second": 71.852,
98
- "eval_steps_per_second": 2.277,
99
  "step": 124
100
  },
101
  {
102
  "epoch": 2.09,
103
- "learning_rate": 3.6227544910179645e-05,
104
- "loss": 0.9356,
105
  "step": 130
106
  },
107
  {
108
  "epoch": 2.25,
109
- "learning_rate": 3.473053892215569e-05,
110
- "loss": 0.906,
111
  "step": 140
112
  },
113
  {
114
  "epoch": 2.41,
115
- "learning_rate": 3.323353293413174e-05,
116
- "loss": 0.8799,
117
  "step": 150
118
  },
119
  {
120
  "epoch": 2.57,
121
- "learning_rate": 3.1736526946107784e-05,
122
- "loss": 0.9281,
123
  "step": 160
124
  },
125
  {
126
  "epoch": 2.73,
127
- "learning_rate": 3.0239520958083834e-05,
128
- "loss": 0.9325,
129
  "step": 170
130
  },
131
  {
132
  "epoch": 2.89,
133
- "learning_rate": 2.874251497005988e-05,
134
- "loss": 0.8955,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 2.99,
139
- "eval_accuracy": 0.7124413145539906,
140
- "eval_loss": 0.8164246082305908,
141
- "eval_runtime": 23.6115,
142
- "eval_samples_per_second": 72.168,
143
- "eval_steps_per_second": 2.287,
144
  "step": 186
145
  },
146
  {
147
  "epoch": 3.05,
148
- "learning_rate": 2.724550898203593e-05,
149
- "loss": 0.9305,
150
  "step": 190
151
  },
152
  {
153
  "epoch": 3.21,
154
- "learning_rate": 2.5748502994011976e-05,
155
- "loss": 0.8252,
156
  "step": 200
157
  },
158
  {
159
  "epoch": 3.37,
160
- "learning_rate": 2.4251497005988023e-05,
161
- "loss": 0.7959,
162
  "step": 210
163
  },
164
  {
165
  "epoch": 3.53,
166
- "learning_rate": 2.275449101796407e-05,
167
- "loss": 0.8614,
168
  "step": 220
169
  },
170
  {
171
  "epoch": 3.69,
172
- "learning_rate": 2.125748502994012e-05,
173
- "loss": 0.8008,
174
  "step": 230
175
  },
176
  {
177
  "epoch": 3.86,
178
- "learning_rate": 1.9760479041916168e-05,
179
- "loss": 0.8389,
180
  "step": 240
181
  },
182
  {
183
  "epoch": 4.0,
184
- "eval_accuracy": 0.7282863849765259,
185
- "eval_loss": 0.7755224108695984,
186
- "eval_runtime": 23.4912,
187
- "eval_samples_per_second": 72.538,
188
- "eval_steps_per_second": 2.299,
189
  "step": 249
190
  },
191
  {
192
  "epoch": 4.02,
193
- "learning_rate": 1.8263473053892215e-05,
194
- "loss": 0.805,
195
  "step": 250
196
  },
197
  {
198
  "epoch": 4.18,
199
- "learning_rate": 1.6766467065868263e-05,
200
- "loss": 0.7374,
201
  "step": 260
202
  },
203
  {
204
  "epoch": 4.34,
205
- "learning_rate": 1.5269461077844313e-05,
206
- "loss": 0.785,
207
  "step": 270
208
  },
209
  {
210
  "epoch": 4.5,
211
- "learning_rate": 1.377245508982036e-05,
212
- "loss": 0.7534,
213
  "step": 280
214
  },
215
  {
216
  "epoch": 4.66,
217
- "learning_rate": 1.2275449101796408e-05,
218
- "loss": 0.7473,
219
  "step": 290
220
  },
221
  {
222
  "epoch": 4.82,
223
- "learning_rate": 1.0778443113772455e-05,
224
- "loss": 0.7854,
225
  "step": 300
226
  },
227
  {
228
  "epoch": 4.98,
229
- "learning_rate": 9.281437125748502e-06,
230
- "loss": 0.8038,
231
  "step": 310
232
  },
233
  {
234
  "epoch": 5.0,
235
- "eval_accuracy": 0.7306338028169014,
236
- "eval_loss": 0.7651309370994568,
237
- "eval_runtime": 23.7527,
238
- "eval_samples_per_second": 71.739,
239
- "eval_steps_per_second": 2.273,
240
  "step": 311
241
  },
242
  {
243
  "epoch": 5.14,
244
- "learning_rate": 7.784431137724551e-06,
245
- "loss": 0.7728,
246
  "step": 320
247
  },
248
  {
249
  "epoch": 5.3,
250
- "learning_rate": 6.2874251497005985e-06,
251
- "loss": 0.7011,
252
  "step": 330
253
  },
254
  {
255
  "epoch": 5.46,
256
- "learning_rate": 4.7904191616766475e-06,
257
- "loss": 0.7337,
258
  "step": 340
259
  },
260
  {
261
  "epoch": 5.62,
262
- "learning_rate": 3.293413173652695e-06,
263
- "loss": 0.7479,
264
  "step": 350
265
  },
266
  {
267
  "epoch": 5.78,
268
- "learning_rate": 1.7964071856287426e-06,
269
- "loss": 0.7345,
270
  "step": 360
271
  },
272
  {
273
  "epoch": 5.94,
274
- "learning_rate": 2.9940119760479047e-07,
275
- "loss": 0.71,
276
  "step": 370
277
  },
278
  {
279
- "epoch": 5.98,
280
- "eval_accuracy": 0.7335680751173709,
281
- "eval_loss": 0.7623339891433716,
282
- "eval_runtime": 23.6551,
283
- "eval_samples_per_second": 72.035,
284
- "eval_steps_per_second": 2.283,
285
- "step": 372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  },
287
  {
288
- "epoch": 5.98,
289
- "step": 372,
290
- "total_flos": 3.6877651691946025e+18,
291
- "train_loss": 0.9907107369233203,
292
- "train_runtime": 1939.9637,
293
- "train_samples_per_second": 24.625,
294
- "train_steps_per_second": 0.192
295
  }
296
  ],
297
  "logging_steps": 10,
298
- "max_steps": 372,
299
- "num_train_epochs": 6,
300
  "save_steps": 500,
301
- "total_flos": 3.6877651691946025e+18,
302
  "trial_name": null,
303
  "trial_params": null
304
  }
 
1
  {
2
+ "best_metric": 0.7593896713615024,
3
+ "best_model_checkpoint": "vit-base-patch16-224-type/checkpoint-560",
4
+ "epoch": 9.959839357429718,
5
  "eval_steps": 500,
6
+ "global_step": 620,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.16,
13
+ "learning_rate": 8.064516129032258e-06,
14
+ "loss": 2.3882,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.32,
19
+ "learning_rate": 1.6129032258064517e-05,
20
+ "loss": 2.2523,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.48,
25
+ "learning_rate": 2.4193548387096777e-05,
26
+ "loss": 2.0603,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.64,
31
+ "learning_rate": 3.2258064516129034e-05,
32
+ "loss": 1.8355,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.8,
37
+ "learning_rate": 4.032258064516129e-05,
38
+ "loss": 1.5709,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.96,
43
+ "learning_rate": 4.8387096774193554e-05,
44
+ "loss": 1.3494,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 1.0,
49
+ "eval_accuracy": 0.6349765258215962,
50
+ "eval_loss": 1.1001132726669312,
51
+ "eval_runtime": 22.8557,
52
+ "eval_samples_per_second": 74.555,
53
+ "eval_steps_per_second": 2.363,
54
  "step": 62
55
  },
56
  {
57
  "epoch": 1.12,
58
+ "learning_rate": 4.92831541218638e-05,
59
+ "loss": 1.1904,
60
  "step": 70
61
  },
62
  {
63
  "epoch": 1.29,
64
+ "learning_rate": 4.8387096774193554e-05,
65
+ "loss": 1.139,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 1.45,
70
+ "learning_rate": 4.74910394265233e-05,
71
+ "loss": 1.0743,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 1.61,
76
+ "learning_rate": 4.659498207885305e-05,
77
+ "loss": 1.1071,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.77,
82
+ "learning_rate": 4.56989247311828e-05,
83
+ "loss": 1.0113,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.93,
88
+ "learning_rate": 4.4802867383512545e-05,
89
+ "loss": 0.9612,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.99,
94
+ "eval_accuracy": 0.6772300469483568,
95
+ "eval_loss": 0.8708141446113586,
96
+ "eval_runtime": 22.7556,
97
+ "eval_samples_per_second": 74.883,
98
+ "eval_steps_per_second": 2.373,
99
  "step": 124
100
  },
101
  {
102
  "epoch": 2.09,
103
+ "learning_rate": 4.390681003584229e-05,
104
+ "loss": 0.9621,
105
  "step": 130
106
  },
107
  {
108
  "epoch": 2.25,
109
+ "learning_rate": 4.301075268817205e-05,
110
+ "loss": 0.9291,
111
  "step": 140
112
  },
113
  {
114
  "epoch": 2.41,
115
+ "learning_rate": 4.2114695340501795e-05,
116
+ "loss": 0.8999,
117
  "step": 150
118
  },
119
  {
120
  "epoch": 2.57,
121
+ "learning_rate": 4.121863799283154e-05,
122
+ "loss": 0.9236,
123
  "step": 160
124
  },
125
  {
126
  "epoch": 2.73,
127
+ "learning_rate": 4.032258064516129e-05,
128
+ "loss": 0.9358,
129
  "step": 170
130
  },
131
  {
132
  "epoch": 2.89,
133
+ "learning_rate": 3.9426523297491045e-05,
134
+ "loss": 0.8817,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 2.99,
139
+ "eval_accuracy": 0.7265258215962441,
140
+ "eval_loss": 0.7897992730140686,
141
+ "eval_runtime": 23.0974,
142
+ "eval_samples_per_second": 73.774,
143
+ "eval_steps_per_second": 2.338,
144
  "step": 186
145
  },
146
  {
147
  "epoch": 3.05,
148
+ "learning_rate": 3.8530465949820786e-05,
149
+ "loss": 0.9056,
150
  "step": 190
151
  },
152
  {
153
  "epoch": 3.21,
154
+ "learning_rate": 3.763440860215054e-05,
155
+ "loss": 0.8122,
156
  "step": 200
157
  },
158
  {
159
  "epoch": 3.37,
160
+ "learning_rate": 3.673835125448029e-05,
161
+ "loss": 0.7853,
162
  "step": 210
163
  },
164
  {
165
  "epoch": 3.53,
166
+ "learning_rate": 3.5842293906810036e-05,
167
+ "loss": 0.8539,
168
  "step": 220
169
  },
170
  {
171
  "epoch": 3.69,
172
+ "learning_rate": 3.494623655913979e-05,
173
+ "loss": 0.7822,
174
  "step": 230
175
  },
176
  {
177
  "epoch": 3.86,
178
+ "learning_rate": 3.405017921146954e-05,
179
+ "loss": 0.8362,
180
  "step": 240
181
  },
182
  {
183
  "epoch": 4.0,
184
+ "eval_accuracy": 0.7276995305164319,
185
+ "eval_loss": 0.7642938494682312,
186
+ "eval_runtime": 22.8716,
187
+ "eval_samples_per_second": 74.503,
188
+ "eval_steps_per_second": 2.361,
189
  "step": 249
190
  },
191
  {
192
  "epoch": 4.02,
193
+ "learning_rate": 3.3154121863799286e-05,
194
+ "loss": 0.806,
195
  "step": 250
196
  },
197
  {
198
  "epoch": 4.18,
199
+ "learning_rate": 3.2258064516129034e-05,
200
+ "loss": 0.7131,
201
  "step": 260
202
  },
203
  {
204
  "epoch": 4.34,
205
+ "learning_rate": 3.136200716845878e-05,
206
+ "loss": 0.7659,
207
  "step": 270
208
  },
209
  {
210
  "epoch": 4.5,
211
+ "learning_rate": 3.046594982078853e-05,
212
+ "loss": 0.7285,
213
  "step": 280
214
  },
215
  {
216
  "epoch": 4.66,
217
+ "learning_rate": 2.9569892473118284e-05,
218
+ "loss": 0.7295,
219
  "step": 290
220
  },
221
  {
222
  "epoch": 4.82,
223
+ "learning_rate": 2.8673835125448028e-05,
224
+ "loss": 0.776,
225
  "step": 300
226
  },
227
  {
228
  "epoch": 4.98,
229
+ "learning_rate": 2.777777777777778e-05,
230
+ "loss": 0.7959,
231
  "step": 310
232
  },
233
  {
234
  "epoch": 5.0,
235
+ "eval_accuracy": 0.7382629107981221,
236
+ "eval_loss": 0.731025218963623,
237
+ "eval_runtime": 22.8792,
238
+ "eval_samples_per_second": 74.478,
239
+ "eval_steps_per_second": 2.36,
240
  "step": 311
241
  },
242
  {
243
  "epoch": 5.14,
244
+ "learning_rate": 2.6881720430107527e-05,
245
+ "loss": 0.7269,
246
  "step": 320
247
  },
248
  {
249
  "epoch": 5.3,
250
+ "learning_rate": 2.5985663082437278e-05,
251
+ "loss": 0.6598,
252
  "step": 330
253
  },
254
  {
255
  "epoch": 5.46,
256
+ "learning_rate": 2.5089605734767026e-05,
257
+ "loss": 0.6914,
258
  "step": 340
259
  },
260
  {
261
  "epoch": 5.62,
262
+ "learning_rate": 2.4193548387096777e-05,
263
+ "loss": 0.6695,
264
  "step": 350
265
  },
266
  {
267
  "epoch": 5.78,
268
+ "learning_rate": 2.3297491039426525e-05,
269
+ "loss": 0.6746,
270
  "step": 360
271
  },
272
  {
273
  "epoch": 5.94,
274
+ "learning_rate": 2.2401433691756272e-05,
275
+ "loss": 0.6765,
276
  "step": 370
277
  },
278
  {
279
+ "epoch": 5.99,
280
+ "eval_accuracy": 0.7470657276995305,
281
+ "eval_loss": 0.7246695756912231,
282
+ "eval_runtime": 22.9912,
283
+ "eval_samples_per_second": 74.115,
284
+ "eval_steps_per_second": 2.349,
285
+ "step": 373
286
+ },
287
+ {
288
+ "epoch": 6.1,
289
+ "learning_rate": 2.1505376344086024e-05,
290
+ "loss": 0.6417,
291
+ "step": 380
292
+ },
293
+ {
294
+ "epoch": 6.27,
295
+ "learning_rate": 2.060931899641577e-05,
296
+ "loss": 0.633,
297
+ "step": 390
298
+ },
299
+ {
300
+ "epoch": 6.43,
301
+ "learning_rate": 1.9713261648745522e-05,
302
+ "loss": 0.6314,
303
+ "step": 400
304
+ },
305
+ {
306
+ "epoch": 6.59,
307
+ "learning_rate": 1.881720430107527e-05,
308
+ "loss": 0.6661,
309
+ "step": 410
310
+ },
311
+ {
312
+ "epoch": 6.75,
313
+ "learning_rate": 1.7921146953405018e-05,
314
+ "loss": 0.6203,
315
+ "step": 420
316
+ },
317
+ {
318
+ "epoch": 6.91,
319
+ "learning_rate": 1.702508960573477e-05,
320
+ "loss": 0.6504,
321
+ "step": 430
322
+ },
323
+ {
324
+ "epoch": 6.99,
325
+ "eval_accuracy": 0.7576291079812206,
326
+ "eval_loss": 0.6938906311988831,
327
+ "eval_runtime": 22.8151,
328
+ "eval_samples_per_second": 74.687,
329
+ "eval_steps_per_second": 2.367,
330
+ "step": 435
331
+ },
332
+ {
333
+ "epoch": 7.07,
334
+ "learning_rate": 1.6129032258064517e-05,
335
+ "loss": 0.5945,
336
+ "step": 440
337
+ },
338
+ {
339
+ "epoch": 7.23,
340
+ "learning_rate": 1.5232974910394265e-05,
341
+ "loss": 0.5912,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 7.39,
346
+ "learning_rate": 1.4336917562724014e-05,
347
+ "loss": 0.5645,
348
+ "step": 460
349
+ },
350
+ {
351
+ "epoch": 7.55,
352
+ "learning_rate": 1.3440860215053763e-05,
353
+ "loss": 0.5811,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 7.71,
358
+ "learning_rate": 1.2544802867383513e-05,
359
+ "loss": 0.6433,
360
+ "step": 480
361
+ },
362
+ {
363
+ "epoch": 7.87,
364
+ "learning_rate": 1.1648745519713262e-05,
365
+ "loss": 0.5846,
366
+ "step": 490
367
+ },
368
+ {
369
+ "epoch": 8.0,
370
+ "eval_accuracy": 0.7576291079812206,
371
+ "eval_loss": 0.6983180046081543,
372
+ "eval_runtime": 22.9755,
373
+ "eval_samples_per_second": 74.166,
374
+ "eval_steps_per_second": 2.35,
375
+ "step": 498
376
+ },
377
+ {
378
+ "epoch": 8.03,
379
+ "learning_rate": 1.0752688172043012e-05,
380
+ "loss": 0.6193,
381
+ "step": 500
382
+ },
383
+ {
384
+ "epoch": 8.19,
385
+ "learning_rate": 9.856630824372761e-06,
386
+ "loss": 0.6121,
387
+ "step": 510
388
+ },
389
+ {
390
+ "epoch": 8.35,
391
+ "learning_rate": 8.960573476702509e-06,
392
+ "loss": 0.5985,
393
+ "step": 520
394
+ },
395
+ {
396
+ "epoch": 8.51,
397
+ "learning_rate": 8.064516129032258e-06,
398
+ "loss": 0.554,
399
+ "step": 530
400
+ },
401
+ {
402
+ "epoch": 8.67,
403
+ "learning_rate": 7.168458781362007e-06,
404
+ "loss": 0.5987,
405
+ "step": 540
406
+ },
407
+ {
408
+ "epoch": 8.84,
409
+ "learning_rate": 6.2724014336917564e-06,
410
+ "loss": 0.5881,
411
+ "step": 550
412
+ },
413
+ {
414
+ "epoch": 9.0,
415
+ "learning_rate": 5.376344086021506e-06,
416
+ "loss": 0.5774,
417
+ "step": 560
418
+ },
419
+ {
420
+ "epoch": 9.0,
421
+ "eval_accuracy": 0.7593896713615024,
422
+ "eval_loss": 0.6935292482376099,
423
+ "eval_runtime": 23.2143,
424
+ "eval_samples_per_second": 73.403,
425
+ "eval_steps_per_second": 2.326,
426
+ "step": 560
427
+ },
428
+ {
429
+ "epoch": 9.16,
430
+ "learning_rate": 4.4802867383512545e-06,
431
+ "loss": 0.5731,
432
+ "step": 570
433
+ },
434
+ {
435
+ "epoch": 9.32,
436
+ "learning_rate": 3.5842293906810035e-06,
437
+ "loss": 0.5458,
438
+ "step": 580
439
+ },
440
+ {
441
+ "epoch": 9.48,
442
+ "learning_rate": 2.688172043010753e-06,
443
+ "loss": 0.5373,
444
+ "step": 590
445
+ },
446
+ {
447
+ "epoch": 9.64,
448
+ "learning_rate": 1.7921146953405017e-06,
449
+ "loss": 0.5995,
450
+ "step": 600
451
+ },
452
+ {
453
+ "epoch": 9.8,
454
+ "learning_rate": 8.960573476702509e-07,
455
+ "loss": 0.5283,
456
+ "step": 610
457
+ },
458
+ {
459
+ "epoch": 9.96,
460
+ "learning_rate": 0.0,
461
+ "loss": 0.5749,
462
+ "step": 620
463
+ },
464
+ {
465
+ "epoch": 9.96,
466
+ "eval_accuracy": 0.7570422535211268,
467
+ "eval_loss": 0.6881020665168762,
468
+ "eval_runtime": 22.8182,
469
+ "eval_samples_per_second": 74.677,
470
+ "eval_steps_per_second": 2.367,
471
+ "step": 620
472
  },
473
  {
474
+ "epoch": 9.96,
475
+ "step": 620,
476
+ "total_flos": 6.145965294585532e+18,
477
+ "train_loss": 0.8511337226436985,
478
+ "train_runtime": 3069.2061,
479
+ "train_samples_per_second": 25.942,
480
+ "train_steps_per_second": 0.202
481
  }
482
  ],
483
  "logging_steps": 10,
484
+ "max_steps": 620,
485
+ "num_train_epochs": 10,
486
  "save_steps": 500,
487
+ "total_flos": 6.145965294585532e+18,
488
  "trial_name": null,
489
  "trial_params": null
490
  }