Thibaut commited on
Commit
2612e46
·
verified ·
1 Parent(s): 79b7f33

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/convnext-small-224
5
  tags:
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # Validated_Balanced_Raw_Data_model_boost8
18
 
19
- This model is a fine-tuned version of [facebook/convnext-small-224](https://huggingface.co/facebook/convnext-small-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.1157
22
- - Accuracy: 0.5283
23
 
24
  ## Model description
25
 
 
3
  license: apache-2.0
4
  base_model: facebook/convnext-small-224
5
  tags:
6
+ - image-classification
7
+ - vision
8
  - generated_from_trainer
9
  metrics:
10
  - accuracy
 
18
 
19
  # Validated_Balanced_Raw_Data_model_boost8
20
 
21
+ This model is a fine-tuned version of [facebook/convnext-small-224](https://huggingface.co/facebook/convnext-small-224) on the Logiroad/Validated_Balanced_Raw_Dataset dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 1.1054
24
+ - Accuracy: 0.5425
25
 
26
  ## Model description
27
 
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 4.2409746528731136e+17,
4
- "train_loss": 1.319512176513672,
5
- "train_runtime": 85.175,
6
- "train_samples_per_second": 37.335,
7
- "train_steps_per_second": 4.696
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.5424528301886793,
4
+ "eval_loss": 1.1053968667984009,
5
+ "eval_runtime": 1.3811,
6
+ "eval_samples_per_second": 153.499,
7
+ "eval_steps_per_second": 10.137,
8
+ "total_flos": 2.1204873264365568e+18,
9
+ "train_loss": 0.8860943021774292,
10
+ "train_runtime": 622.5434,
11
+ "train_samples_per_second": 25.54,
12
+ "train_steps_per_second": 3.213
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.5424528301886793,
4
+ "eval_loss": 1.1053968667984009,
5
+ "eval_runtime": 1.3811,
6
+ "eval_samples_per_second": 153.499,
7
+ "eval_steps_per_second": 10.137
8
+ }
runs/Nov28_10-18-28_algo-1/events.out.tfevents.1764325856.algo-1.68.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9f6a5b1ff1588aad13f79afa9750dd2db30ba4f887a0bc591150849293f285
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 4.2409746528731136e+17,
4
- "train_loss": 1.319512176513672,
5
- "train_runtime": 85.175,
6
- "train_samples_per_second": 37.335,
7
- "train_steps_per_second": 4.696
8
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "total_flos": 2.1204873264365568e+18,
4
+ "train_loss": 0.8860943021774292,
5
+ "train_runtime": 622.5434,
6
+ "train_samples_per_second": 25.54,
7
+ "train_steps_per_second": 3.213
8
  }
trainer_state.json CHANGED
@@ -1,128 +1,532 @@
1
  {
2
- "best_metric": 0.39622641509433965,
3
- "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost8_outputs/checkpoint-80",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.625,
13
- "grad_norm": 1.026680588722229,
14
- "learning_rate": 2.9541003989089956e-05,
15
- "loss": 1.3729,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.39622641509433965,
21
- "eval_loss": 1.3302552700042725,
22
- "eval_runtime": 1.4024,
23
- "eval_samples_per_second": 151.165,
24
- "eval_steps_per_second": 9.983,
25
  "step": 80
26
  },
27
  {
28
  "epoch": 1.25,
29
- "grad_norm": 1.0671523809432983,
30
- "learning_rate": 2.6837107640945904e-05,
31
- "loss": 1.3422,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 1.875,
36
- "grad_norm": 1.1258270740509033,
37
- "learning_rate": 2.2139210895556104e-05,
38
- "loss": 1.3243,
39
  "step": 150
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_accuracy": 0.38207547169811323,
44
- "eval_loss": 1.3050692081451416,
45
- "eval_runtime": 1.3895,
46
- "eval_samples_per_second": 152.569,
47
- "eval_steps_per_second": 10.075,
48
  "step": 160
49
  },
50
  {
51
  "epoch": 2.5,
52
- "grad_norm": 1.1245783567428589,
53
- "learning_rate": 1.623869018208499e-05,
54
- "loss": 1.3093,
55
  "step": 200
56
  },
57
  {
58
  "epoch": 3.0,
59
- "eval_accuracy": 0.37735849056603776,
60
- "eval_loss": 1.2962387800216675,
61
- "eval_runtime": 1.3825,
62
- "eval_samples_per_second": 153.341,
63
- "eval_steps_per_second": 10.126,
64
  "step": 240
65
  },
66
  {
67
  "epoch": 3.125,
68
- "grad_norm": 0.9223591685295105,
69
- "learning_rate": 1.0129507961929749e-05,
70
- "loss": 1.3138,
71
  "step": 250
72
  },
73
  {
74
  "epoch": 3.75,
75
- "grad_norm": 1.2660913467407227,
76
- "learning_rate": 4.840776425613887e-06,
77
- "loss": 1.2908,
78
  "step": 300
79
  },
80
  {
81
  "epoch": 4.0,
82
- "eval_accuracy": 0.38207547169811323,
83
- "eval_loss": 1.2940889596939087,
84
- "eval_runtime": 1.3786,
85
- "eval_samples_per_second": 153.781,
86
- "eval_steps_per_second": 10.155,
87
  "step": 320
88
  },
89
  {
90
  "epoch": 4.375,
91
- "grad_norm": 1.12974214553833,
92
- "learning_rate": 1.2634001001741375e-06,
93
- "loss": 1.3193,
94
  "step": 350
95
  },
96
  {
97
  "epoch": 5.0,
98
- "grad_norm": 1.500447392463684,
99
- "learning_rate": 0.0,
100
- "loss": 1.2836,
101
  "step": 400
102
  },
103
  {
104
  "epoch": 5.0,
105
- "eval_accuracy": 0.37735849056603776,
106
- "eval_loss": 1.2936153411865234,
107
- "eval_runtime": 1.4054,
108
- "eval_samples_per_second": 150.848,
109
- "eval_steps_per_second": 9.962,
110
  "step": 400
111
  },
112
  {
113
- "epoch": 5.0,
114
- "step": 400,
115
- "total_flos": 4.2409746528731136e+17,
116
- "train_loss": 1.319512176513672,
117
- "train_runtime": 85.175,
118
- "train_samples_per_second": 37.335,
119
- "train_steps_per_second": 4.696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
  ],
122
  "logging_steps": 50,
123
- "max_steps": 400,
124
  "num_input_tokens_seen": 0,
125
- "num_train_epochs": 5,
126
  "save_steps": 500,
127
  "stateful_callbacks": {
128
  "TrainerControl": {
@@ -136,7 +540,7 @@
136
  "attributes": {}
137
  }
138
  },
139
- "total_flos": 4.2409746528731136e+17,
140
  "train_batch_size": 8,
141
  "trial_name": null,
142
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5424528301886793,
3
+ "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost8_outputs/checkpoint-640",
4
+ "epoch": 25.0,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.625,
13
+ "grad_norm": 1.6869874000549316,
14
+ "learning_rate": 1.5e-05,
15
+ "loss": 1.326,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.3867924528301887,
21
+ "eval_loss": 1.2989507913589478,
22
+ "eval_runtime": 1.3738,
23
+ "eval_samples_per_second": 154.315,
24
+ "eval_steps_per_second": 10.191,
25
  "step": 80
26
  },
27
  {
28
  "epoch": 1.25,
29
+ "grad_norm": 4.385875701904297,
30
+ "learning_rate": 2.97e-05,
31
+ "loss": 1.2994,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 1.875,
36
+ "grad_norm": 12.015926361083984,
37
+ "learning_rate": 2.9950795096316707e-05,
38
+ "loss": 1.2698,
39
  "step": 150
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "eval_accuracy": 0.37264150943396224,
44
+ "eval_loss": 1.2874739170074463,
45
+ "eval_runtime": 1.3783,
46
+ "eval_samples_per_second": 153.812,
47
+ "eval_steps_per_second": 10.157,
48
  "step": 160
49
  },
50
  {
51
  "epoch": 2.5,
52
+ "grad_norm": 4.884477615356445,
53
+ "learning_rate": 2.9803503201606352e-05,
54
+ "loss": 1.2271,
55
  "step": 200
56
  },
57
  {
58
  "epoch": 3.0,
59
+ "eval_accuracy": 0.42452830188679247,
60
+ "eval_loss": 1.2136298418045044,
61
+ "eval_runtime": 1.3761,
62
+ "eval_samples_per_second": 154.055,
63
+ "eval_steps_per_second": 10.173,
64
  "step": 240
65
  },
66
  {
67
  "epoch": 3.125,
68
+ "grad_norm": 13.981613159179688,
69
+ "learning_rate": 2.955909064700128e-05,
70
+ "loss": 1.2369,
71
  "step": 250
72
  },
73
  {
74
  "epoch": 3.75,
75
+ "grad_norm": 7.470583438873291,
76
+ "learning_rate": 2.921124361809201e-05,
77
+ "loss": 1.1742,
78
  "step": 300
79
  },
80
  {
81
  "epoch": 4.0,
82
+ "eval_accuracy": 0.4716981132075472,
83
+ "eval_loss": 1.1844068765640259,
84
+ "eval_runtime": 1.3851,
85
+ "eval_samples_per_second": 153.063,
86
+ "eval_steps_per_second": 10.108,
87
  "step": 320
88
  },
89
  {
90
  "epoch": 4.375,
91
+ "grad_norm": 3.380803346633911,
92
+ "learning_rate": 2.8766319385259717e-05,
93
+ "loss": 1.1633,
94
  "step": 350
95
  },
96
  {
97
  "epoch": 5.0,
98
+ "grad_norm": 23.32581901550293,
99
+ "learning_rate": 2.822735723216188e-05,
100
+ "loss": 1.1507,
101
  "step": 400
102
  },
103
  {
104
  "epoch": 5.0,
105
+ "eval_accuracy": 0.49056603773584906,
106
+ "eval_loss": 1.1472444534301758,
107
+ "eval_runtime": 1.3725,
108
+ "eval_samples_per_second": 154.464,
109
+ "eval_steps_per_second": 10.2,
110
  "step": 400
111
  },
112
  {
113
+ "epoch": 5.625,
114
+ "grad_norm": 5.383129596710205,
115
+ "learning_rate": 2.7598038816804598e-05,
116
+ "loss": 1.1228,
117
+ "step": 450
118
+ },
119
+ {
120
+ "epoch": 6.0,
121
+ "eval_accuracy": 0.46226415094339623,
122
+ "eval_loss": 1.156751275062561,
123
+ "eval_runtime": 1.3826,
124
+ "eval_samples_per_second": 153.335,
125
+ "eval_steps_per_second": 10.126,
126
+ "step": 480
127
+ },
128
+ {
129
+ "epoch": 6.25,
130
+ "grad_norm": 5.6944193840026855,
131
+ "learning_rate": 2.6882663022085234e-05,
132
+ "loss": 1.0677,
133
+ "step": 500
134
+ },
135
+ {
136
+ "epoch": 6.875,
137
+ "grad_norm": 5.666170597076416,
138
+ "learning_rate": 2.608611659006323e-05,
139
+ "loss": 1.0484,
140
+ "step": 550
141
+ },
142
+ {
143
+ "epoch": 7.0,
144
+ "eval_accuracy": 0.4811320754716981,
145
+ "eval_loss": 1.1222484111785889,
146
+ "eval_runtime": 1.3869,
147
+ "eval_samples_per_second": 152.855,
148
+ "eval_steps_per_second": 10.094,
149
+ "step": 560
150
+ },
151
+ {
152
+ "epoch": 7.5,
153
+ "grad_norm": 6.5580291748046875,
154
+ "learning_rate": 2.5213840740556754e-05,
155
+ "loss": 1.0224,
156
+ "step": 600
157
+ },
158
+ {
159
+ "epoch": 8.0,
160
+ "eval_accuracy": 0.5424528301886793,
161
+ "eval_loss": 1.1053968667984009,
162
+ "eval_runtime": 1.3797,
163
+ "eval_samples_per_second": 153.654,
164
+ "eval_steps_per_second": 10.147,
165
+ "step": 640
166
+ },
167
+ {
168
+ "epoch": 8.125,
169
+ "grad_norm": 6.871710300445557,
170
+ "learning_rate": 2.4271794002094025e-05,
171
+ "loss": 0.9804,
172
+ "step": 650
173
+ },
174
+ {
175
+ "epoch": 8.75,
176
+ "grad_norm": 11.924201011657715,
177
+ "learning_rate": 2.3287096096947202e-05,
178
+ "loss": 0.9876,
179
+ "step": 700
180
+ },
181
+ {
182
+ "epoch": 9.0,
183
+ "eval_accuracy": 0.5,
184
+ "eval_loss": 1.1332839727401733,
185
+ "eval_runtime": 1.3822,
186
+ "eval_samples_per_second": 153.382,
187
+ "eval_steps_per_second": 10.129,
188
+ "step": 720
189
+ },
190
+ {
191
+ "epoch": 9.375,
192
+ "grad_norm": 10.64323616027832,
193
+ "learning_rate": 2.222630511152573e-05,
194
+ "loss": 0.897,
195
+ "step": 750
196
+ },
197
+ {
198
+ "epoch": 10.0,
199
+ "grad_norm": 25.509368896484375,
200
+ "learning_rate": 2.1116151134815555e-05,
201
+ "loss": 0.9897,
202
+ "step": 800
203
+ },
204
+ {
205
+ "epoch": 10.0,
206
+ "eval_accuracy": 0.4811320754716981,
207
+ "eval_loss": 1.1367976665496826,
208
+ "eval_runtime": 1.3805,
209
+ "eval_samples_per_second": 153.567,
210
+ "eval_steps_per_second": 10.141,
211
+ "step": 800
212
+ },
213
+ {
214
+ "epoch": 10.625,
215
+ "grad_norm": 19.3117733001709,
216
+ "learning_rate": 1.9964217644158925e-05,
217
+ "loss": 0.9133,
218
+ "step": 850
219
+ },
220
+ {
221
+ "epoch": 11.0,
222
+ "eval_accuracy": 0.5,
223
+ "eval_loss": 1.0922900438308716,
224
+ "eval_runtime": 1.382,
225
+ "eval_samples_per_second": 153.404,
226
+ "eval_steps_per_second": 10.13,
227
+ "step": 880
228
+ },
229
+ {
230
+ "epoch": 11.25,
231
+ "grad_norm": 15.118478775024414,
232
+ "learning_rate": 1.8778373513342223e-05,
233
+ "loss": 0.9207,
234
+ "step": 900
235
+ },
236
+ {
237
+ "epoch": 11.875,
238
+ "grad_norm": 20.54922103881836,
239
+ "learning_rate": 1.7591151985494456e-05,
240
+ "loss": 0.8814,
241
+ "step": 950
242
+ },
243
+ {
244
+ "epoch": 12.0,
245
+ "eval_accuracy": 0.4716981132075472,
246
+ "eval_loss": 1.1101481914520264,
247
+ "eval_runtime": 1.3773,
248
+ "eval_samples_per_second": 153.926,
249
+ "eval_steps_per_second": 10.165,
250
+ "step": 960
251
+ },
252
+ {
253
+ "epoch": 12.5,
254
+ "grad_norm": 20.49024200439453,
255
+ "learning_rate": 1.6362233121333124e-05,
256
+ "loss": 0.8185,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 13.0,
261
+ "eval_accuracy": 0.49528301886792453,
262
+ "eval_loss": 1.1416065692901611,
263
+ "eval_runtime": 1.3819,
264
+ "eval_samples_per_second": 153.417,
265
+ "eval_steps_per_second": 10.131,
266
+ "step": 1040
267
+ },
268
+ {
269
+ "epoch": 13.125,
270
+ "grad_norm": 19.4349365234375,
271
+ "learning_rate": 1.5124008823666874e-05,
272
+ "loss": 0.8224,
273
+ "step": 1050
274
+ },
275
+ {
276
+ "epoch": 13.75,
277
+ "grad_norm": 23.32369041442871,
278
+ "learning_rate": 1.3884937419991688e-05,
279
+ "loss": 0.7917,
280
+ "step": 1100
281
+ },
282
+ {
283
+ "epoch": 14.0,
284
+ "eval_accuracy": 0.5047169811320755,
285
+ "eval_loss": 1.1236770153045654,
286
+ "eval_runtime": 1.3744,
287
+ "eval_samples_per_second": 154.25,
288
+ "eval_steps_per_second": 10.186,
289
+ "step": 1120
290
+ },
291
+ {
292
+ "epoch": 14.375,
293
+ "grad_norm": 9.466968536376953,
294
+ "learning_rate": 1.2653483024396535e-05,
295
+ "loss": 0.7934,
296
+ "step": 1150
297
+ },
298
+ {
299
+ "epoch": 15.0,
300
+ "grad_norm": 24.469558715820312,
301
+ "learning_rate": 1.1438057719081672e-05,
302
+ "loss": 0.7773,
303
+ "step": 1200
304
+ },
305
+ {
306
+ "epoch": 15.0,
307
+ "eval_accuracy": 0.5047169811320755,
308
+ "eval_loss": 1.099358081817627,
309
+ "eval_runtime": 1.3935,
310
+ "eval_samples_per_second": 152.138,
311
+ "eval_steps_per_second": 10.047,
312
+ "step": 1200
313
+ },
314
+ {
315
+ "epoch": 15.625,
316
+ "grad_norm": 10.011290550231934,
317
+ "learning_rate": 1.0246964091307435e-05,
318
+ "loss": 0.7289,
319
+ "step": 1250
320
+ },
321
+ {
322
+ "epoch": 16.0,
323
+ "eval_accuracy": 0.5094339622641509,
324
+ "eval_loss": 1.1058666706085205,
325
+ "eval_runtime": 1.3811,
326
+ "eval_samples_per_second": 153.497,
327
+ "eval_steps_per_second": 10.137,
328
+ "step": 1280
329
+ },
330
+ {
331
+ "epoch": 16.25,
332
+ "grad_norm": 22.63361358642578,
333
+ "learning_rate": 9.08833851830458e-06,
334
+ "loss": 0.7104,
335
+ "step": 1300
336
+ },
337
+ {
338
+ "epoch": 16.875,
339
+ "grad_norm": 4.78521203994751,
340
+ "learning_rate": 7.97009558756758e-06,
341
+ "loss": 0.7337,
342
+ "step": 1350
343
+ },
344
+ {
345
+ "epoch": 17.0,
346
+ "eval_accuracy": 0.5141509433962265,
347
+ "eval_loss": 1.1084901094436646,
348
+ "eval_runtime": 1.3844,
349
+ "eval_samples_per_second": 153.134,
350
+ "eval_steps_per_second": 10.113,
351
+ "step": 1360
352
+ },
353
+ {
354
+ "epoch": 17.5,
355
+ "grad_norm": 13.2825288772583,
356
+ "learning_rate": 6.899874032196796e-06,
357
+ "loss": 0.7052,
358
+ "step": 1400
359
+ },
360
+ {
361
+ "epoch": 18.0,
362
+ "eval_accuracy": 0.5188679245283019,
363
+ "eval_loss": 1.1131378412246704,
364
+ "eval_runtime": 1.3839,
365
+ "eval_samples_per_second": 153.192,
366
+ "eval_steps_per_second": 10.116,
367
+ "step": 1440
368
+ },
369
+ {
370
+ "epoch": 18.125,
371
+ "grad_norm": 10.307984352111816,
372
+ "learning_rate": 5.884984550605782e-06,
373
+ "loss": 0.7075,
374
+ "step": 1450
375
+ },
376
+ {
377
+ "epoch": 18.75,
378
+ "grad_norm": 12.91500186920166,
379
+ "learning_rate": 4.93235986703821e-06,
380
+ "loss": 0.6703,
381
+ "step": 1500
382
+ },
383
+ {
384
+ "epoch": 19.0,
385
+ "eval_accuracy": 0.5330188679245284,
386
+ "eval_loss": 1.1068452596664429,
387
+ "eval_runtime": 1.3783,
388
+ "eval_samples_per_second": 153.808,
389
+ "eval_steps_per_second": 10.157,
390
+ "step": 1520
391
+ },
392
+ {
393
+ "epoch": 19.375,
394
+ "grad_norm": 22.758438110351562,
395
+ "learning_rate": 4.048507374031557e-06,
396
+ "loss": 0.6943,
397
+ "step": 1550
398
+ },
399
+ {
400
+ "epoch": 20.0,
401
+ "grad_norm": 19.057764053344727,
402
+ "learning_rate": 3.2394646803277063e-06,
403
+ "loss": 0.6482,
404
+ "step": 1600
405
+ },
406
+ {
407
+ "epoch": 20.0,
408
+ "eval_accuracy": 0.5188679245283019,
409
+ "eval_loss": 1.1251068115234375,
410
+ "eval_runtime": 1.3783,
411
+ "eval_samples_per_second": 153.811,
412
+ "eval_steps_per_second": 10.157,
413
+ "step": 1600
414
+ },
415
+ {
416
+ "epoch": 20.625,
417
+ "grad_norm": 24.637718200683594,
418
+ "learning_rate": 2.5107583678831445e-06,
419
+ "loss": 0.6421,
420
+ "step": 1650
421
+ },
422
+ {
423
+ "epoch": 21.0,
424
+ "eval_accuracy": 0.5283018867924528,
425
+ "eval_loss": 1.11643385887146,
426
+ "eval_runtime": 1.4272,
427
+ "eval_samples_per_second": 148.544,
428
+ "eval_steps_per_second": 9.81,
429
+ "step": 1680
430
+ },
431
+ {
432
+ "epoch": 21.25,
433
+ "grad_norm": 18.683311462402344,
434
+ "learning_rate": 1.867366239710358e-06,
435
+ "loss": 0.6103,
436
+ "step": 1700
437
+ },
438
+ {
439
+ "epoch": 21.875,
440
+ "grad_norm": 13.443337440490723,
441
+ "learning_rate": 1.313683316435793e-06,
442
+ "loss": 0.6738,
443
+ "step": 1750
444
+ },
445
+ {
446
+ "epoch": 22.0,
447
+ "eval_accuracy": 0.5377358490566038,
448
+ "eval_loss": 1.1147156953811646,
449
+ "eval_runtime": 1.3829,
450
+ "eval_samples_per_second": 153.297,
451
+ "eval_steps_per_second": 10.123,
452
+ "step": 1760
453
+ },
454
+ {
455
+ "epoch": 22.5,
456
+ "grad_norm": 24.684009552001953,
457
+ "learning_rate": 8.534918138525211e-07,
458
+ "loss": 0.6459,
459
+ "step": 1800
460
+ },
461
+ {
462
+ "epoch": 23.0,
463
+ "eval_accuracy": 0.5283018867924528,
464
+ "eval_loss": 1.1151989698410034,
465
+ "eval_runtime": 1.381,
466
+ "eval_samples_per_second": 153.516,
467
+ "eval_steps_per_second": 10.138,
468
+ "step": 1840
469
+ },
470
+ {
471
+ "epoch": 23.125,
472
+ "grad_norm": 7.173917293548584,
473
+ "learning_rate": 4.899353065512263e-07,
474
+ "loss": 0.6673,
475
+ "step": 1850
476
+ },
477
+ {
478
+ "epoch": 23.75,
479
+ "grad_norm": 9.602700233459473,
480
+ "learning_rate": 2.2549725411822485e-07,
481
+ "loss": 0.6302,
482
+ "step": 1900
483
+ },
484
+ {
485
+ "epoch": 24.0,
486
+ "eval_accuracy": 0.5283018867924528,
487
+ "eval_loss": 1.1155682802200317,
488
+ "eval_runtime": 1.3752,
489
+ "eval_samples_per_second": 154.158,
490
+ "eval_steps_per_second": 10.18,
491
+ "step": 1920
492
+ },
493
+ {
494
+ "epoch": 24.375,
495
+ "grad_norm": 34.81510925292969,
496
+ "learning_rate": 6.198403658829233e-08,
497
+ "loss": 0.6049,
498
+ "step": 1950
499
+ },
500
+ {
501
+ "epoch": 25.0,
502
+ "grad_norm": 22.63814926147461,
503
+ "learning_rate": 5.126150373813144e-10,
504
+ "loss": 0.689,
505
+ "step": 2000
506
+ },
507
+ {
508
+ "epoch": 25.0,
509
+ "eval_accuracy": 0.5283018867924528,
510
+ "eval_loss": 1.1156727075576782,
511
+ "eval_runtime": 1.3867,
512
+ "eval_samples_per_second": 152.879,
513
+ "eval_steps_per_second": 10.096,
514
+ "step": 2000
515
+ },
516
+ {
517
+ "epoch": 25.0,
518
+ "step": 2000,
519
+ "total_flos": 2.1204873264365568e+18,
520
+ "train_loss": 0.8860943021774292,
521
+ "train_runtime": 622.5434,
522
+ "train_samples_per_second": 25.54,
523
+ "train_steps_per_second": 3.213
524
  }
525
  ],
526
  "logging_steps": 50,
527
+ "max_steps": 2000,
528
  "num_input_tokens_seen": 0,
529
+ "num_train_epochs": 25,
530
  "save_steps": 500,
531
  "stateful_callbacks": {
532
  "TrainerControl": {
 
540
  "attributes": {}
541
  }
542
  },
543
+ "total_flos": 2.1204873264365568e+18,
544
  "train_batch_size": 8,
545
  "trial_name": null,
546
  "trial_params": null