dima806 committed on
Commit
63aee5d
·
verified ·
1 Parent(s): bee9363

Upload folder using huggingface_hub

Browse files
checkpoint-5640/config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "encoder_stride": 16,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "adidas_forum_high",
12
+ "1": "adidas_forum_low",
13
+ "2": "adidas_gazelle",
14
+ "3": "adidas_nmd_r1",
15
+ "4": "adidas_samba",
16
+ "5": "adidas_stan_smith",
17
+ "6": "adidas_superstar",
18
+ "7": "adidas_ultraboost",
19
+ "8": "asics_gel-lyte_iii",
20
+ "9": "converse_chuck_70_high",
21
+ "10": "converse_chuck_70_low",
22
+ "11": "converse_chuck_taylor_all-star_high",
23
+ "12": "converse_chuck_taylor_all-star_low",
24
+ "13": "converse_one_star",
25
+ "14": "new_balance_327",
26
+ "15": "new_balance_550",
27
+ "16": "new_balance_574",
28
+ "17": "new_balance_990",
29
+ "18": "new_balance_992",
30
+ "19": "nike_air_force_1_high",
31
+ "20": "nike_air_force_1_low",
32
+ "21": "nike_air_force_1_mid",
33
+ "22": "nike_air_jordan_11",
34
+ "23": "nike_air_jordan_1_high",
35
+ "24": "nike_air_jordan_1_low",
36
+ "25": "nike_air_jordan_3",
37
+ "26": "nike_air_jordan_4",
38
+ "27": "nike_air_max_1",
39
+ "28": "nike_air_max_270",
40
+ "29": "nike_air_max_90",
41
+ "30": "nike_air_max_95",
42
+ "31": "nike_air_max_97",
43
+ "32": "nike_air_max_plus_(tn)",
44
+ "33": "nike_air_vapormax_flyknit",
45
+ "34": "nike_air_vapormax_plus",
46
+ "35": "nike_blazer_mid_77",
47
+ "36": "nike_cortez",
48
+ "37": "nike_dunk_high",
49
+ "38": "nike_dunk_low",
50
+ "39": "puma_suede_classic",
51
+ "40": "reebok_classic_leather",
52
+ "41": "reebok_club_c_85",
53
+ "42": "salomon_xt-6",
54
+ "43": "vans_authentic",
55
+ "44": "vans_old_skool",
56
+ "45": "vans_sk8-hi",
57
+ "46": "vans_slip-on_checkerboard",
58
+ "47": "yeezy_700_wave_runner",
59
+ "48": "yeezy_boost_350_v2",
60
+ "49": "yeezy_slide"
61
+ },
62
+ "image_size": 224,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 3072,
65
+ "label2id": {
66
+ "adidas_forum_high": 0,
67
+ "adidas_forum_low": 1,
68
+ "adidas_gazelle": 2,
69
+ "adidas_nmd_r1": 3,
70
+ "adidas_samba": 4,
71
+ "adidas_stan_smith": 5,
72
+ "adidas_superstar": 6,
73
+ "adidas_ultraboost": 7,
74
+ "asics_gel-lyte_iii": 8,
75
+ "converse_chuck_70_high": 9,
76
+ "converse_chuck_70_low": 10,
77
+ "converse_chuck_taylor_all-star_high": 11,
78
+ "converse_chuck_taylor_all-star_low": 12,
79
+ "converse_one_star": 13,
80
+ "new_balance_327": 14,
81
+ "new_balance_550": 15,
82
+ "new_balance_574": 16,
83
+ "new_balance_990": 17,
84
+ "new_balance_992": 18,
85
+ "nike_air_force_1_high": 19,
86
+ "nike_air_force_1_low": 20,
87
+ "nike_air_force_1_mid": 21,
88
+ "nike_air_jordan_11": 22,
89
+ "nike_air_jordan_1_high": 23,
90
+ "nike_air_jordan_1_low": 24,
91
+ "nike_air_jordan_3": 25,
92
+ "nike_air_jordan_4": 26,
93
+ "nike_air_max_1": 27,
94
+ "nike_air_max_270": 28,
95
+ "nike_air_max_90": 29,
96
+ "nike_air_max_95": 30,
97
+ "nike_air_max_97": 31,
98
+ "nike_air_max_plus_(tn)": 32,
99
+ "nike_air_vapormax_flyknit": 33,
100
+ "nike_air_vapormax_plus": 34,
101
+ "nike_blazer_mid_77": 35,
102
+ "nike_cortez": 36,
103
+ "nike_dunk_high": 37,
104
+ "nike_dunk_low": 38,
105
+ "puma_suede_classic": 39,
106
+ "reebok_classic_leather": 40,
107
+ "reebok_club_c_85": 41,
108
+ "salomon_xt-6": 42,
109
+ "vans_authentic": 43,
110
+ "vans_old_skool": 44,
111
+ "vans_sk8-hi": 45,
112
+ "vans_slip-on_checkerboard": 46,
113
+ "yeezy_700_wave_runner": 47,
114
+ "yeezy_boost_350_v2": 48,
115
+ "yeezy_slide": 49
116
+ },
117
+ "layer_norm_eps": 1e-12,
118
+ "model_type": "vit",
119
+ "num_attention_heads": 12,
120
+ "num_channels": 3,
121
+ "num_hidden_layers": 12,
122
+ "patch_size": 16,
123
+ "pooler_act": "tanh",
124
+ "pooler_output_size": 768,
125
+ "problem_type": "single_label_classification",
126
+ "qkv_bias": true,
127
+ "torch_dtype": "float32",
128
+ "transformers_version": "4.51.3"
129
+ }
checkpoint-5640/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4745e3fdcd455e985f55ae003c88c87d197975d2307ad5dda833f8c30809d0
3
+ size 343371632
checkpoint-5640/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3310bf09978d639f503bac2f5400e0837b4ed61f1f3db0b2565bafa6ba106db1
3
+ size 686864186
checkpoint-5640/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-5640/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5591368b5e645f6f84c8526df6dfd0c8c2904179c960a8a757d795a3b9c9edef
3
+ size 14244
checkpoint-5640/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58a6de07b073daad8f68678fd6f5f692094c61f764b1579a3cc1b60ed9aa9687
3
+ size 1064
checkpoint-5640/trainer_state.json ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 5640,
3
+ "best_metric": 2.5463945865631104,
4
+ "best_model_checkpoint": "popular_sneakers_detection/checkpoint-5640",
5
+ "epoch": 40.0,
6
+ "eval_steps": 500,
7
+ "global_step": 5640,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.042333333333333334,
15
+ "eval_loss": 3.8944478034973145,
16
+ "eval_model_preparation_time": 0.0041,
17
+ "eval_runtime": 27.4259,
18
+ "eval_samples_per_second": 109.386,
19
+ "eval_steps_per_second": 13.673,
20
+ "step": 141
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.06366666666666666,
25
+ "eval_loss": 3.8702337741851807,
26
+ "eval_model_preparation_time": 0.0041,
27
+ "eval_runtime": 26.7233,
28
+ "eval_samples_per_second": 112.261,
29
+ "eval_steps_per_second": 14.033,
30
+ "step": 282
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_accuracy": 0.094,
35
+ "eval_loss": 3.8375682830810547,
36
+ "eval_model_preparation_time": 0.0041,
37
+ "eval_runtime": 27.5229,
38
+ "eval_samples_per_second": 109.0,
39
+ "eval_steps_per_second": 13.625,
40
+ "step": 423
41
+ },
42
+ {
43
+ "epoch": 3.546099290780142,
44
+ "grad_norm": 1.5614980459213257,
45
+ "learning_rate": 4.598389982110913e-06,
46
+ "loss": 3.8547,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 4.0,
51
+ "eval_accuracy": 0.13733333333333334,
52
+ "eval_loss": 3.79917573928833,
53
+ "eval_model_preparation_time": 0.0041,
54
+ "eval_runtime": 28.4527,
55
+ "eval_samples_per_second": 105.438,
56
+ "eval_steps_per_second": 13.18,
57
+ "step": 564
58
+ },
59
+ {
60
+ "epoch": 5.0,
61
+ "eval_accuracy": 0.19166666666666668,
62
+ "eval_loss": 3.7531919479370117,
63
+ "eval_model_preparation_time": 0.0041,
64
+ "eval_runtime": 27.2395,
65
+ "eval_samples_per_second": 110.134,
66
+ "eval_steps_per_second": 13.767,
67
+ "step": 705
68
+ },
69
+ {
70
+ "epoch": 6.0,
71
+ "eval_accuracy": 0.25533333333333336,
72
+ "eval_loss": 3.6991868019104004,
73
+ "eval_model_preparation_time": 0.0041,
74
+ "eval_runtime": 27.7403,
75
+ "eval_samples_per_second": 108.146,
76
+ "eval_steps_per_second": 13.518,
77
+ "step": 846
78
+ },
79
+ {
80
+ "epoch": 7.0,
81
+ "eval_accuracy": 0.32566666666666666,
82
+ "eval_loss": 3.6358957290649414,
83
+ "eval_model_preparation_time": 0.0041,
84
+ "eval_runtime": 27.4022,
85
+ "eval_samples_per_second": 109.48,
86
+ "eval_steps_per_second": 13.685,
87
+ "step": 987
88
+ },
89
+ {
90
+ "epoch": 7.092198581560283,
91
+ "grad_norm": 1.7323589324951172,
92
+ "learning_rate": 4.151162790697675e-06,
93
+ "loss": 3.6677,
94
+ "step": 1000
95
+ },
96
+ {
97
+ "epoch": 8.0,
98
+ "eval_accuracy": 0.391,
99
+ "eval_loss": 3.561831474304199,
100
+ "eval_model_preparation_time": 0.0041,
101
+ "eval_runtime": 28.1913,
102
+ "eval_samples_per_second": 106.416,
103
+ "eval_steps_per_second": 13.302,
104
+ "step": 1128
105
+ },
106
+ {
107
+ "epoch": 9.0,
108
+ "eval_accuracy": 0.42866666666666664,
109
+ "eval_loss": 3.492708444595337,
110
+ "eval_model_preparation_time": 0.0041,
111
+ "eval_runtime": 27.5068,
112
+ "eval_samples_per_second": 109.064,
113
+ "eval_steps_per_second": 13.633,
114
+ "step": 1269
115
+ },
116
+ {
117
+ "epoch": 10.0,
118
+ "eval_accuracy": 0.4796666666666667,
119
+ "eval_loss": 3.4180657863616943,
120
+ "eval_model_preparation_time": 0.0041,
121
+ "eval_runtime": 27.5222,
122
+ "eval_samples_per_second": 109.003,
123
+ "eval_steps_per_second": 13.625,
124
+ "step": 1410
125
+ },
126
+ {
127
+ "epoch": 10.638297872340425,
128
+ "grad_norm": 1.7677080631256104,
129
+ "learning_rate": 3.7039355992844365e-06,
130
+ "loss": 3.4053,
131
+ "step": 1500
132
+ },
133
+ {
134
+ "epoch": 11.0,
135
+ "eval_accuracy": 0.49633333333333335,
136
+ "eval_loss": 3.353686571121216,
137
+ "eval_model_preparation_time": 0.0041,
138
+ "eval_runtime": 27.4731,
139
+ "eval_samples_per_second": 109.198,
140
+ "eval_steps_per_second": 13.65,
141
+ "step": 1551
142
+ },
143
+ {
144
+ "epoch": 12.0,
145
+ "eval_accuracy": 0.5183333333333333,
146
+ "eval_loss": 3.29174542427063,
147
+ "eval_model_preparation_time": 0.0041,
148
+ "eval_runtime": 27.3958,
149
+ "eval_samples_per_second": 109.506,
150
+ "eval_steps_per_second": 13.688,
151
+ "step": 1692
152
+ },
153
+ {
154
+ "epoch": 13.0,
155
+ "eval_accuracy": 0.5416666666666666,
156
+ "eval_loss": 3.230329990386963,
157
+ "eval_model_preparation_time": 0.0041,
158
+ "eval_runtime": 27.2445,
159
+ "eval_samples_per_second": 110.114,
160
+ "eval_steps_per_second": 13.764,
161
+ "step": 1833
162
+ },
163
+ {
164
+ "epoch": 14.0,
165
+ "eval_accuracy": 0.5566666666666666,
166
+ "eval_loss": 3.1734297275543213,
167
+ "eval_model_preparation_time": 0.0041,
168
+ "eval_runtime": 27.0335,
169
+ "eval_samples_per_second": 110.973,
170
+ "eval_steps_per_second": 13.872,
171
+ "step": 1974
172
+ },
173
+ {
174
+ "epoch": 14.184397163120567,
175
+ "grad_norm": 1.914097785949707,
176
+ "learning_rate": 3.2567084078711986e-06,
177
+ "loss": 3.1467,
178
+ "step": 2000
179
+ },
180
+ {
181
+ "epoch": 15.0,
182
+ "eval_accuracy": 0.5753333333333334,
183
+ "eval_loss": 3.1160686016082764,
184
+ "eval_model_preparation_time": 0.0041,
185
+ "eval_runtime": 26.9914,
186
+ "eval_samples_per_second": 111.146,
187
+ "eval_steps_per_second": 13.893,
188
+ "step": 2115
189
+ },
190
+ {
191
+ "epoch": 16.0,
192
+ "eval_accuracy": 0.5836666666666667,
193
+ "eval_loss": 3.0692453384399414,
194
+ "eval_model_preparation_time": 0.0041,
195
+ "eval_runtime": 27.4615,
196
+ "eval_samples_per_second": 109.244,
197
+ "eval_steps_per_second": 13.655,
198
+ "step": 2256
199
+ },
200
+ {
201
+ "epoch": 17.0,
202
+ "eval_accuracy": 0.602,
203
+ "eval_loss": 3.0239269733428955,
204
+ "eval_model_preparation_time": 0.0041,
205
+ "eval_runtime": 27.511,
206
+ "eval_samples_per_second": 109.047,
207
+ "eval_steps_per_second": 13.631,
208
+ "step": 2397
209
+ },
210
+ {
211
+ "epoch": 17.73049645390071,
212
+ "grad_norm": 1.9646587371826172,
213
+ "learning_rate": 2.809481216457961e-06,
214
+ "loss": 2.9364,
215
+ "step": 2500
216
+ },
217
+ {
218
+ "epoch": 18.0,
219
+ "eval_accuracy": 0.604,
220
+ "eval_loss": 2.983224868774414,
221
+ "eval_model_preparation_time": 0.0041,
222
+ "eval_runtime": 27.5742,
223
+ "eval_samples_per_second": 108.797,
224
+ "eval_steps_per_second": 13.6,
225
+ "step": 2538
226
+ },
227
+ {
228
+ "epoch": 19.0,
229
+ "eval_accuracy": 0.6146666666666667,
230
+ "eval_loss": 2.9459805488586426,
231
+ "eval_model_preparation_time": 0.0041,
232
+ "eval_runtime": 27.3324,
233
+ "eval_samples_per_second": 109.76,
234
+ "eval_steps_per_second": 13.72,
235
+ "step": 2679
236
+ },
237
+ {
238
+ "epoch": 20.0,
239
+ "eval_accuracy": 0.6283333333333333,
240
+ "eval_loss": 2.9052772521972656,
241
+ "eval_model_preparation_time": 0.0041,
242
+ "eval_runtime": 27.2251,
243
+ "eval_samples_per_second": 110.192,
244
+ "eval_steps_per_second": 13.774,
245
+ "step": 2820
246
+ },
247
+ {
248
+ "epoch": 21.0,
249
+ "eval_accuracy": 0.6386666666666667,
250
+ "eval_loss": 2.8673319816589355,
251
+ "eval_model_preparation_time": 0.0041,
252
+ "eval_runtime": 27.5302,
253
+ "eval_samples_per_second": 108.971,
254
+ "eval_steps_per_second": 13.621,
255
+ "step": 2961
256
+ },
257
+ {
258
+ "epoch": 21.27659574468085,
259
+ "grad_norm": 2.0768418312072754,
260
+ "learning_rate": 2.3622540250447227e-06,
261
+ "loss": 2.7701,
262
+ "step": 3000
263
+ },
264
+ {
265
+ "epoch": 22.0,
266
+ "eval_accuracy": 0.642,
267
+ "eval_loss": 2.8295445442199707,
268
+ "eval_model_preparation_time": 0.0041,
269
+ "eval_runtime": 27.3684,
270
+ "eval_samples_per_second": 109.616,
271
+ "eval_steps_per_second": 13.702,
272
+ "step": 3102
273
+ },
274
+ {
275
+ "epoch": 23.0,
276
+ "eval_accuracy": 0.658,
277
+ "eval_loss": 2.7955031394958496,
278
+ "eval_model_preparation_time": 0.0041,
279
+ "eval_runtime": 27.6846,
280
+ "eval_samples_per_second": 108.363,
281
+ "eval_steps_per_second": 13.545,
282
+ "step": 3243
283
+ },
284
+ {
285
+ "epoch": 24.0,
286
+ "eval_accuracy": 0.6533333333333333,
287
+ "eval_loss": 2.7718265056610107,
288
+ "eval_model_preparation_time": 0.0041,
289
+ "eval_runtime": 28.3048,
290
+ "eval_samples_per_second": 105.989,
291
+ "eval_steps_per_second": 13.249,
292
+ "step": 3384
293
+ },
294
+ {
295
+ "epoch": 24.822695035460992,
296
+ "grad_norm": 2.5180611610412598,
297
+ "learning_rate": 1.915026833631485e-06,
298
+ "loss": 2.6383,
299
+ "step": 3500
300
+ },
301
+ {
302
+ "epoch": 25.0,
303
+ "eval_accuracy": 0.6656666666666666,
304
+ "eval_loss": 2.7361390590667725,
305
+ "eval_model_preparation_time": 0.0041,
306
+ "eval_runtime": 27.5955,
307
+ "eval_samples_per_second": 108.714,
308
+ "eval_steps_per_second": 13.589,
309
+ "step": 3525
310
+ },
311
+ {
312
+ "epoch": 26.0,
313
+ "eval_accuracy": 0.6693333333333333,
314
+ "eval_loss": 2.712496519088745,
315
+ "eval_model_preparation_time": 0.0041,
316
+ "eval_runtime": 27.5376,
317
+ "eval_samples_per_second": 108.942,
318
+ "eval_steps_per_second": 13.618,
319
+ "step": 3666
320
+ },
321
+ {
322
+ "epoch": 27.0,
323
+ "eval_accuracy": 0.6656666666666666,
324
+ "eval_loss": 2.695204257965088,
325
+ "eval_model_preparation_time": 0.0041,
326
+ "eval_runtime": 27.822,
327
+ "eval_samples_per_second": 107.828,
328
+ "eval_steps_per_second": 13.479,
329
+ "step": 3807
330
+ },
331
+ {
332
+ "epoch": 28.0,
333
+ "eval_accuracy": 0.6756666666666666,
334
+ "eval_loss": 2.6688482761383057,
335
+ "eval_model_preparation_time": 0.0041,
336
+ "eval_runtime": 27.4009,
337
+ "eval_samples_per_second": 109.485,
338
+ "eval_steps_per_second": 13.686,
339
+ "step": 3948
340
+ },
341
+ {
342
+ "epoch": 28.368794326241133,
343
+ "grad_norm": 2.0685389041900635,
344
+ "learning_rate": 1.4677996422182469e-06,
345
+ "loss": 2.5324,
346
+ "step": 4000
347
+ },
348
+ {
349
+ "epoch": 29.0,
350
+ "eval_accuracy": 0.6776666666666666,
351
+ "eval_loss": 2.651216745376587,
352
+ "eval_model_preparation_time": 0.0041,
353
+ "eval_runtime": 27.4184,
354
+ "eval_samples_per_second": 109.415,
355
+ "eval_steps_per_second": 13.677,
356
+ "step": 4089
357
+ },
358
+ {
359
+ "epoch": 30.0,
360
+ "eval_accuracy": 0.684,
361
+ "eval_loss": 2.631049633026123,
362
+ "eval_model_preparation_time": 0.0041,
363
+ "eval_runtime": 27.4391,
364
+ "eval_samples_per_second": 109.333,
365
+ "eval_steps_per_second": 13.667,
366
+ "step": 4230
367
+ },
368
+ {
369
+ "epoch": 31.0,
370
+ "eval_accuracy": 0.6876666666666666,
371
+ "eval_loss": 2.61474347114563,
372
+ "eval_model_preparation_time": 0.0041,
373
+ "eval_runtime": 27.3793,
374
+ "eval_samples_per_second": 109.572,
375
+ "eval_steps_per_second": 13.697,
376
+ "step": 4371
377
+ },
378
+ {
379
+ "epoch": 31.914893617021278,
380
+ "grad_norm": 2.6519553661346436,
381
+ "learning_rate": 1.020572450805009e-06,
382
+ "loss": 2.4528,
383
+ "step": 4500
384
+ },
385
+ {
386
+ "epoch": 32.0,
387
+ "eval_accuracy": 0.6866666666666666,
388
+ "eval_loss": 2.5972399711608887,
389
+ "eval_model_preparation_time": 0.0041,
390
+ "eval_runtime": 26.9694,
391
+ "eval_samples_per_second": 111.237,
392
+ "eval_steps_per_second": 13.905,
393
+ "step": 4512
394
+ },
395
+ {
396
+ "epoch": 33.0,
397
+ "eval_accuracy": 0.6883333333333334,
398
+ "eval_loss": 2.586773157119751,
399
+ "eval_model_preparation_time": 0.0041,
400
+ "eval_runtime": 27.608,
401
+ "eval_samples_per_second": 108.664,
402
+ "eval_steps_per_second": 13.583,
403
+ "step": 4653
404
+ },
405
+ {
406
+ "epoch": 34.0,
407
+ "eval_accuracy": 0.6876666666666666,
408
+ "eval_loss": 2.576591730117798,
409
+ "eval_model_preparation_time": 0.0041,
410
+ "eval_runtime": 27.5729,
411
+ "eval_samples_per_second": 108.802,
412
+ "eval_steps_per_second": 13.6,
413
+ "step": 4794
414
+ },
415
+ {
416
+ "epoch": 35.0,
417
+ "eval_accuracy": 0.6926666666666667,
418
+ "eval_loss": 2.569082736968994,
419
+ "eval_model_preparation_time": 0.0041,
420
+ "eval_runtime": 27.9391,
421
+ "eval_samples_per_second": 107.376,
422
+ "eval_steps_per_second": 13.422,
423
+ "step": 4935
424
+ },
425
+ {
426
+ "epoch": 35.46099290780142,
427
+ "grad_norm": 2.5418479442596436,
428
+ "learning_rate": 5.733452593917711e-07,
429
+ "loss": 2.4022,
430
+ "step": 5000
431
+ },
432
+ {
433
+ "epoch": 36.0,
434
+ "eval_accuracy": 0.693,
435
+ "eval_loss": 2.5580801963806152,
436
+ "eval_model_preparation_time": 0.0041,
437
+ "eval_runtime": 27.1847,
438
+ "eval_samples_per_second": 110.356,
439
+ "eval_steps_per_second": 13.795,
440
+ "step": 5076
441
+ },
442
+ {
443
+ "epoch": 37.0,
444
+ "eval_accuracy": 0.694,
445
+ "eval_loss": 2.5533597469329834,
446
+ "eval_model_preparation_time": 0.0041,
447
+ "eval_runtime": 27.3678,
448
+ "eval_samples_per_second": 109.618,
449
+ "eval_steps_per_second": 13.702,
450
+ "step": 5217
451
+ },
452
+ {
453
+ "epoch": 38.0,
454
+ "eval_accuracy": 0.695,
455
+ "eval_loss": 2.5490283966064453,
456
+ "eval_model_preparation_time": 0.0041,
457
+ "eval_runtime": 27.0235,
458
+ "eval_samples_per_second": 111.014,
459
+ "eval_steps_per_second": 13.877,
460
+ "step": 5358
461
+ },
462
+ {
463
+ "epoch": 39.0,
464
+ "eval_accuracy": 0.694,
465
+ "eval_loss": 2.547304153442383,
466
+ "eval_model_preparation_time": 0.0041,
467
+ "eval_runtime": 27.9193,
468
+ "eval_samples_per_second": 107.453,
469
+ "eval_steps_per_second": 13.432,
470
+ "step": 5499
471
+ },
472
+ {
473
+ "epoch": 39.00709219858156,
474
+ "grad_norm": 2.5994229316711426,
475
+ "learning_rate": 1.2611806797853311e-07,
476
+ "loss": 2.3657,
477
+ "step": 5500
478
+ },
479
+ {
480
+ "epoch": 40.0,
481
+ "eval_accuracy": 0.6943333333333334,
482
+ "eval_loss": 2.5463945865631104,
483
+ "eval_model_preparation_time": 0.0041,
484
+ "eval_runtime": 27.6641,
485
+ "eval_samples_per_second": 108.444,
486
+ "eval_steps_per_second": 13.555,
487
+ "step": 5640
488
+ }
489
+ ],
490
+ "logging_steps": 500,
491
+ "max_steps": 5640,
492
+ "num_input_tokens_seen": 0,
493
+ "num_train_epochs": 40,
494
+ "save_steps": 500,
495
+ "stateful_callbacks": {
496
+ "TrainerControl": {
497
+ "args": {
498
+ "should_epoch_stop": false,
499
+ "should_evaluate": false,
500
+ "should_log": false,
501
+ "should_save": true,
502
+ "should_training_stop": true
503
+ },
504
+ "attributes": {}
505
+ }
506
+ },
507
+ "total_flos": 1.395455892332544e+19,
508
+ "train_batch_size": 32,
509
+ "trial_name": null,
510
+ "trial_params": null
511
+ }
checkpoint-5640/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85b11601d829e022858ca77f9c10bbf712a1c51fb1d678c6dc467ebd3fa2edc
3
+ size 5240
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6de4695b0e2563d48d7141b39971a00a648200228a269df5318d12f65ffd7785
3
  size 343371632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4745e3fdcd455e985f55ae003c88c87d197975d2307ad5dda833f8c30809d0
3
  size 343371632
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12094f0fa6ae3c859ec5ffdcf98aea4889e656117cbc55d1c2abe137dce288c9
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85b11601d829e022858ca77f9c10bbf712a1c51fb1d678c6dc467ebd3fa2edc
3
  size 5240