YaswanthReddy23 commited on
Commit
db7e301
·
verified ·
1 Parent(s): a7d8da4

Uploaded trained model

Browse files
all_results.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.9742063492063492,
3
+ "eval_loss": 0.10980728268623352
4
+ }
checkpoint-400/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Healthy",
13
+ "1": "Mosaic",
14
+ "2": "RedRot",
15
+ "3": "Rust",
16
+ "4": "Yellow"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "Healthy": "0",
23
+ "Mosaic": "1",
24
+ "RedRot": "2",
25
+ "Rust": "3",
26
+ "Yellow": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.47.1"
38
+ }
checkpoint-400/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93e9c2cc6195e7885db451b6aa506def9d9298f9f7904ec4830f50b89e002c0
3
+ size 343233204
checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30420ae379c7bc0e40d2e609aa37b656e0084222d1241aa0d04ff8569a370e92
3
+ size 686581690
checkpoint-400/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2abbcbec909b5db88aa9231df83b1a7c42cea56d0dc2d1f4637d003e30a90e7f
3
+ size 13990
checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0f59af94580b17eaa80224b6dbbfcb5ac4881b94fba5e5b1c75dfb13b6e720f
3
+ size 1064
checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.10980728268623352,
3
+ "best_model_checkpoint": "./vit-sugarcane-leaf/checkpoint-400",
4
+ "epoch": 3.6036036036036037,
5
+ "eval_steps": 100,
6
+ "global_step": 400,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09009009009009009,
13
+ "grad_norm": 1.9724489450454712,
14
+ "learning_rate": 0.0001954954954954955,
15
+ "loss": 1.4657,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.18018018018018017,
20
+ "grad_norm": 2.1435556411743164,
21
+ "learning_rate": 0.000190990990990991,
22
+ "loss": 0.9696,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.2702702702702703,
27
+ "grad_norm": 4.765104293823242,
28
+ "learning_rate": 0.0001864864864864865,
29
+ "loss": 0.5248,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.36036036036036034,
34
+ "grad_norm": 4.9860734939575195,
35
+ "learning_rate": 0.000181981981981982,
36
+ "loss": 0.5041,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.45045045045045046,
41
+ "grad_norm": 2.0347421169281006,
42
+ "learning_rate": 0.0001774774774774775,
43
+ "loss": 0.4714,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.5405405405405406,
48
+ "grad_norm": 1.83969247341156,
49
+ "learning_rate": 0.000172972972972973,
50
+ "loss": 0.3427,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.6306306306306306,
55
+ "grad_norm": 3.7638847827911377,
56
+ "learning_rate": 0.00016846846846846846,
57
+ "loss": 0.3153,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.7207207207207207,
62
+ "grad_norm": 4.0417351722717285,
63
+ "learning_rate": 0.00016396396396396395,
64
+ "loss": 0.3075,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.8108108108108109,
69
+ "grad_norm": 1.5025771856307983,
70
+ "learning_rate": 0.00015945945945945947,
71
+ "loss": 0.1659,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.9009009009009009,
76
+ "grad_norm": 4.198694229125977,
77
+ "learning_rate": 0.00015495495495495496,
78
+ "loss": 0.1933,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.9009009009009009,
83
+ "eval_accuracy": 0.9464285714285714,
84
+ "eval_loss": 0.1887584924697876,
85
+ "eval_runtime": 307.7246,
86
+ "eval_samples_per_second": 1.638,
87
+ "eval_steps_per_second": 0.205,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 0.990990990990991,
92
+ "grad_norm": 1.807138442993164,
93
+ "learning_rate": 0.00015045045045045046,
94
+ "loss": 0.1295,
95
+ "step": 110
96
+ },
97
+ {
98
+ "epoch": 1.0810810810810811,
99
+ "grad_norm": 0.8208228349685669,
100
+ "learning_rate": 0.00014594594594594595,
101
+ "loss": 0.1241,
102
+ "step": 120
103
+ },
104
+ {
105
+ "epoch": 1.1711711711711712,
106
+ "grad_norm": 0.37107059359550476,
107
+ "learning_rate": 0.00014144144144144144,
108
+ "loss": 0.1543,
109
+ "step": 130
110
+ },
111
+ {
112
+ "epoch": 1.2612612612612613,
113
+ "grad_norm": 3.343444585800171,
114
+ "learning_rate": 0.00013693693693693693,
115
+ "loss": 0.1222,
116
+ "step": 140
117
+ },
118
+ {
119
+ "epoch": 1.3513513513513513,
120
+ "grad_norm": 0.42869412899017334,
121
+ "learning_rate": 0.00013243243243243243,
122
+ "loss": 0.1076,
123
+ "step": 150
124
+ },
125
+ {
126
+ "epoch": 1.4414414414414414,
127
+ "grad_norm": 0.1316378265619278,
128
+ "learning_rate": 0.00012792792792792795,
129
+ "loss": 0.1289,
130
+ "step": 160
131
+ },
132
+ {
133
+ "epoch": 1.5315315315315314,
134
+ "grad_norm": 0.11681246012449265,
135
+ "learning_rate": 0.00012342342342342344,
136
+ "loss": 0.1037,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 1.6216216216216215,
141
+ "grad_norm": 4.53013801574707,
142
+ "learning_rate": 0.00011891891891891893,
143
+ "loss": 0.0999,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 1.7117117117117115,
148
+ "grad_norm": 2.964167594909668,
149
+ "learning_rate": 0.00011441441441441443,
150
+ "loss": 0.2104,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.8018018018018018,
155
+ "grad_norm": 0.1299811601638794,
156
+ "learning_rate": 0.00010990990990990993,
157
+ "loss": 0.0794,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.8018018018018018,
162
+ "eval_accuracy": 0.9623015873015873,
163
+ "eval_loss": 0.17631131410598755,
164
+ "eval_runtime": 312.6911,
165
+ "eval_samples_per_second": 1.612,
166
+ "eval_steps_per_second": 0.201,
167
+ "step": 200
168
+ },
169
+ {
170
+ "epoch": 1.8918918918918919,
171
+ "grad_norm": 4.118229866027832,
172
+ "learning_rate": 0.0001054054054054054,
173
+ "loss": 0.1379,
174
+ "step": 210
175
+ },
176
+ {
177
+ "epoch": 1.981981981981982,
178
+ "grad_norm": 0.08945044875144958,
179
+ "learning_rate": 0.00010090090090090089,
180
+ "loss": 0.1375,
181
+ "step": 220
182
+ },
183
+ {
184
+ "epoch": 2.0720720720720722,
185
+ "grad_norm": 0.09754334390163422,
186
+ "learning_rate": 9.639639639639641e-05,
187
+ "loss": 0.0309,
188
+ "step": 230
189
+ },
190
+ {
191
+ "epoch": 2.1621621621621623,
192
+ "grad_norm": 0.08668765425682068,
193
+ "learning_rate": 9.18918918918919e-05,
194
+ "loss": 0.0473,
195
+ "step": 240
196
+ },
197
+ {
198
+ "epoch": 2.2522522522522523,
199
+ "grad_norm": 1.020466923713684,
200
+ "learning_rate": 8.738738738738738e-05,
201
+ "loss": 0.0479,
202
+ "step": 250
203
+ },
204
+ {
205
+ "epoch": 2.3423423423423424,
206
+ "grad_norm": 4.378125190734863,
207
+ "learning_rate": 8.288288288288289e-05,
208
+ "loss": 0.0266,
209
+ "step": 260
210
+ },
211
+ {
212
+ "epoch": 2.4324324324324325,
213
+ "grad_norm": 0.8731359243392944,
214
+ "learning_rate": 7.837837837837838e-05,
215
+ "loss": 0.0555,
216
+ "step": 270
217
+ },
218
+ {
219
+ "epoch": 2.5225225225225225,
220
+ "grad_norm": 0.1169867143034935,
221
+ "learning_rate": 7.387387387387387e-05,
222
+ "loss": 0.0908,
223
+ "step": 280
224
+ },
225
+ {
226
+ "epoch": 2.6126126126126126,
227
+ "grad_norm": 0.08126968890428543,
228
+ "learning_rate": 6.936936936936938e-05,
229
+ "loss": 0.1091,
230
+ "step": 290
231
+ },
232
+ {
233
+ "epoch": 2.7027027027027026,
234
+ "grad_norm": 0.06706036627292633,
235
+ "learning_rate": 6.486486486486487e-05,
236
+ "loss": 0.0773,
237
+ "step": 300
238
+ },
239
+ {
240
+ "epoch": 2.7027027027027026,
241
+ "eval_accuracy": 0.9603174603174603,
242
+ "eval_loss": 0.1754508912563324,
243
+ "eval_runtime": 310.4039,
244
+ "eval_samples_per_second": 1.624,
245
+ "eval_steps_per_second": 0.203,
246
+ "step": 300
247
+ },
248
+ {
249
+ "epoch": 2.7927927927927927,
250
+ "grad_norm": 5.0717315673828125,
251
+ "learning_rate": 6.0360360360360365e-05,
252
+ "loss": 0.0989,
253
+ "step": 310
254
+ },
255
+ {
256
+ "epoch": 2.8828828828828827,
257
+ "grad_norm": 3.2063348293304443,
258
+ "learning_rate": 5.585585585585585e-05,
259
+ "loss": 0.0418,
260
+ "step": 320
261
+ },
262
+ {
263
+ "epoch": 2.972972972972973,
264
+ "grad_norm": 0.19697409868240356,
265
+ "learning_rate": 5.135135135135135e-05,
266
+ "loss": 0.0192,
267
+ "step": 330
268
+ },
269
+ {
270
+ "epoch": 3.063063063063063,
271
+ "grad_norm": 0.07727790623903275,
272
+ "learning_rate": 4.684684684684685e-05,
273
+ "loss": 0.0468,
274
+ "step": 340
275
+ },
276
+ {
277
+ "epoch": 3.153153153153153,
278
+ "grad_norm": 0.06686805188655853,
279
+ "learning_rate": 4.234234234234234e-05,
280
+ "loss": 0.0359,
281
+ "step": 350
282
+ },
283
+ {
284
+ "epoch": 3.2432432432432434,
285
+ "grad_norm": 0.055528514087200165,
286
+ "learning_rate": 3.783783783783784e-05,
287
+ "loss": 0.0566,
288
+ "step": 360
289
+ },
290
+ {
291
+ "epoch": 3.3333333333333335,
292
+ "grad_norm": 0.05775384604930878,
293
+ "learning_rate": 3.3333333333333335e-05,
294
+ "loss": 0.0357,
295
+ "step": 370
296
+ },
297
+ {
298
+ "epoch": 3.4234234234234235,
299
+ "grad_norm": 0.058484651148319244,
300
+ "learning_rate": 2.882882882882883e-05,
301
+ "loss": 0.017,
302
+ "step": 380
303
+ },
304
+ {
305
+ "epoch": 3.5135135135135136,
306
+ "grad_norm": 0.052604686468839645,
307
+ "learning_rate": 2.4324324324324327e-05,
308
+ "loss": 0.0147,
309
+ "step": 390
310
+ },
311
+ {
312
+ "epoch": 3.6036036036036037,
313
+ "grad_norm": 0.055011238902807236,
314
+ "learning_rate": 1.981981981981982e-05,
315
+ "loss": 0.0144,
316
+ "step": 400
317
+ },
318
+ {
319
+ "epoch": 3.6036036036036037,
320
+ "eval_accuracy": 0.9742063492063492,
321
+ "eval_loss": 0.10980728268623352,
322
+ "eval_runtime": 310.9729,
323
+ "eval_samples_per_second": 1.621,
324
+ "eval_steps_per_second": 0.203,
325
+ "step": 400
326
+ }
327
+ ],
328
+ "logging_steps": 10,
329
+ "max_steps": 444,
330
+ "num_input_tokens_seen": 0,
331
+ "num_train_epochs": 4,
332
+ "save_steps": 100,
333
+ "stateful_callbacks": {
334
+ "TrainerControl": {
335
+ "args": {
336
+ "should_epoch_stop": false,
337
+ "should_evaluate": false,
338
+ "should_log": false,
339
+ "should_save": true,
340
+ "should_training_stop": false
341
+ },
342
+ "attributes": {}
343
+ }
344
+ },
345
+ "total_flos": 4.9270731755290214e+17,
346
+ "train_batch_size": 16,
347
+ "trial_name": null,
348
+ "trial_params": null
349
+ }
checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6838184368dc2ec5d278560cab1221860da265467950db67f760dc0dedb2b22b
3
+ size 5304
checkpoint-444/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Healthy",
13
+ "1": "Mosaic",
14
+ "2": "RedRot",
15
+ "3": "Rust",
16
+ "4": "Yellow"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "Healthy": "0",
23
+ "Mosaic": "1",
24
+ "RedRot": "2",
25
+ "Rust": "3",
26
+ "Yellow": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.47.1"
38
+ }
checkpoint-444/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd8dce2e895f0a0119f10e7b3064a9f330e7147ecf5623922cfb81058b9b3fdb
3
+ size 343233204
checkpoint-444/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bf55c27e29cb14d443926255fdf771e3e5dc952012fc2da2b45670933a1ab36
3
+ size 686581690
checkpoint-444/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-444/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2abbcbec909b5db88aa9231df83b1a7c42cea56d0dc2d1f4637d003e30a90e7f
3
+ size 13990
checkpoint-444/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1805ba1e1c502c52489cdfe3ea0610e2f03c76421ceb42db92238f0b9237789
3
+ size 1064
checkpoint-444/trainer_state.json ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.10980728268623352,
3
+ "best_model_checkpoint": "./vit-sugarcane-leaf/checkpoint-400",
4
+ "epoch": 4.0,
5
+ "eval_steps": 100,
6
+ "global_step": 444,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09009009009009009,
13
+ "grad_norm": 1.9724489450454712,
14
+ "learning_rate": 0.0001954954954954955,
15
+ "loss": 1.4657,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.18018018018018017,
20
+ "grad_norm": 2.1435556411743164,
21
+ "learning_rate": 0.000190990990990991,
22
+ "loss": 0.9696,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.2702702702702703,
27
+ "grad_norm": 4.765104293823242,
28
+ "learning_rate": 0.0001864864864864865,
29
+ "loss": 0.5248,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.36036036036036034,
34
+ "grad_norm": 4.9860734939575195,
35
+ "learning_rate": 0.000181981981981982,
36
+ "loss": 0.5041,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.45045045045045046,
41
+ "grad_norm": 2.0347421169281006,
42
+ "learning_rate": 0.0001774774774774775,
43
+ "loss": 0.4714,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.5405405405405406,
48
+ "grad_norm": 1.83969247341156,
49
+ "learning_rate": 0.000172972972972973,
50
+ "loss": 0.3427,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.6306306306306306,
55
+ "grad_norm": 3.7638847827911377,
56
+ "learning_rate": 0.00016846846846846846,
57
+ "loss": 0.3153,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.7207207207207207,
62
+ "grad_norm": 4.0417351722717285,
63
+ "learning_rate": 0.00016396396396396395,
64
+ "loss": 0.3075,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.8108108108108109,
69
+ "grad_norm": 1.5025771856307983,
70
+ "learning_rate": 0.00015945945945945947,
71
+ "loss": 0.1659,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.9009009009009009,
76
+ "grad_norm": 4.198694229125977,
77
+ "learning_rate": 0.00015495495495495496,
78
+ "loss": 0.1933,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.9009009009009009,
83
+ "eval_accuracy": 0.9464285714285714,
84
+ "eval_loss": 0.1887584924697876,
85
+ "eval_runtime": 307.7246,
86
+ "eval_samples_per_second": 1.638,
87
+ "eval_steps_per_second": 0.205,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 0.990990990990991,
92
+ "grad_norm": 1.807138442993164,
93
+ "learning_rate": 0.00015045045045045046,
94
+ "loss": 0.1295,
95
+ "step": 110
96
+ },
97
+ {
98
+ "epoch": 1.0810810810810811,
99
+ "grad_norm": 0.8208228349685669,
100
+ "learning_rate": 0.00014594594594594595,
101
+ "loss": 0.1241,
102
+ "step": 120
103
+ },
104
+ {
105
+ "epoch": 1.1711711711711712,
106
+ "grad_norm": 0.37107059359550476,
107
+ "learning_rate": 0.00014144144144144144,
108
+ "loss": 0.1543,
109
+ "step": 130
110
+ },
111
+ {
112
+ "epoch": 1.2612612612612613,
113
+ "grad_norm": 3.343444585800171,
114
+ "learning_rate": 0.00013693693693693693,
115
+ "loss": 0.1222,
116
+ "step": 140
117
+ },
118
+ {
119
+ "epoch": 1.3513513513513513,
120
+ "grad_norm": 0.42869412899017334,
121
+ "learning_rate": 0.00013243243243243243,
122
+ "loss": 0.1076,
123
+ "step": 150
124
+ },
125
+ {
126
+ "epoch": 1.4414414414414414,
127
+ "grad_norm": 0.1316378265619278,
128
+ "learning_rate": 0.00012792792792792795,
129
+ "loss": 0.1289,
130
+ "step": 160
131
+ },
132
+ {
133
+ "epoch": 1.5315315315315314,
134
+ "grad_norm": 0.11681246012449265,
135
+ "learning_rate": 0.00012342342342342344,
136
+ "loss": 0.1037,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 1.6216216216216215,
141
+ "grad_norm": 4.53013801574707,
142
+ "learning_rate": 0.00011891891891891893,
143
+ "loss": 0.0999,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 1.7117117117117115,
148
+ "grad_norm": 2.964167594909668,
149
+ "learning_rate": 0.00011441441441441443,
150
+ "loss": 0.2104,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.8018018018018018,
155
+ "grad_norm": 0.1299811601638794,
156
+ "learning_rate": 0.00010990990990990993,
157
+ "loss": 0.0794,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.8018018018018018,
162
+ "eval_accuracy": 0.9623015873015873,
163
+ "eval_loss": 0.17631131410598755,
164
+ "eval_runtime": 312.6911,
165
+ "eval_samples_per_second": 1.612,
166
+ "eval_steps_per_second": 0.201,
167
+ "step": 200
168
+ },
169
+ {
170
+ "epoch": 1.8918918918918919,
171
+ "grad_norm": 4.118229866027832,
172
+ "learning_rate": 0.0001054054054054054,
173
+ "loss": 0.1379,
174
+ "step": 210
175
+ },
176
+ {
177
+ "epoch": 1.981981981981982,
178
+ "grad_norm": 0.08945044875144958,
179
+ "learning_rate": 0.00010090090090090089,
180
+ "loss": 0.1375,
181
+ "step": 220
182
+ },
183
+ {
184
+ "epoch": 2.0720720720720722,
185
+ "grad_norm": 0.09754334390163422,
186
+ "learning_rate": 9.639639639639641e-05,
187
+ "loss": 0.0309,
188
+ "step": 230
189
+ },
190
+ {
191
+ "epoch": 2.1621621621621623,
192
+ "grad_norm": 0.08668765425682068,
193
+ "learning_rate": 9.18918918918919e-05,
194
+ "loss": 0.0473,
195
+ "step": 240
196
+ },
197
+ {
198
+ "epoch": 2.2522522522522523,
199
+ "grad_norm": 1.020466923713684,
200
+ "learning_rate": 8.738738738738738e-05,
201
+ "loss": 0.0479,
202
+ "step": 250
203
+ },
204
+ {
205
+ "epoch": 2.3423423423423424,
206
+ "grad_norm": 4.378125190734863,
207
+ "learning_rate": 8.288288288288289e-05,
208
+ "loss": 0.0266,
209
+ "step": 260
210
+ },
211
+ {
212
+ "epoch": 2.4324324324324325,
213
+ "grad_norm": 0.8731359243392944,
214
+ "learning_rate": 7.837837837837838e-05,
215
+ "loss": 0.0555,
216
+ "step": 270
217
+ },
218
+ {
219
+ "epoch": 2.5225225225225225,
220
+ "grad_norm": 0.1169867143034935,
221
+ "learning_rate": 7.387387387387387e-05,
222
+ "loss": 0.0908,
223
+ "step": 280
224
+ },
225
+ {
226
+ "epoch": 2.6126126126126126,
227
+ "grad_norm": 0.08126968890428543,
228
+ "learning_rate": 6.936936936936938e-05,
229
+ "loss": 0.1091,
230
+ "step": 290
231
+ },
232
+ {
233
+ "epoch": 2.7027027027027026,
234
+ "grad_norm": 0.06706036627292633,
235
+ "learning_rate": 6.486486486486487e-05,
236
+ "loss": 0.0773,
237
+ "step": 300
238
+ },
239
+ {
240
+ "epoch": 2.7027027027027026,
241
+ "eval_accuracy": 0.9603174603174603,
242
+ "eval_loss": 0.1754508912563324,
243
+ "eval_runtime": 310.4039,
244
+ "eval_samples_per_second": 1.624,
245
+ "eval_steps_per_second": 0.203,
246
+ "step": 300
247
+ },
248
+ {
249
+ "epoch": 2.7927927927927927,
250
+ "grad_norm": 5.0717315673828125,
251
+ "learning_rate": 6.0360360360360365e-05,
252
+ "loss": 0.0989,
253
+ "step": 310
254
+ },
255
+ {
256
+ "epoch": 2.8828828828828827,
257
+ "grad_norm": 3.2063348293304443,
258
+ "learning_rate": 5.585585585585585e-05,
259
+ "loss": 0.0418,
260
+ "step": 320
261
+ },
262
+ {
263
+ "epoch": 2.972972972972973,
264
+ "grad_norm": 0.19697409868240356,
265
+ "learning_rate": 5.135135135135135e-05,
266
+ "loss": 0.0192,
267
+ "step": 330
268
+ },
269
+ {
270
+ "epoch": 3.063063063063063,
271
+ "grad_norm": 0.07727790623903275,
272
+ "learning_rate": 4.684684684684685e-05,
273
+ "loss": 0.0468,
274
+ "step": 340
275
+ },
276
+ {
277
+ "epoch": 3.153153153153153,
278
+ "grad_norm": 0.06686805188655853,
279
+ "learning_rate": 4.234234234234234e-05,
280
+ "loss": 0.0359,
281
+ "step": 350
282
+ },
283
+ {
284
+ "epoch": 3.2432432432432434,
285
+ "grad_norm": 0.055528514087200165,
286
+ "learning_rate": 3.783783783783784e-05,
287
+ "loss": 0.0566,
288
+ "step": 360
289
+ },
290
+ {
291
+ "epoch": 3.3333333333333335,
292
+ "grad_norm": 0.05775384604930878,
293
+ "learning_rate": 3.3333333333333335e-05,
294
+ "loss": 0.0357,
295
+ "step": 370
296
+ },
297
+ {
298
+ "epoch": 3.4234234234234235,
299
+ "grad_norm": 0.058484651148319244,
300
+ "learning_rate": 2.882882882882883e-05,
301
+ "loss": 0.017,
302
+ "step": 380
303
+ },
304
+ {
305
+ "epoch": 3.5135135135135136,
306
+ "grad_norm": 0.052604686468839645,
307
+ "learning_rate": 2.4324324324324327e-05,
308
+ "loss": 0.0147,
309
+ "step": 390
310
+ },
311
+ {
312
+ "epoch": 3.6036036036036037,
313
+ "grad_norm": 0.055011238902807236,
314
+ "learning_rate": 1.981981981981982e-05,
315
+ "loss": 0.0144,
316
+ "step": 400
317
+ },
318
+ {
319
+ "epoch": 3.6036036036036037,
320
+ "eval_accuracy": 0.9742063492063492,
321
+ "eval_loss": 0.10980728268623352,
322
+ "eval_runtime": 310.9729,
323
+ "eval_samples_per_second": 1.621,
324
+ "eval_steps_per_second": 0.203,
325
+ "step": 400
326
+ },
327
+ {
328
+ "epoch": 3.6936936936936937,
329
+ "grad_norm": 0.05112636834383011,
330
+ "learning_rate": 1.5315315315315316e-05,
331
+ "loss": 0.0147,
332
+ "step": 410
333
+ },
334
+ {
335
+ "epoch": 3.7837837837837838,
336
+ "grad_norm": 0.05180066451430321,
337
+ "learning_rate": 1.0810810810810812e-05,
338
+ "loss": 0.0137,
339
+ "step": 420
340
+ },
341
+ {
342
+ "epoch": 3.873873873873874,
343
+ "grad_norm": 0.05081380903720856,
344
+ "learning_rate": 6.306306306306306e-06,
345
+ "loss": 0.0135,
346
+ "step": 430
347
+ },
348
+ {
349
+ "epoch": 3.963963963963964,
350
+ "grad_norm": 0.0710749477148056,
351
+ "learning_rate": 1.801801801801802e-06,
352
+ "loss": 0.0138,
353
+ "step": 440
354
+ }
355
+ ],
356
+ "logging_steps": 10,
357
+ "max_steps": 444,
358
+ "num_input_tokens_seen": 0,
359
+ "num_train_epochs": 4,
360
+ "save_steps": 100,
361
+ "stateful_callbacks": {
362
+ "TrainerControl": {
363
+ "args": {
364
+ "should_epoch_stop": false,
365
+ "should_evaluate": false,
366
+ "should_log": false,
367
+ "should_save": true,
368
+ "should_training_stop": true
369
+ },
370
+ "attributes": {}
371
+ }
372
+ },
373
+ "total_flos": 5.4617822807688806e+17,
374
+ "train_batch_size": 16,
375
+ "trial_name": null,
376
+ "trial_params": null
377
+ }
checkpoint-444/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6838184368dc2ec5d278560cab1221860da265467950db67f760dc0dedb2b22b
3
+ size 5304
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Healthy",
13
+ "1": "Mosaic",
14
+ "2": "RedRot",
15
+ "3": "Rust",
16
+ "4": "Yellow"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "Healthy": "0",
23
+ "Mosaic": "1",
24
+ "RedRot": "2",
25
+ "Rust": "3",
26
+ "Yellow": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.47.1"
38
+ }
eval_results.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.9742063492063492,
3
+ "eval_loss": 0.10980728268623352
4
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93e9c2cc6195e7885db451b6aa506def9d9298f9f7904ec4830f50b89e002c0
3
+ size 343233204
preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
runs/Jan26_06-44-07_fded3d64124b/events.out.tfevents.1737873865.fded3d64124b.1209.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e710723a2c62f56cd9f9204c0e657cbf3ccc331f7d48e140f67462f4809639b
3
+ size 7225
runs/Jan26_07-45-24_fded3d64124b/events.out.tfevents.1737877531.fded3d64124b.1209.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17af0119318fa50843b1d3e89c29e7986311cfb107742d46c0924aa60f6e11f0
3
+ size 16031
runs/Jan26_07-45-24_fded3d64124b/events.out.tfevents.1737892316.fded3d64124b.1209.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a289624f2a5aec3e4c0c3627ab9f1a7b855bddde4c2627e33b12405115527e8c
3
+ size 5514
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6838184368dc2ec5d278560cab1221860da265467950db67f760dc0dedb2b22b
3
+ size 5304