darklorddad committed on
Commit
92e9e01
·
verified ·
1 Parent(s): fd9dea2

Upload 13 files

Browse files
README.md CHANGED
@@ -1,3 +1,42 @@
 
1
  ---
2
- license: agpl-3.0
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  ---
3
+ tags:
4
+ - autotrain
5
+ - transformers
6
+ - image-classification
7
+ base_model: microsoft/swin-tiny-patch4-window7-224
8
+ widget:
9
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
10
+ example_title: Tiger
11
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
12
+ example_title: Teapot
13
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
14
+ example_title: Palace
15
  ---
16
+
17
+ # Model Trained Using AutoTrain
18
+
19
+ - Problem type: Image Classification
20
+
21
+ ## Validation Metrics
22
+ loss: 0.7968999147415161
23
+
24
+ f1_macro: 0.7757715160656338
25
+
26
+ f1_micro: 0.7844262295081967
27
+
28
+ f1_weighted: 0.779143908165123
29
+
30
+ precision_macro: 0.8028276290702762
31
+
32
+ precision_micro: 0.7844262295081967
33
+
34
+ precision_weighted: 0.8051517111678635
35
+
36
+ recall_macro: 0.7802916666666665
37
+
38
+ recall_micro: 0.7844262295081967
39
+
40
+ recall_weighted: 0.7844262295081967
41
+
42
+ accuracy: 0.7844262295081967
checkpoint-1275/config.json ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/swin-tiny-patch4-window7-224",
3
+ "_num_labels": 200,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 6,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 96,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 768,
20
+ "id2label": {
21
+ "0": "acadian_flycatcher",
22
+ "1": "american_crow",
23
+ "2": "american_goldfinch",
24
+ "3": "american_pipit",
25
+ "4": "american_redstart",
26
+ "5": "american_three_toed_woodpecker",
27
+ "6": "anna_hummingbird",
28
+ "7": "artic_tern",
29
+ "8": "baird_sparrow",
30
+ "9": "baltimore_oriole",
31
+ "10": "bank_swallow",
32
+ "11": "barn_swallow",
33
+ "12": "bay_breasted_warbler",
34
+ "13": "belted_kingfisher",
35
+ "14": "bewick_wren",
36
+ "15": "black_and_white_warbler",
37
+ "16": "black_billed_cuckoo",
38
+ "17": "black_capped_vireo",
39
+ "18": "black_footed_albatross",
40
+ "19": "black_tern",
41
+ "20": "black_throated_blue_warbler",
42
+ "21": "black_throated_sparrow",
43
+ "22": "blue_grosbeak",
44
+ "23": "blue_headed_vireo",
45
+ "24": "blue_jay",
46
+ "25": "blue_winged_warbler",
47
+ "26": "boat_tailed_grackle",
48
+ "27": "bobolink",
49
+ "28": "bohemian_waxwing",
50
+ "29": "brandt_cormorant",
51
+ "30": "brewer_blackbird",
52
+ "31": "brewer_sparrow",
53
+ "32": "bronzed_cowbird",
54
+ "33": "brown_creeper",
55
+ "34": "brown_pelican",
56
+ "35": "brown_thrasher",
57
+ "36": "cactus_wren",
58
+ "37": "california_gull",
59
+ "38": "canada_warbler",
60
+ "39": "cape_glossy_starling",
61
+ "40": "cape_may_warbler",
62
+ "41": "cardinal",
63
+ "42": "carolina_wren",
64
+ "43": "caspian_tern",
65
+ "44": "cedar_waxwing",
66
+ "45": "cerulean_warbler",
67
+ "46": "chestnut_sided_warbler",
68
+ "47": "chipping_sparrow",
69
+ "48": "chuck_will_widow",
70
+ "49": "clark_nutcracker",
71
+ "50": "clay_colored_sparrow",
72
+ "51": "cliff_swallow",
73
+ "52": "common_raven",
74
+ "53": "common_tern",
75
+ "54": "common_yellowthroat",
76
+ "55": "crested_auklet",
77
+ "56": "dark_eyed_junco",
78
+ "57": "downy_woodpecker",
79
+ "58": "eared_grebe",
80
+ "59": "eastern_towhee",
81
+ "60": "elegant_tern",
82
+ "61": "european_goldfinch",
83
+ "62": "evening_grosbeak",
84
+ "63": "field_sparrow",
85
+ "64": "fish_crow",
86
+ "65": "florida_jay",
87
+ "66": "forsters_tern",
88
+ "67": "fox_sparrow",
89
+ "68": "frigatebird",
90
+ "69": "gadwall",
91
+ "70": "geococcyx",
92
+ "71": "glaucous_winged_gull",
93
+ "72": "golden_winged_warbler",
94
+ "73": "grasshopper_sparrow",
95
+ "74": "gray_catbird",
96
+ "75": "gray_crowned_rosy_finch",
97
+ "76": "gray_kingbird",
98
+ "77": "great_crested_flycatcher",
99
+ "78": "great_grey_shrike",
100
+ "79": "green_jay",
101
+ "80": "green_kingfisher",
102
+ "81": "green_tailed_towhee",
103
+ "82": "green_violetear",
104
+ "83": "groove_billed_ani",
105
+ "84": "harris_sparrow",
106
+ "85": "heermann_gull",
107
+ "86": "henslow_sparrow",
108
+ "87": "herring_gull",
109
+ "88": "hooded_merganser",
110
+ "89": "hooded_oriole",
111
+ "90": "hooded_warbler",
112
+ "91": "horned_grebe",
113
+ "92": "horned_lark",
114
+ "93": "horned_puffin",
115
+ "94": "house_sparrow",
116
+ "95": "house_wren",
117
+ "96": "indigo_bunting",
118
+ "97": "ivory_gull",
119
+ "98": "kentucky_warbler",
120
+ "99": "laysan_albatross",
121
+ "100": "lazuli_bunting",
122
+ "101": "le_conte_sparrow",
123
+ "102": "least_auklet",
124
+ "103": "least_flycatcher",
125
+ "104": "least_tern",
126
+ "105": "lincoln_sparrow",
127
+ "106": "loggerhead_shrike",
128
+ "107": "long_tailed_jaeger",
129
+ "108": "louisiana_waterthrush",
130
+ "109": "magnolia_warbler",
131
+ "110": "mallard",
132
+ "111": "mangrove_cuckoo",
133
+ "112": "marsh_wren",
134
+ "113": "mockingbird",
135
+ "114": "mourning_warbler",
136
+ "115": "myrtle_warbler",
137
+ "116": "nashville_warbler",
138
+ "117": "nelson_sharp_tailed_sparrow",
139
+ "118": "nighthawk",
140
+ "119": "northern_flicker",
141
+ "120": "northern_fulmar",
142
+ "121": "northern_waterthrush",
143
+ "122": "olive_sided_flycatcher",
144
+ "123": "orange_crowned_warbler",
145
+ "124": "orchard_oriole",
146
+ "125": "ovenbird",
147
+ "126": "pacific_loon",
148
+ "127": "painted_bunting",
149
+ "128": "palm_warbler",
150
+ "129": "parakeet_auklet",
151
+ "130": "pelagic_cormorant",
152
+ "131": "philadelphia_vireo",
153
+ "132": "pied_billed_grebe",
154
+ "133": "pied_kingfisher",
155
+ "134": "pigeon_guillemot",
156
+ "135": "pileated_woodpecker",
157
+ "136": "pine_grosbeak",
158
+ "137": "pine_warbler",
159
+ "138": "pomarine_jaeger",
160
+ "139": "prairie_warbler",
161
+ "140": "prothonotary_warbler",
162
+ "141": "purple_finch",
163
+ "142": "red_bellied_woodpecker",
164
+ "143": "red_breasted_merganser",
165
+ "144": "red_cockaded_woodpecker",
166
+ "145": "red_eyed_vireo",
167
+ "146": "red_faced_cormorant",
168
+ "147": "red_headed_woodpecker",
169
+ "148": "red_legged_kittiwake",
170
+ "149": "red_winged_blackbird",
171
+ "150": "rhinoceros_auklet",
172
+ "151": "ring_billed_gull",
173
+ "152": "ringed_kingfisher",
174
+ "153": "rock_wren",
175
+ "154": "rose_breasted_grosbeak",
176
+ "155": "ruby_throated_hummingbird",
177
+ "156": "rufous_hummingbird",
178
+ "157": "rusty_blackbird",
179
+ "158": "sage_thrasher",
180
+ "159": "savannah_sparrow",
181
+ "160": "sayornis",
182
+ "161": "scarlet_tanager",
183
+ "162": "scissor_tailed_flycatcher",
184
+ "163": "scott_oriole",
185
+ "164": "seaside_sparrow",
186
+ "165": "shiny_cowbird",
187
+ "166": "slaty_backed_gull",
188
+ "167": "song_sparrow",
189
+ "168": "sooty_albatross",
190
+ "169": "spotted_catbird",
191
+ "170": "summer_tanager",
192
+ "171": "swainson_warbler",
193
+ "172": "tennessee_warbler",
194
+ "173": "tree_sparrow",
195
+ "174": "tree_swallow",
196
+ "175": "tropical_kingbird",
197
+ "176": "vermilion_flycatcher",
198
+ "177": "vesper_sparrow",
199
+ "178": "warbling_vireo",
200
+ "179": "western_grebe",
201
+ "180": "western_gull",
202
+ "181": "western_meadowlark",
203
+ "182": "western_wood_pewee",
204
+ "183": "whip_poor_will",
205
+ "184": "white_breasted_kingfisher",
206
+ "185": "white_breasted_nuthatch",
207
+ "186": "white_crowned_sparrow",
208
+ "187": "white_eyed_vireo",
209
+ "188": "white_necked_raven",
210
+ "189": "white_pelican",
211
+ "190": "white_throated_sparrow",
212
+ "191": "wilson_warbler",
213
+ "192": "winter_wren",
214
+ "193": "worm_eating_warbler",
215
+ "194": "yellow_bellied_flycatcher",
216
+ "195": "yellow_billed_cuckoo",
217
+ "196": "yellow_breasted_chat",
218
+ "197": "yellow_headed_blackbird",
219
+ "198": "yellow_throated_vireo",
220
+ "199": "yellow_warbler"
221
+ },
222
+ "image_size": 224,
223
+ "initializer_range": 0.02,
224
+ "label2id": {
225
+ "acadian_flycatcher": 0,
226
+ "american_crow": 1,
227
+ "american_goldfinch": 2,
228
+ "american_pipit": 3,
229
+ "american_redstart": 4,
230
+ "american_three_toed_woodpecker": 5,
231
+ "anna_hummingbird": 6,
232
+ "artic_tern": 7,
233
+ "baird_sparrow": 8,
234
+ "baltimore_oriole": 9,
235
+ "bank_swallow": 10,
236
+ "barn_swallow": 11,
237
+ "bay_breasted_warbler": 12,
238
+ "belted_kingfisher": 13,
239
+ "bewick_wren": 14,
240
+ "black_and_white_warbler": 15,
241
+ "black_billed_cuckoo": 16,
242
+ "black_capped_vireo": 17,
243
+ "black_footed_albatross": 18,
244
+ "black_tern": 19,
245
+ "black_throated_blue_warbler": 20,
246
+ "black_throated_sparrow": 21,
247
+ "blue_grosbeak": 22,
248
+ "blue_headed_vireo": 23,
249
+ "blue_jay": 24,
250
+ "blue_winged_warbler": 25,
251
+ "boat_tailed_grackle": 26,
252
+ "bobolink": 27,
253
+ "bohemian_waxwing": 28,
254
+ "brandt_cormorant": 29,
255
+ "brewer_blackbird": 30,
256
+ "brewer_sparrow": 31,
257
+ "bronzed_cowbird": 32,
258
+ "brown_creeper": 33,
259
+ "brown_pelican": 34,
260
+ "brown_thrasher": 35,
261
+ "cactus_wren": 36,
262
+ "california_gull": 37,
263
+ "canada_warbler": 38,
264
+ "cape_glossy_starling": 39,
265
+ "cape_may_warbler": 40,
266
+ "cardinal": 41,
267
+ "carolina_wren": 42,
268
+ "caspian_tern": 43,
269
+ "cedar_waxwing": 44,
270
+ "cerulean_warbler": 45,
271
+ "chestnut_sided_warbler": 46,
272
+ "chipping_sparrow": 47,
273
+ "chuck_will_widow": 48,
274
+ "clark_nutcracker": 49,
275
+ "clay_colored_sparrow": 50,
276
+ "cliff_swallow": 51,
277
+ "common_raven": 52,
278
+ "common_tern": 53,
279
+ "common_yellowthroat": 54,
280
+ "crested_auklet": 55,
281
+ "dark_eyed_junco": 56,
282
+ "downy_woodpecker": 57,
283
+ "eared_grebe": 58,
284
+ "eastern_towhee": 59,
285
+ "elegant_tern": 60,
286
+ "european_goldfinch": 61,
287
+ "evening_grosbeak": 62,
288
+ "field_sparrow": 63,
289
+ "fish_crow": 64,
290
+ "florida_jay": 65,
291
+ "forsters_tern": 66,
292
+ "fox_sparrow": 67,
293
+ "frigatebird": 68,
294
+ "gadwall": 69,
295
+ "geococcyx": 70,
296
+ "glaucous_winged_gull": 71,
297
+ "golden_winged_warbler": 72,
298
+ "grasshopper_sparrow": 73,
299
+ "gray_catbird": 74,
300
+ "gray_crowned_rosy_finch": 75,
301
+ "gray_kingbird": 76,
302
+ "great_crested_flycatcher": 77,
303
+ "great_grey_shrike": 78,
304
+ "green_jay": 79,
305
+ "green_kingfisher": 80,
306
+ "green_tailed_towhee": 81,
307
+ "green_violetear": 82,
308
+ "groove_billed_ani": 83,
309
+ "harris_sparrow": 84,
310
+ "heermann_gull": 85,
311
+ "henslow_sparrow": 86,
312
+ "herring_gull": 87,
313
+ "hooded_merganser": 88,
314
+ "hooded_oriole": 89,
315
+ "hooded_warbler": 90,
316
+ "horned_grebe": 91,
317
+ "horned_lark": 92,
318
+ "horned_puffin": 93,
319
+ "house_sparrow": 94,
320
+ "house_wren": 95,
321
+ "indigo_bunting": 96,
322
+ "ivory_gull": 97,
323
+ "kentucky_warbler": 98,
324
+ "laysan_albatross": 99,
325
+ "lazuli_bunting": 100,
326
+ "le_conte_sparrow": 101,
327
+ "least_auklet": 102,
328
+ "least_flycatcher": 103,
329
+ "least_tern": 104,
330
+ "lincoln_sparrow": 105,
331
+ "loggerhead_shrike": 106,
332
+ "long_tailed_jaeger": 107,
333
+ "louisiana_waterthrush": 108,
334
+ "magnolia_warbler": 109,
335
+ "mallard": 110,
336
+ "mangrove_cuckoo": 111,
337
+ "marsh_wren": 112,
338
+ "mockingbird": 113,
339
+ "mourning_warbler": 114,
340
+ "myrtle_warbler": 115,
341
+ "nashville_warbler": 116,
342
+ "nelson_sharp_tailed_sparrow": 117,
343
+ "nighthawk": 118,
344
+ "northern_flicker": 119,
345
+ "northern_fulmar": 120,
346
+ "northern_waterthrush": 121,
347
+ "olive_sided_flycatcher": 122,
348
+ "orange_crowned_warbler": 123,
349
+ "orchard_oriole": 124,
350
+ "ovenbird": 125,
351
+ "pacific_loon": 126,
352
+ "painted_bunting": 127,
353
+ "palm_warbler": 128,
354
+ "parakeet_auklet": 129,
355
+ "pelagic_cormorant": 130,
356
+ "philadelphia_vireo": 131,
357
+ "pied_billed_grebe": 132,
358
+ "pied_kingfisher": 133,
359
+ "pigeon_guillemot": 134,
360
+ "pileated_woodpecker": 135,
361
+ "pine_grosbeak": 136,
362
+ "pine_warbler": 137,
363
+ "pomarine_jaeger": 138,
364
+ "prairie_warbler": 139,
365
+ "prothonotary_warbler": 140,
366
+ "purple_finch": 141,
367
+ "red_bellied_woodpecker": 142,
368
+ "red_breasted_merganser": 143,
369
+ "red_cockaded_woodpecker": 144,
370
+ "red_eyed_vireo": 145,
371
+ "red_faced_cormorant": 146,
372
+ "red_headed_woodpecker": 147,
373
+ "red_legged_kittiwake": 148,
374
+ "red_winged_blackbird": 149,
375
+ "rhinoceros_auklet": 150,
376
+ "ring_billed_gull": 151,
377
+ "ringed_kingfisher": 152,
378
+ "rock_wren": 153,
379
+ "rose_breasted_grosbeak": 154,
380
+ "ruby_throated_hummingbird": 155,
381
+ "rufous_hummingbird": 156,
382
+ "rusty_blackbird": 157,
383
+ "sage_thrasher": 158,
384
+ "savannah_sparrow": 159,
385
+ "sayornis": 160,
386
+ "scarlet_tanager": 161,
387
+ "scissor_tailed_flycatcher": 162,
388
+ "scott_oriole": 163,
389
+ "seaside_sparrow": 164,
390
+ "shiny_cowbird": 165,
391
+ "slaty_backed_gull": 166,
392
+ "song_sparrow": 167,
393
+ "sooty_albatross": 168,
394
+ "spotted_catbird": 169,
395
+ "summer_tanager": 170,
396
+ "swainson_warbler": 171,
397
+ "tennessee_warbler": 172,
398
+ "tree_sparrow": 173,
399
+ "tree_swallow": 174,
400
+ "tropical_kingbird": 175,
401
+ "vermilion_flycatcher": 176,
402
+ "vesper_sparrow": 177,
403
+ "warbling_vireo": 178,
404
+ "western_grebe": 179,
405
+ "western_gull": 180,
406
+ "western_meadowlark": 181,
407
+ "western_wood_pewee": 182,
408
+ "whip_poor_will": 183,
409
+ "white_breasted_kingfisher": 184,
410
+ "white_breasted_nuthatch": 185,
411
+ "white_crowned_sparrow": 186,
412
+ "white_eyed_vireo": 187,
413
+ "white_necked_raven": 188,
414
+ "white_pelican": 189,
415
+ "white_throated_sparrow": 190,
416
+ "wilson_warbler": 191,
417
+ "winter_wren": 192,
418
+ "worm_eating_warbler": 193,
419
+ "yellow_bellied_flycatcher": 194,
420
+ "yellow_billed_cuckoo": 195,
421
+ "yellow_breasted_chat": 196,
422
+ "yellow_headed_blackbird": 197,
423
+ "yellow_throated_vireo": 198,
424
+ "yellow_warbler": 199
425
+ },
426
+ "layer_norm_eps": 1e-05,
427
+ "mlp_ratio": 4.0,
428
+ "model_type": "swin",
429
+ "num_channels": 3,
430
+ "num_heads": [
431
+ 3,
432
+ 6,
433
+ 12,
434
+ 24
435
+ ],
436
+ "num_layers": 4,
437
+ "out_features": [
438
+ "stage4"
439
+ ],
440
+ "out_indices": [
441
+ 4
442
+ ],
443
+ "patch_size": 4,
444
+ "path_norm": true,
445
+ "problem_type": "single_label_classification",
446
+ "qkv_bias": true,
447
+ "stage_names": [
448
+ "stem",
449
+ "stage1",
450
+ "stage2",
451
+ "stage3",
452
+ "stage4"
453
+ ],
454
+ "torch_dtype": "float32",
455
+ "transformers_version": "4.48.0",
456
+ "use_absolute_embeddings": false,
457
+ "window_size": 7
458
+ }
checkpoint-1275/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ece3ff365348e4ec99152a1e2e68271a89b4ca6c4b9ef7997ce929dce77f3452
3
+ size 14244
checkpoint-1275/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abfb41d8963e26d42b1b2b5d933e991567e940c3a7bfa1df8fa6831c0d990e47
3
+ size 1064
checkpoint-1275/trainer_state.json ADDED
@@ -0,0 +1,1766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7968999147415161,
3
+ "best_model_checkpoint": "Model-Swin-Tiny-\\checkpoint-1275",
4
+ "epoch": 25.0,
5
+ "eval_steps": 7,
6
+ "global_step": 1275,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1390728476821192,
13
+ "grad_norm": 2.582465887069702,
14
+ "learning_rate": 7.000000000000001e-07,
15
+ "loss": 5.3188,
16
+ "step": 7
17
+ },
18
+ {
19
+ "epoch": 0.2781456953642384,
20
+ "grad_norm": 2.5382866859436035,
21
+ "learning_rate": 1.4000000000000001e-06,
22
+ "loss": 5.3177,
23
+ "step": 14
24
+ },
25
+ {
26
+ "epoch": 0.41721854304635764,
27
+ "grad_norm": 2.584491729736328,
28
+ "learning_rate": 2.1000000000000002e-06,
29
+ "loss": 5.328,
30
+ "step": 21
31
+ },
32
+ {
33
+ "epoch": 0.5562913907284768,
34
+ "grad_norm": 2.427229166030884,
35
+ "learning_rate": 2.8000000000000003e-06,
36
+ "loss": 5.318,
37
+ "step": 28
38
+ },
39
+ {
40
+ "epoch": 0.695364238410596,
41
+ "grad_norm": 2.237027645111084,
42
+ "learning_rate": 3.5000000000000004e-06,
43
+ "loss": 5.3118,
44
+ "step": 35
45
+ },
46
+ {
47
+ "epoch": 0.8344370860927153,
48
+ "grad_norm": 2.2959165573120117,
49
+ "learning_rate": 4.2000000000000004e-06,
50
+ "loss": 5.3152,
51
+ "step": 42
52
+ },
53
+ {
54
+ "epoch": 0.9735099337748344,
55
+ "grad_norm": 2.21553897857666,
56
+ "learning_rate": 4.9000000000000005e-06,
57
+ "loss": 5.3108,
58
+ "step": 49
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.010655737704918032,
63
+ "eval_f1_macro": 0.008245740297878333,
64
+ "eval_f1_micro": 0.010655737704918032,
65
+ "eval_f1_weighted": 0.007833702951703764,
66
+ "eval_loss": 5.3028178215026855,
67
+ "eval_precision_macro": 0.013928344671201815,
68
+ "eval_precision_micro": 0.010655737704918032,
69
+ "eval_precision_weighted": 0.012875227686703097,
70
+ "eval_recall_macro": 0.010809523809523809,
71
+ "eval_recall_micro": 0.010655737704918032,
72
+ "eval_recall_weighted": 0.010655737704918032,
73
+ "eval_runtime": 8.5908,
74
+ "eval_samples_per_second": 142.012,
75
+ "eval_steps_per_second": 2.328,
76
+ "step": 51
77
+ },
78
+ {
79
+ "epoch": 1.099337748344371,
80
+ "grad_norm": 2.847919225692749,
81
+ "learning_rate": 5.600000000000001e-06,
82
+ "loss": 4.8132,
83
+ "step": 56
84
+ },
85
+ {
86
+ "epoch": 1.23841059602649,
87
+ "grad_norm": 3.2894906997680664,
88
+ "learning_rate": 6.300000000000001e-06,
89
+ "loss": 5.2983,
90
+ "step": 63
91
+ },
92
+ {
93
+ "epoch": 1.3774834437086092,
94
+ "grad_norm": 2.435445785522461,
95
+ "learning_rate": 7.000000000000001e-06,
96
+ "loss": 5.2865,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 1.5165562913907285,
101
+ "grad_norm": 2.3491370677948,
102
+ "learning_rate": 7.7e-06,
103
+ "loss": 5.2764,
104
+ "step": 77
105
+ },
106
+ {
107
+ "epoch": 1.6556291390728477,
108
+ "grad_norm": 2.514453172683716,
109
+ "learning_rate": 8.400000000000001e-06,
110
+ "loss": 5.2795,
111
+ "step": 84
112
+ },
113
+ {
114
+ "epoch": 1.794701986754967,
115
+ "grad_norm": 2.1590051651000977,
116
+ "learning_rate": 9.100000000000001e-06,
117
+ "loss": 5.258,
118
+ "step": 91
119
+ },
120
+ {
121
+ "epoch": 1.9337748344370862,
122
+ "grad_norm": 2.380048990249634,
123
+ "learning_rate": 9.800000000000001e-06,
124
+ "loss": 5.2679,
125
+ "step": 98
126
+ },
127
+ {
128
+ "epoch": 2.0,
129
+ "eval_accuracy": 0.009836065573770493,
130
+ "eval_f1_macro": 0.008410365987289064,
131
+ "eval_f1_micro": 0.009836065573770493,
132
+ "eval_f1_weighted": 0.00796402280639355,
133
+ "eval_loss": 5.247003078460693,
134
+ "eval_precision_macro": 0.012720153303486637,
135
+ "eval_precision_micro": 0.009836065573770493,
136
+ "eval_precision_weighted": 0.011648029161690365,
137
+ "eval_recall_macro": 0.009976190476190477,
138
+ "eval_recall_micro": 0.009836065573770493,
139
+ "eval_recall_weighted": 0.009836065573770493,
140
+ "eval_runtime": 8.1459,
141
+ "eval_samples_per_second": 149.769,
142
+ "eval_steps_per_second": 2.455,
143
+ "step": 102
144
+ },
145
+ {
146
+ "epoch": 2.0596026490066226,
147
+ "grad_norm": 2.1982994079589844,
148
+ "learning_rate": 1.05e-05,
149
+ "loss": 4.7568,
150
+ "step": 105
151
+ },
152
+ {
153
+ "epoch": 2.198675496688742,
154
+ "grad_norm": 2.7330551147460938,
155
+ "learning_rate": 1.1200000000000001e-05,
156
+ "loss": 5.222,
157
+ "step": 112
158
+ },
159
+ {
160
+ "epoch": 2.337748344370861,
161
+ "grad_norm": 2.5250086784362793,
162
+ "learning_rate": 1.19e-05,
163
+ "loss": 5.2234,
164
+ "step": 119
165
+ },
166
+ {
167
+ "epoch": 2.47682119205298,
168
+ "grad_norm": 2.860276460647583,
169
+ "learning_rate": 1.2600000000000001e-05,
170
+ "loss": 5.1993,
171
+ "step": 126
172
+ },
173
+ {
174
+ "epoch": 2.6158940397350996,
175
+ "grad_norm": 3.1756386756896973,
176
+ "learning_rate": 1.3300000000000001e-05,
177
+ "loss": 5.1973,
178
+ "step": 133
179
+ },
180
+ {
181
+ "epoch": 2.7549668874172184,
182
+ "grad_norm": 2.984741449356079,
183
+ "learning_rate": 1.4000000000000001e-05,
184
+ "loss": 5.1596,
185
+ "step": 140
186
+ },
187
+ {
188
+ "epoch": 2.8940397350993377,
189
+ "grad_norm": 3.4375150203704834,
190
+ "learning_rate": 1.47e-05,
191
+ "loss": 5.1535,
192
+ "step": 147
193
+ },
194
+ {
195
+ "epoch": 3.0,
196
+ "eval_accuracy": 0.047540983606557376,
197
+ "eval_f1_macro": 0.0334518053806519,
198
+ "eval_f1_micro": 0.047540983606557376,
199
+ "eval_f1_weighted": 0.03398378381798487,
200
+ "eval_loss": 5.093519687652588,
201
+ "eval_precision_macro": 0.03341768076645832,
202
+ "eval_precision_micro": 0.047540983606557376,
203
+ "eval_precision_weighted": 0.033445451851956465,
204
+ "eval_recall_macro": 0.04548214285714286,
205
+ "eval_recall_micro": 0.047540983606557376,
206
+ "eval_recall_weighted": 0.047540983606557376,
207
+ "eval_runtime": 7.8608,
208
+ "eval_samples_per_second": 155.2,
209
+ "eval_steps_per_second": 2.544,
210
+ "step": 153
211
+ },
212
+ {
213
+ "epoch": 3.019867549668874,
214
+ "grad_norm": 3.4943971633911133,
215
+ "learning_rate": 1.54e-05,
216
+ "loss": 4.6315,
217
+ "step": 154
218
+ },
219
+ {
220
+ "epoch": 3.1589403973509933,
221
+ "grad_norm": 4.077115535736084,
222
+ "learning_rate": 1.6100000000000002e-05,
223
+ "loss": 5.0664,
224
+ "step": 161
225
+ },
226
+ {
227
+ "epoch": 3.2980132450331126,
228
+ "grad_norm": 4.969892978668213,
229
+ "learning_rate": 1.6800000000000002e-05,
230
+ "loss": 5.0309,
231
+ "step": 168
232
+ },
233
+ {
234
+ "epoch": 3.437086092715232,
235
+ "grad_norm": 6.20643424987793,
236
+ "learning_rate": 1.75e-05,
237
+ "loss": 4.9506,
238
+ "step": 175
239
+ },
240
+ {
241
+ "epoch": 3.576158940397351,
242
+ "grad_norm": 5.409440994262695,
243
+ "learning_rate": 1.8200000000000002e-05,
244
+ "loss": 4.9324,
245
+ "step": 182
246
+ },
247
+ {
248
+ "epoch": 3.7152317880794703,
249
+ "grad_norm": 6.3031134605407715,
250
+ "learning_rate": 1.8900000000000002e-05,
251
+ "loss": 4.8746,
252
+ "step": 189
253
+ },
254
+ {
255
+ "epoch": 3.8543046357615895,
256
+ "grad_norm": 5.86908483505249,
257
+ "learning_rate": 1.9600000000000002e-05,
258
+ "loss": 4.7977,
259
+ "step": 196
260
+ },
261
+ {
262
+ "epoch": 3.993377483443709,
263
+ "grad_norm": 6.539762020111084,
264
+ "learning_rate": 2.0300000000000002e-05,
265
+ "loss": 4.7438,
266
+ "step": 203
267
+ },
268
+ {
269
+ "epoch": 4.0,
270
+ "eval_accuracy": 0.08934426229508197,
271
+ "eval_f1_macro": 0.06995251642748464,
272
+ "eval_f1_micro": 0.08934426229508197,
273
+ "eval_f1_weighted": 0.07043312888490538,
274
+ "eval_loss": 4.615861892700195,
275
+ "eval_precision_macro": 0.08601477983511233,
276
+ "eval_precision_micro": 0.08934426229508197,
277
+ "eval_precision_weighted": 0.08437849721496109,
278
+ "eval_recall_macro": 0.08661309523809525,
279
+ "eval_recall_micro": 0.08934426229508197,
280
+ "eval_recall_weighted": 0.08934426229508197,
281
+ "eval_runtime": 7.7565,
282
+ "eval_samples_per_second": 157.287,
283
+ "eval_steps_per_second": 2.578,
284
+ "step": 204
285
+ },
286
+ {
287
+ "epoch": 4.119205298013245,
288
+ "grad_norm": 7.253829002380371,
289
+ "learning_rate": 2.1e-05,
290
+ "loss": 4.2004,
291
+ "step": 210
292
+ },
293
+ {
294
+ "epoch": 4.258278145695364,
295
+ "grad_norm": 9.1975679397583,
296
+ "learning_rate": 2.1700000000000002e-05,
297
+ "loss": 4.5028,
298
+ "step": 217
299
+ },
300
+ {
301
+ "epoch": 4.397350993377484,
302
+ "grad_norm": 7.770805835723877,
303
+ "learning_rate": 2.2400000000000002e-05,
304
+ "loss": 4.4363,
305
+ "step": 224
306
+ },
307
+ {
308
+ "epoch": 4.5364238410596025,
309
+ "grad_norm": 8.336009979248047,
310
+ "learning_rate": 2.3100000000000002e-05,
311
+ "loss": 4.2546,
312
+ "step": 231
313
+ },
314
+ {
315
+ "epoch": 4.675496688741722,
316
+ "grad_norm": 8.84949779510498,
317
+ "learning_rate": 2.38e-05,
318
+ "loss": 4.1982,
319
+ "step": 238
320
+ },
321
+ {
322
+ "epoch": 4.814569536423841,
323
+ "grad_norm": 9.084051132202148,
324
+ "learning_rate": 2.45e-05,
325
+ "loss": 4.0816,
326
+ "step": 245
327
+ },
328
+ {
329
+ "epoch": 4.95364238410596,
330
+ "grad_norm": 10.173778533935547,
331
+ "learning_rate": 2.5200000000000003e-05,
332
+ "loss": 3.9715,
333
+ "step": 252
334
+ },
335
+ {
336
+ "epoch": 5.0,
337
+ "eval_accuracy": 0.24098360655737705,
338
+ "eval_f1_macro": 0.19223627327284631,
339
+ "eval_f1_micro": 0.24098360655737705,
340
+ "eval_f1_weighted": 0.1991864013924831,
341
+ "eval_loss": 3.693058490753174,
342
+ "eval_precision_macro": 0.22356112769955647,
343
+ "eval_precision_micro": 0.24098360655737705,
344
+ "eval_precision_weighted": 0.22764487953674883,
345
+ "eval_recall_macro": 0.22954166666666664,
346
+ "eval_recall_micro": 0.24098360655737705,
347
+ "eval_recall_weighted": 0.24098360655737705,
348
+ "eval_runtime": 7.6802,
349
+ "eval_samples_per_second": 158.85,
350
+ "eval_steps_per_second": 2.604,
351
+ "step": 255
352
+ },
353
+ {
354
+ "epoch": 5.079470198675497,
355
+ "grad_norm": 10.615998268127441,
356
+ "learning_rate": 2.5900000000000003e-05,
357
+ "loss": 3.4457,
358
+ "step": 259
359
+ },
360
+ {
361
+ "epoch": 5.218543046357616,
362
+ "grad_norm": 11.359329223632812,
363
+ "learning_rate": 2.6600000000000003e-05,
364
+ "loss": 3.6421,
365
+ "step": 266
366
+ },
367
+ {
368
+ "epoch": 5.357615894039735,
369
+ "grad_norm": 9.347195625305176,
370
+ "learning_rate": 2.7300000000000003e-05,
371
+ "loss": 3.5802,
372
+ "step": 273
373
+ },
374
+ {
375
+ "epoch": 5.496688741721854,
376
+ "grad_norm": 36.44110870361328,
377
+ "learning_rate": 2.8000000000000003e-05,
378
+ "loss": 3.4378,
379
+ "step": 280
380
+ },
381
+ {
382
+ "epoch": 5.635761589403973,
383
+ "grad_norm": 12.00761890411377,
384
+ "learning_rate": 2.87e-05,
385
+ "loss": 3.398,
386
+ "step": 287
387
+ },
388
+ {
389
+ "epoch": 5.774834437086093,
390
+ "grad_norm": 11.801825523376465,
391
+ "learning_rate": 2.94e-05,
392
+ "loss": 3.3073,
393
+ "step": 294
394
+ },
395
+ {
396
+ "epoch": 5.913907284768212,
397
+ "grad_norm": 11.105921745300293,
398
+ "learning_rate": 3.01e-05,
399
+ "loss": 3.092,
400
+ "step": 301
401
+ },
402
+ {
403
+ "epoch": 6.0,
404
+ "eval_accuracy": 0.40409836065573773,
405
+ "eval_f1_macro": 0.3595422269523898,
406
+ "eval_f1_micro": 0.40409836065573773,
407
+ "eval_f1_weighted": 0.36980865200860835,
408
+ "eval_loss": 2.7539775371551514,
409
+ "eval_precision_macro": 0.4115824432389663,
410
+ "eval_precision_micro": 0.40409836065573773,
411
+ "eval_precision_weighted": 0.4179245283931502,
412
+ "eval_recall_macro": 0.3881369047619047,
413
+ "eval_recall_micro": 0.40409836065573773,
414
+ "eval_recall_weighted": 0.40409836065573773,
415
+ "eval_runtime": 7.7349,
416
+ "eval_samples_per_second": 157.727,
417
+ "eval_steps_per_second": 2.586,
418
+ "step": 306
419
+ },
420
+ {
421
+ "epoch": 6.039735099337748,
422
+ "grad_norm": 17.574556350708008,
423
+ "learning_rate": 3.08e-05,
424
+ "loss": 2.7521,
425
+ "step": 308
426
+ },
427
+ {
428
+ "epoch": 6.178807947019868,
429
+ "grad_norm": 9.803393363952637,
430
+ "learning_rate": 3.15e-05,
431
+ "loss": 2.8856,
432
+ "step": 315
433
+ },
434
+ {
435
+ "epoch": 6.317880794701987,
436
+ "grad_norm": 12.46679401397705,
437
+ "learning_rate": 3.2200000000000003e-05,
438
+ "loss": 2.6989,
439
+ "step": 322
440
+ },
441
+ {
442
+ "epoch": 6.456953642384106,
443
+ "grad_norm": 10.546730995178223,
444
+ "learning_rate": 3.29e-05,
445
+ "loss": 2.6773,
446
+ "step": 329
447
+ },
448
+ {
449
+ "epoch": 6.596026490066225,
450
+ "grad_norm": 10.680000305175781,
451
+ "learning_rate": 3.3600000000000004e-05,
452
+ "loss": 2.6294,
453
+ "step": 336
454
+ },
455
+ {
456
+ "epoch": 6.735099337748345,
457
+ "grad_norm": 11.50396728515625,
458
+ "learning_rate": 3.430000000000001e-05,
459
+ "loss": 2.6267,
460
+ "step": 343
461
+ },
462
+ {
463
+ "epoch": 6.874172185430464,
464
+ "grad_norm": 13.017539024353027,
465
+ "learning_rate": 3.5e-05,
466
+ "loss": 2.5391,
467
+ "step": 350
468
+ },
469
+ {
470
+ "epoch": 7.0,
471
+ "grad_norm": 8.430996894836426,
472
+ "learning_rate": 3.57e-05,
473
+ "loss": 2.2063,
474
+ "step": 357
475
+ },
476
+ {
477
+ "epoch": 7.0,
478
+ "eval_accuracy": 0.5139344262295082,
479
+ "eval_f1_macro": 0.4773453128408147,
480
+ "eval_f1_micro": 0.5139344262295082,
481
+ "eval_f1_weighted": 0.48667468151459403,
482
+ "eval_loss": 2.113166093826294,
483
+ "eval_precision_macro": 0.5348915124907773,
484
+ "eval_precision_micro": 0.5139344262295082,
485
+ "eval_precision_weighted": 0.5363564542042122,
486
+ "eval_recall_macro": 0.4992261904761905,
487
+ "eval_recall_micro": 0.5139344262295082,
488
+ "eval_recall_weighted": 0.5139344262295082,
489
+ "eval_runtime": 7.8286,
490
+ "eval_samples_per_second": 155.838,
491
+ "eval_steps_per_second": 2.555,
492
+ "step": 357
493
+ },
494
+ {
495
+ "epoch": 7.139072847682119,
496
+ "grad_norm": 10.343087196350098,
497
+ "learning_rate": 3.6400000000000004e-05,
498
+ "loss": 2.2819,
499
+ "step": 364
500
+ },
501
+ {
502
+ "epoch": 7.2781456953642385,
503
+ "grad_norm": 10.044729232788086,
504
+ "learning_rate": 3.71e-05,
505
+ "loss": 2.1958,
506
+ "step": 371
507
+ },
508
+ {
509
+ "epoch": 7.417218543046357,
510
+ "grad_norm": 11.718865394592285,
511
+ "learning_rate": 3.7800000000000004e-05,
512
+ "loss": 2.148,
513
+ "step": 378
514
+ },
515
+ {
516
+ "epoch": 7.556291390728477,
517
+ "grad_norm": 12.222508430480957,
518
+ "learning_rate": 3.85e-05,
519
+ "loss": 2.0688,
520
+ "step": 385
521
+ },
522
+ {
523
+ "epoch": 7.695364238410596,
524
+ "grad_norm": 11.58236026763916,
525
+ "learning_rate": 3.9200000000000004e-05,
526
+ "loss": 1.975,
527
+ "step": 392
528
+ },
529
+ {
530
+ "epoch": 7.8344370860927155,
531
+ "grad_norm": 10.627557754516602,
532
+ "learning_rate": 3.99e-05,
533
+ "loss": 2.004,
534
+ "step": 399
535
+ },
536
+ {
537
+ "epoch": 7.973509933774834,
538
+ "grad_norm": 11.480416297912598,
539
+ "learning_rate": 4.0600000000000004e-05,
540
+ "loss": 2.1376,
541
+ "step": 406
542
+ },
543
+ {
544
+ "epoch": 8.0,
545
+ "eval_accuracy": 0.6008196721311475,
546
+ "eval_f1_macro": 0.5775780136734551,
547
+ "eval_f1_micro": 0.6008196721311475,
548
+ "eval_f1_weighted": 0.5832949838381336,
549
+ "eval_loss": 1.6815783977508545,
550
+ "eval_precision_macro": 0.6357469042395514,
551
+ "eval_precision_micro": 0.6008196721311475,
552
+ "eval_precision_weighted": 0.6356073575251492,
553
+ "eval_recall_macro": 0.5908988095238095,
554
+ "eval_recall_micro": 0.6008196721311475,
555
+ "eval_recall_weighted": 0.6008196721311475,
556
+ "eval_runtime": 7.9124,
557
+ "eval_samples_per_second": 154.188,
558
+ "eval_steps_per_second": 2.528,
559
+ "step": 408
560
+ },
561
+ {
562
+ "epoch": 8.099337748344372,
563
+ "grad_norm": 10.665842056274414,
564
+ "learning_rate": 4.13e-05,
565
+ "loss": 1.6223,
566
+ "step": 413
567
+ },
568
+ {
569
+ "epoch": 8.23841059602649,
570
+ "grad_norm": 10.28793716430664,
571
+ "learning_rate": 4.2e-05,
572
+ "loss": 1.8407,
573
+ "step": 420
574
+ },
575
+ {
576
+ "epoch": 8.37748344370861,
577
+ "grad_norm": 9.664498329162598,
578
+ "learning_rate": 4.27e-05,
579
+ "loss": 1.8128,
580
+ "step": 427
581
+ },
582
+ {
583
+ "epoch": 8.516556291390728,
584
+ "grad_norm": 9.288748741149902,
585
+ "learning_rate": 4.3400000000000005e-05,
586
+ "loss": 1.6311,
587
+ "step": 434
588
+ },
589
+ {
590
+ "epoch": 8.655629139072847,
591
+ "grad_norm": 12.331849098205566,
592
+ "learning_rate": 4.41e-05,
593
+ "loss": 1.637,
594
+ "step": 441
595
+ },
596
+ {
597
+ "epoch": 8.794701986754967,
598
+ "grad_norm": 10.64306926727295,
599
+ "learning_rate": 4.4800000000000005e-05,
600
+ "loss": 1.5613,
601
+ "step": 448
602
+ },
603
+ {
604
+ "epoch": 8.933774834437086,
605
+ "grad_norm": 11.22636890411377,
606
+ "learning_rate": 4.55e-05,
607
+ "loss": 1.7138,
608
+ "step": 455
609
+ },
610
+ {
611
+ "epoch": 9.0,
612
+ "eval_accuracy": 0.639344262295082,
613
+ "eval_f1_macro": 0.6259736522470066,
614
+ "eval_f1_micro": 0.639344262295082,
615
+ "eval_f1_weighted": 0.6302877962757446,
616
+ "eval_loss": 1.4194470643997192,
617
+ "eval_precision_macro": 0.6881212537462537,
618
+ "eval_precision_micro": 0.639344262295082,
619
+ "eval_precision_weighted": 0.6848543874158628,
620
+ "eval_recall_macro": 0.6307857142857142,
621
+ "eval_recall_micro": 0.639344262295082,
622
+ "eval_recall_weighted": 0.639344262295082,
623
+ "eval_runtime": 7.8374,
624
+ "eval_samples_per_second": 155.665,
625
+ "eval_steps_per_second": 2.552,
626
+ "step": 459
627
+ },
628
+ {
629
+ "epoch": 9.059602649006623,
630
+ "grad_norm": 10.426998138427734,
631
+ "learning_rate": 4.6200000000000005e-05,
632
+ "loss": 1.3799,
633
+ "step": 462
634
+ },
635
+ {
636
+ "epoch": 9.198675496688741,
637
+ "grad_norm": 9.865791320800781,
638
+ "learning_rate": 4.69e-05,
639
+ "loss": 1.4825,
640
+ "step": 469
641
+ },
642
+ {
643
+ "epoch": 9.33774834437086,
644
+ "grad_norm": 10.425603866577148,
645
+ "learning_rate": 4.76e-05,
646
+ "loss": 1.4418,
647
+ "step": 476
648
+ },
649
+ {
650
+ "epoch": 9.47682119205298,
651
+ "grad_norm": 10.623998641967773,
652
+ "learning_rate": 4.83e-05,
653
+ "loss": 1.4183,
654
+ "step": 483
655
+ },
656
+ {
657
+ "epoch": 9.6158940397351,
658
+ "grad_norm": 19.386829376220703,
659
+ "learning_rate": 4.9e-05,
660
+ "loss": 1.4421,
661
+ "step": 490
662
+ },
663
+ {
664
+ "epoch": 9.754966887417218,
665
+ "grad_norm": 10.6463623046875,
666
+ "learning_rate": 4.97e-05,
667
+ "loss": 1.4223,
668
+ "step": 497
669
+ },
670
+ {
671
+ "epoch": 9.894039735099337,
672
+ "grad_norm": 11.120490074157715,
673
+ "learning_rate": 4.995555555555556e-05,
674
+ "loss": 1.4492,
675
+ "step": 504
676
+ },
677
+ {
678
+ "epoch": 10.0,
679
+ "eval_accuracy": 0.6893442622950819,
680
+ "eval_f1_macro": 0.6789676474626165,
681
+ "eval_f1_micro": 0.6893442622950819,
682
+ "eval_f1_weighted": 0.6817900244059966,
683
+ "eval_loss": 1.2429919242858887,
684
+ "eval_precision_macro": 0.7242377695833578,
685
+ "eval_precision_micro": 0.6893442622950819,
686
+ "eval_precision_weighted": 0.7221014998341421,
687
+ "eval_recall_macro": 0.6821309523809523,
688
+ "eval_recall_micro": 0.6893442622950819,
689
+ "eval_recall_weighted": 0.6893442622950819,
690
+ "eval_runtime": 7.9333,
691
+ "eval_samples_per_second": 153.782,
692
+ "eval_steps_per_second": 2.521,
693
+ "step": 510
694
+ },
695
+ {
696
+ "epoch": 10.019867549668874,
697
+ "grad_norm": 12.187692642211914,
698
+ "learning_rate": 4.987777777777778e-05,
699
+ "loss": 1.2479,
700
+ "step": 511
701
+ },
702
+ {
703
+ "epoch": 10.158940397350994,
704
+ "grad_norm": 9.530071258544922,
705
+ "learning_rate": 4.9800000000000004e-05,
706
+ "loss": 1.2886,
707
+ "step": 518
708
+ },
709
+ {
710
+ "epoch": 10.298013245033113,
711
+ "grad_norm": 11.248357772827148,
712
+ "learning_rate": 4.972222222222223e-05,
713
+ "loss": 1.3648,
714
+ "step": 525
715
+ },
716
+ {
717
+ "epoch": 10.437086092715232,
718
+ "grad_norm": 14.794153213500977,
719
+ "learning_rate": 4.964444444444445e-05,
720
+ "loss": 1.1894,
721
+ "step": 532
722
+ },
723
+ {
724
+ "epoch": 10.57615894039735,
725
+ "grad_norm": 10.403834342956543,
726
+ "learning_rate": 4.956666666666667e-05,
727
+ "loss": 1.2391,
728
+ "step": 539
729
+ },
730
+ {
731
+ "epoch": 10.71523178807947,
732
+ "grad_norm": 8.995583534240723,
733
+ "learning_rate": 4.948888888888889e-05,
734
+ "loss": 1.2544,
735
+ "step": 546
736
+ },
737
+ {
738
+ "epoch": 10.85430463576159,
739
+ "grad_norm": 11.541975021362305,
740
+ "learning_rate": 4.9411111111111114e-05,
741
+ "loss": 1.1367,
742
+ "step": 553
743
+ },
744
+ {
745
+ "epoch": 10.993377483443709,
746
+ "grad_norm": 11.431974411010742,
747
+ "learning_rate": 4.933333333333334e-05,
748
+ "loss": 1.1763,
749
+ "step": 560
750
+ },
751
+ {
752
+ "epoch": 11.0,
753
+ "eval_accuracy": 0.7131147540983607,
754
+ "eval_f1_macro": 0.7018493033024766,
755
+ "eval_f1_micro": 0.7131147540983607,
756
+ "eval_f1_weighted": 0.7056559436928543,
757
+ "eval_loss": 1.0868916511535645,
758
+ "eval_precision_macro": 0.7525895803542862,
759
+ "eval_precision_micro": 0.7131147540983607,
760
+ "eval_precision_weighted": 0.7488698884154525,
761
+ "eval_recall_macro": 0.7052321428571429,
762
+ "eval_recall_micro": 0.7131147540983607,
763
+ "eval_recall_weighted": 0.7131147540983607,
764
+ "eval_runtime": 7.8931,
765
+ "eval_samples_per_second": 154.565,
766
+ "eval_steps_per_second": 2.534,
767
+ "step": 561
768
+ },
769
+ {
770
+ "epoch": 11.119205298013245,
771
+ "grad_norm": 7.583092212677002,
772
+ "learning_rate": 4.925555555555556e-05,
773
+ "loss": 0.9683,
774
+ "step": 567
775
+ },
776
+ {
777
+ "epoch": 11.258278145695364,
778
+ "grad_norm": 11.066832542419434,
779
+ "learning_rate": 4.917777777777778e-05,
780
+ "loss": 1.1171,
781
+ "step": 574
782
+ },
783
+ {
784
+ "epoch": 11.397350993377483,
785
+ "grad_norm": 12.091965675354004,
786
+ "learning_rate": 4.91e-05,
787
+ "loss": 1.0942,
788
+ "step": 581
789
+ },
790
+ {
791
+ "epoch": 11.536423841059603,
792
+ "grad_norm": 11.736483573913574,
793
+ "learning_rate": 4.9022222222222224e-05,
794
+ "loss": 0.9956,
795
+ "step": 588
796
+ },
797
+ {
798
+ "epoch": 11.675496688741722,
799
+ "grad_norm": 11.410609245300293,
800
+ "learning_rate": 4.894444444444445e-05,
801
+ "loss": 1.1845,
802
+ "step": 595
803
+ },
804
+ {
805
+ "epoch": 11.814569536423841,
806
+ "grad_norm": 11.507161140441895,
807
+ "learning_rate": 4.886666666666667e-05,
808
+ "loss": 1.0821,
809
+ "step": 602
810
+ },
811
+ {
812
+ "epoch": 11.95364238410596,
813
+ "grad_norm": 9.406824111938477,
814
+ "learning_rate": 4.878888888888889e-05,
815
+ "loss": 1.1747,
816
+ "step": 609
817
+ },
818
+ {
819
+ "epoch": 12.0,
820
+ "eval_accuracy": 0.7352459016393442,
821
+ "eval_f1_macro": 0.7312760469309635,
822
+ "eval_f1_micro": 0.7352459016393442,
823
+ "eval_f1_weighted": 0.7335706726688512,
824
+ "eval_loss": 1.0279330015182495,
825
+ "eval_precision_macro": 0.7770572066822067,
826
+ "eval_precision_micro": 0.7352459016393442,
827
+ "eval_precision_weighted": 0.7772770910783207,
828
+ "eval_recall_macro": 0.7302857142857142,
829
+ "eval_recall_micro": 0.7352459016393442,
830
+ "eval_recall_weighted": 0.7352459016393442,
831
+ "eval_runtime": 7.7549,
832
+ "eval_samples_per_second": 157.319,
833
+ "eval_steps_per_second": 2.579,
834
+ "step": 612
835
+ },
836
+ {
837
+ "epoch": 12.079470198675496,
838
+ "grad_norm": 8.448054313659668,
839
+ "learning_rate": 4.871111111111111e-05,
840
+ "loss": 0.8869,
841
+ "step": 616
842
+ },
843
+ {
844
+ "epoch": 12.218543046357617,
845
+ "grad_norm": 12.257729530334473,
846
+ "learning_rate": 4.8633333333333334e-05,
847
+ "loss": 1.0252,
848
+ "step": 623
849
+ },
850
+ {
851
+ "epoch": 12.357615894039736,
852
+ "grad_norm": 10.638754844665527,
853
+ "learning_rate": 4.855555555555556e-05,
854
+ "loss": 0.9912,
855
+ "step": 630
856
+ },
857
+ {
858
+ "epoch": 12.496688741721854,
859
+ "grad_norm": 9.0201416015625,
860
+ "learning_rate": 4.847777777777778e-05,
861
+ "loss": 1.0075,
862
+ "step": 637
863
+ },
864
+ {
865
+ "epoch": 12.635761589403973,
866
+ "grad_norm": 11.034662246704102,
867
+ "learning_rate": 4.8400000000000004e-05,
868
+ "loss": 0.9086,
869
+ "step": 644
870
+ },
871
+ {
872
+ "epoch": 12.774834437086092,
873
+ "grad_norm": 9.97799301147461,
874
+ "learning_rate": 4.832222222222223e-05,
875
+ "loss": 1.0196,
876
+ "step": 651
877
+ },
878
+ {
879
+ "epoch": 12.913907284768213,
880
+ "grad_norm": 8.188615798950195,
881
+ "learning_rate": 4.824444444444445e-05,
882
+ "loss": 0.9324,
883
+ "step": 658
884
+ },
885
+ {
886
+ "epoch": 13.0,
887
+ "eval_accuracy": 0.7450819672131147,
888
+ "eval_f1_macro": 0.7381139110250362,
889
+ "eval_f1_micro": 0.7450819672131147,
890
+ "eval_f1_weighted": 0.742061789150014,
891
+ "eval_loss": 0.977480411529541,
892
+ "eval_precision_macro": 0.7810082972582972,
893
+ "eval_precision_micro": 0.7450819672131147,
894
+ "eval_precision_weighted": 0.7808100028386914,
895
+ "eval_recall_macro": 0.7389166666666667,
896
+ "eval_recall_micro": 0.7450819672131147,
897
+ "eval_recall_weighted": 0.7450819672131147,
898
+ "eval_runtime": 9.0225,
899
+ "eval_samples_per_second": 135.217,
900
+ "eval_steps_per_second": 2.217,
901
+ "step": 663
902
+ },
903
+ {
904
+ "epoch": 13.039735099337749,
905
+ "grad_norm": 7.6113362312316895,
906
+ "learning_rate": 4.8166666666666674e-05,
907
+ "loss": 0.8129,
908
+ "step": 665
909
+ },
910
+ {
911
+ "epoch": 13.178807947019868,
912
+ "grad_norm": 9.107025146484375,
913
+ "learning_rate": 4.808888888888889e-05,
914
+ "loss": 1.0174,
915
+ "step": 672
916
+ },
917
+ {
918
+ "epoch": 13.317880794701987,
919
+ "grad_norm": 8.571157455444336,
920
+ "learning_rate": 4.8011111111111114e-05,
921
+ "loss": 0.9077,
922
+ "step": 679
923
+ },
924
+ {
925
+ "epoch": 13.456953642384105,
926
+ "grad_norm": 8.005922317504883,
927
+ "learning_rate": 4.793333333333334e-05,
928
+ "loss": 0.8127,
929
+ "step": 686
930
+ },
931
+ {
932
+ "epoch": 13.596026490066226,
933
+ "grad_norm": 8.676533699035645,
934
+ "learning_rate": 4.785555555555556e-05,
935
+ "loss": 0.9146,
936
+ "step": 693
937
+ },
938
+ {
939
+ "epoch": 13.735099337748345,
940
+ "grad_norm": 8.648446083068848,
941
+ "learning_rate": 4.7777777777777784e-05,
942
+ "loss": 0.8411,
943
+ "step": 700
944
+ },
945
+ {
946
+ "epoch": 13.874172185430464,
947
+ "grad_norm": 8.4627046585083,
948
+ "learning_rate": 4.77e-05,
949
+ "loss": 0.8663,
950
+ "step": 707
951
+ },
952
+ {
953
+ "epoch": 14.0,
954
+ "grad_norm": 7.211985111236572,
955
+ "learning_rate": 4.7622222222222224e-05,
956
+ "loss": 0.8182,
957
+ "step": 714
958
+ },
959
+ {
960
+ "epoch": 14.0,
961
+ "eval_accuracy": 0.7426229508196721,
962
+ "eval_f1_macro": 0.7368765096272836,
963
+ "eval_f1_micro": 0.7426229508196721,
964
+ "eval_f1_weighted": 0.7399623246367397,
965
+ "eval_loss": 0.9406667351722717,
966
+ "eval_precision_macro": 0.7832181609240433,
967
+ "eval_precision_micro": 0.7426229508196721,
968
+ "eval_precision_weighted": 0.7804051459897651,
969
+ "eval_recall_macro": 0.7361904761904762,
970
+ "eval_recall_micro": 0.7426229508196721,
971
+ "eval_recall_weighted": 0.7426229508196721,
972
+ "eval_runtime": 9.371,
973
+ "eval_samples_per_second": 130.189,
974
+ "eval_steps_per_second": 2.134,
975
+ "step": 714
976
+ },
977
+ {
978
+ "epoch": 14.139072847682119,
979
+ "grad_norm": 9.872713088989258,
980
+ "learning_rate": 4.754444444444445e-05,
981
+ "loss": 0.8304,
982
+ "step": 721
983
+ },
984
+ {
985
+ "epoch": 14.278145695364238,
986
+ "grad_norm": 8.220512390136719,
987
+ "learning_rate": 4.746666666666667e-05,
988
+ "loss": 0.8467,
989
+ "step": 728
990
+ },
991
+ {
992
+ "epoch": 14.417218543046358,
993
+ "grad_norm": 7.127202987670898,
994
+ "learning_rate": 4.7388888888888894e-05,
995
+ "loss": 0.8545,
996
+ "step": 735
997
+ },
998
+ {
999
+ "epoch": 14.556291390728477,
1000
+ "grad_norm": 9.545476913452148,
1001
+ "learning_rate": 4.731111111111111e-05,
1002
+ "loss": 0.7786,
1003
+ "step": 742
1004
+ },
1005
+ {
1006
+ "epoch": 14.695364238410596,
1007
+ "grad_norm": 7.036871433258057,
1008
+ "learning_rate": 4.7233333333333334e-05,
1009
+ "loss": 0.8149,
1010
+ "step": 749
1011
+ },
1012
+ {
1013
+ "epoch": 14.834437086092715,
1014
+ "grad_norm": 9.99036979675293,
1015
+ "learning_rate": 4.715555555555556e-05,
1016
+ "loss": 0.882,
1017
+ "step": 756
1018
+ },
1019
+ {
1020
+ "epoch": 14.973509933774835,
1021
+ "grad_norm": 8.156625747680664,
1022
+ "learning_rate": 4.707777777777778e-05,
1023
+ "loss": 0.8521,
1024
+ "step": 763
1025
+ },
1026
+ {
1027
+ "epoch": 15.0,
1028
+ "eval_accuracy": 0.7368852459016394,
1029
+ "eval_f1_macro": 0.7359490527274584,
1030
+ "eval_f1_micro": 0.7368852459016394,
1031
+ "eval_f1_weighted": 0.7371425688386177,
1032
+ "eval_loss": 0.95683753490448,
1033
+ "eval_precision_macro": 0.7832754329004328,
1034
+ "eval_precision_micro": 0.7368852459016394,
1035
+ "eval_precision_weighted": 0.7825828543041657,
1036
+ "eval_recall_macro": 0.7337619047619047,
1037
+ "eval_recall_micro": 0.7368852459016394,
1038
+ "eval_recall_weighted": 0.7368852459016394,
1039
+ "eval_runtime": 8.6018,
1040
+ "eval_samples_per_second": 141.831,
1041
+ "eval_steps_per_second": 2.325,
1042
+ "step": 765
1043
+ },
1044
+ {
1045
+ "epoch": 15.099337748344372,
1046
+ "grad_norm": 9.90152645111084,
1047
+ "learning_rate": 4.7e-05,
1048
+ "loss": 0.7302,
1049
+ "step": 770
1050
+ },
1051
+ {
1052
+ "epoch": 15.23841059602649,
1053
+ "grad_norm": 8.24559211730957,
1054
+ "learning_rate": 4.692222222222222e-05,
1055
+ "loss": 0.73,
1056
+ "step": 777
1057
+ },
1058
+ {
1059
+ "epoch": 15.37748344370861,
1060
+ "grad_norm": 7.499877452850342,
1061
+ "learning_rate": 4.6844444444444444e-05,
1062
+ "loss": 0.6844,
1063
+ "step": 784
1064
+ },
1065
+ {
1066
+ "epoch": 15.516556291390728,
1067
+ "grad_norm": 7.696178913116455,
1068
+ "learning_rate": 4.676666666666667e-05,
1069
+ "loss": 0.7804,
1070
+ "step": 791
1071
+ },
1072
+ {
1073
+ "epoch": 15.655629139072847,
1074
+ "grad_norm": 7.795961856842041,
1075
+ "learning_rate": 4.668888888888889e-05,
1076
+ "loss": 0.7448,
1077
+ "step": 798
1078
+ },
1079
+ {
1080
+ "epoch": 15.794701986754967,
1081
+ "grad_norm": 8.67758560180664,
1082
+ "learning_rate": 4.6611111111111114e-05,
1083
+ "loss": 0.7165,
1084
+ "step": 805
1085
+ },
1086
+ {
1087
+ "epoch": 15.933774834437086,
1088
+ "grad_norm": 7.989274501800537,
1089
+ "learning_rate": 4.653333333333334e-05,
1090
+ "loss": 0.7982,
1091
+ "step": 812
1092
+ },
1093
+ {
1094
+ "epoch": 16.0,
1095
+ "eval_accuracy": 0.7573770491803279,
1096
+ "eval_f1_macro": 0.7503380093418792,
1097
+ "eval_f1_micro": 0.7573770491803279,
1098
+ "eval_f1_weighted": 0.7537269522379024,
1099
+ "eval_loss": 0.9051777720451355,
1100
+ "eval_precision_macro": 0.7920613553113554,
1101
+ "eval_precision_micro": 0.7573770491803279,
1102
+ "eval_precision_weighted": 0.7919793093973422,
1103
+ "eval_recall_macro": 0.7523154761904762,
1104
+ "eval_recall_micro": 0.7573770491803279,
1105
+ "eval_recall_weighted": 0.7573770491803279,
1106
+ "eval_runtime": 8.3886,
1107
+ "eval_samples_per_second": 145.435,
1108
+ "eval_steps_per_second": 2.384,
1109
+ "step": 816
1110
+ },
1111
+ {
1112
+ "epoch": 16.05960264900662,
1113
+ "grad_norm": 8.321952819824219,
1114
+ "learning_rate": 4.645555555555556e-05,
1115
+ "loss": 0.7606,
1116
+ "step": 819
1117
+ },
1118
+ {
1119
+ "epoch": 16.198675496688743,
1120
+ "grad_norm": 7.837481498718262,
1121
+ "learning_rate": 4.6377777777777784e-05,
1122
+ "loss": 0.6038,
1123
+ "step": 826
1124
+ },
1125
+ {
1126
+ "epoch": 16.337748344370862,
1127
+ "grad_norm": 7.198391437530518,
1128
+ "learning_rate": 4.630000000000001e-05,
1129
+ "loss": 0.6135,
1130
+ "step": 833
1131
+ },
1132
+ {
1133
+ "epoch": 16.47682119205298,
1134
+ "grad_norm": 6.980067253112793,
1135
+ "learning_rate": 4.6222222222222224e-05,
1136
+ "loss": 0.7087,
1137
+ "step": 840
1138
+ },
1139
+ {
1140
+ "epoch": 16.6158940397351,
1141
+ "grad_norm": 8.596502304077148,
1142
+ "learning_rate": 4.614444444444445e-05,
1143
+ "loss": 0.7511,
1144
+ "step": 847
1145
+ },
1146
+ {
1147
+ "epoch": 16.75496688741722,
1148
+ "grad_norm": 10.769916534423828,
1149
+ "learning_rate": 4.606666666666667e-05,
1150
+ "loss": 0.7704,
1151
+ "step": 854
1152
+ },
1153
+ {
1154
+ "epoch": 16.894039735099337,
1155
+ "grad_norm": 8.09610366821289,
1156
+ "learning_rate": 4.5988888888888894e-05,
1157
+ "loss": 0.6758,
1158
+ "step": 861
1159
+ },
1160
+ {
1161
+ "epoch": 17.0,
1162
+ "eval_accuracy": 0.7688524590163934,
1163
+ "eval_f1_macro": 0.7620661952426658,
1164
+ "eval_f1_micro": 0.7688524590163934,
1165
+ "eval_f1_weighted": 0.7661766579076126,
1166
+ "eval_loss": 0.8681604862213135,
1167
+ "eval_precision_macro": 0.8014163271369154,
1168
+ "eval_precision_micro": 0.7688524590163934,
1169
+ "eval_precision_weighted": 0.8034294296456784,
1170
+ "eval_recall_macro": 0.7639523809523809,
1171
+ "eval_recall_micro": 0.7688524590163934,
1172
+ "eval_recall_weighted": 0.7688524590163934,
1173
+ "eval_runtime": 8.0661,
1174
+ "eval_samples_per_second": 151.25,
1175
+ "eval_steps_per_second": 2.48,
1176
+ "step": 867
1177
+ },
1178
+ {
1179
+ "epoch": 17.019867549668874,
1180
+ "grad_norm": 6.455817222595215,
1181
+ "learning_rate": 4.591111111111112e-05,
1182
+ "loss": 0.6425,
1183
+ "step": 868
1184
+ },
1185
+ {
1186
+ "epoch": 17.158940397350992,
1187
+ "grad_norm": 6.898990631103516,
1188
+ "learning_rate": 4.5833333333333334e-05,
1189
+ "loss": 0.6026,
1190
+ "step": 875
1191
+ },
1192
+ {
1193
+ "epoch": 17.29801324503311,
1194
+ "grad_norm": 6.594276428222656,
1195
+ "learning_rate": 4.575555555555556e-05,
1196
+ "loss": 0.6881,
1197
+ "step": 882
1198
+ },
1199
+ {
1200
+ "epoch": 17.437086092715234,
1201
+ "grad_norm": 7.464595794677734,
1202
+ "learning_rate": 4.567777777777778e-05,
1203
+ "loss": 0.6886,
1204
+ "step": 889
1205
+ },
1206
+ {
1207
+ "epoch": 17.576158940397352,
1208
+ "grad_norm": 7.091211795806885,
1209
+ "learning_rate": 4.5600000000000004e-05,
1210
+ "loss": 0.6853,
1211
+ "step": 896
1212
+ },
1213
+ {
1214
+ "epoch": 17.71523178807947,
1215
+ "grad_norm": 9.127113342285156,
1216
+ "learning_rate": 4.552222222222222e-05,
1217
+ "loss": 0.6738,
1218
+ "step": 903
1219
+ },
1220
+ {
1221
+ "epoch": 17.85430463576159,
1222
+ "grad_norm": 10.013925552368164,
1223
+ "learning_rate": 4.5444444444444444e-05,
1224
+ "loss": 0.6987,
1225
+ "step": 910
1226
+ },
1227
+ {
1228
+ "epoch": 17.99337748344371,
1229
+ "grad_norm": 7.973466396331787,
1230
+ "learning_rate": 4.536666666666667e-05,
1231
+ "loss": 0.6198,
1232
+ "step": 917
1233
+ },
1234
+ {
1235
+ "epoch": 18.0,
1236
+ "eval_accuracy": 0.7778688524590164,
1237
+ "eval_f1_macro": 0.7727661896926602,
1238
+ "eval_f1_micro": 0.7778688524590164,
1239
+ "eval_f1_weighted": 0.7755600006443786,
1240
+ "eval_loss": 0.837512195110321,
1241
+ "eval_precision_macro": 0.8040597041847042,
1242
+ "eval_precision_micro": 0.7778688524590164,
1243
+ "eval_precision_weighted": 0.8045207638445343,
1244
+ "eval_recall_macro": 0.7733869047619047,
1245
+ "eval_recall_micro": 0.7778688524590164,
1246
+ "eval_recall_weighted": 0.7778688524590164,
1247
+ "eval_runtime": 7.9822,
1248
+ "eval_samples_per_second": 152.841,
1249
+ "eval_steps_per_second": 2.506,
1250
+ "step": 918
1251
+ },
1252
+ {
1253
+ "epoch": 18.119205298013245,
1254
+ "grad_norm": 8.20810317993164,
1255
+ "learning_rate": 4.528888888888889e-05,
1256
+ "loss": 0.5334,
1257
+ "step": 924
1258
+ },
1259
+ {
1260
+ "epoch": 18.258278145695364,
1261
+ "grad_norm": 6.411288738250732,
1262
+ "learning_rate": 4.5211111111111114e-05,
1263
+ "loss": 0.6188,
1264
+ "step": 931
1265
+ },
1266
+ {
1267
+ "epoch": 18.397350993377483,
1268
+ "grad_norm": 6.3168625831604,
1269
+ "learning_rate": 4.513333333333333e-05,
1270
+ "loss": 0.6689,
1271
+ "step": 938
1272
+ },
1273
+ {
1274
+ "epoch": 18.5364238410596,
1275
+ "grad_norm": 7.33069372177124,
1276
+ "learning_rate": 4.5055555555555554e-05,
1277
+ "loss": 0.5977,
1278
+ "step": 945
1279
+ },
1280
+ {
1281
+ "epoch": 18.67549668874172,
1282
+ "grad_norm": 8.643268585205078,
1283
+ "learning_rate": 4.497777777777778e-05,
1284
+ "loss": 0.6336,
1285
+ "step": 952
1286
+ },
1287
+ {
1288
+ "epoch": 18.814569536423843,
1289
+ "grad_norm": 7.422027587890625,
1290
+ "learning_rate": 4.49e-05,
1291
+ "loss": 0.5705,
1292
+ "step": 959
1293
+ },
1294
+ {
1295
+ "epoch": 18.95364238410596,
1296
+ "grad_norm": 9.680660247802734,
1297
+ "learning_rate": 4.4822222222222224e-05,
1298
+ "loss": 0.6472,
1299
+ "step": 966
1300
+ },
1301
+ {
1302
+ "epoch": 19.0,
1303
+ "eval_accuracy": 0.7754098360655738,
1304
+ "eval_f1_macro": 0.7680860346224123,
1305
+ "eval_f1_micro": 0.7754098360655738,
1306
+ "eval_f1_weighted": 0.7702894885556789,
1307
+ "eval_loss": 0.8460519313812256,
1308
+ "eval_precision_macro": 0.7924107142857142,
1309
+ "eval_precision_micro": 0.7754098360655738,
1310
+ "eval_precision_weighted": 0.7927615144418424,
1311
+ "eval_recall_macro": 0.7717797619047618,
1312
+ "eval_recall_micro": 0.7754098360655738,
1313
+ "eval_recall_weighted": 0.7754098360655738,
1314
+ "eval_runtime": 7.6728,
1315
+ "eval_samples_per_second": 159.003,
1316
+ "eval_steps_per_second": 2.607,
1317
+ "step": 969
1318
+ },
1319
+ {
1320
+ "epoch": 19.079470198675498,
1321
+ "grad_norm": 8.89242935180664,
1322
+ "learning_rate": 4.474444444444445e-05,
1323
+ "loss": 0.4945,
1324
+ "step": 973
1325
+ },
1326
+ {
1327
+ "epoch": 19.218543046357617,
1328
+ "grad_norm": 8.83127212524414,
1329
+ "learning_rate": 4.466666666666667e-05,
1330
+ "loss": 0.515,
1331
+ "step": 980
1332
+ },
1333
+ {
1334
+ "epoch": 19.357615894039736,
1335
+ "grad_norm": 9.815643310546875,
1336
+ "learning_rate": 4.4588888888888894e-05,
1337
+ "loss": 0.6046,
1338
+ "step": 987
1339
+ },
1340
+ {
1341
+ "epoch": 19.496688741721854,
1342
+ "grad_norm": 7.919642925262451,
1343
+ "learning_rate": 4.451111111111112e-05,
1344
+ "loss": 0.6668,
1345
+ "step": 994
1346
+ },
1347
+ {
1348
+ "epoch": 19.635761589403973,
1349
+ "grad_norm": 7.550244331359863,
1350
+ "learning_rate": 4.443333333333334e-05,
1351
+ "loss": 0.5545,
1352
+ "step": 1001
1353
+ },
1354
+ {
1355
+ "epoch": 19.774834437086092,
1356
+ "grad_norm": 8.549494743347168,
1357
+ "learning_rate": 4.435555555555556e-05,
1358
+ "loss": 0.5826,
1359
+ "step": 1008
1360
+ },
1361
+ {
1362
+ "epoch": 19.91390728476821,
1363
+ "grad_norm": 8.502252578735352,
1364
+ "learning_rate": 4.427777777777778e-05,
1365
+ "loss": 0.59,
1366
+ "step": 1015
1367
+ },
1368
+ {
1369
+ "epoch": 20.0,
1370
+ "eval_accuracy": 0.7704918032786885,
1371
+ "eval_f1_macro": 0.7659097554062726,
1372
+ "eval_f1_micro": 0.7704918032786885,
1373
+ "eval_f1_weighted": 0.7667846829728269,
1374
+ "eval_loss": 0.8479171991348267,
1375
+ "eval_precision_macro": 0.8060642135642135,
1376
+ "eval_precision_micro": 0.7704918032786885,
1377
+ "eval_precision_weighted": 0.8040887800723866,
1378
+ "eval_recall_macro": 0.7677142857142857,
1379
+ "eval_recall_micro": 0.7704918032786885,
1380
+ "eval_recall_weighted": 0.7704918032786885,
1381
+ "eval_runtime": 8.7673,
1382
+ "eval_samples_per_second": 139.154,
1383
+ "eval_steps_per_second": 2.281,
1384
+ "step": 1020
1385
+ },
1386
+ {
1387
+ "epoch": 20.039735099337747,
1388
+ "grad_norm": 7.236791133880615,
1389
+ "learning_rate": 4.4200000000000004e-05,
1390
+ "loss": 0.4936,
1391
+ "step": 1022
1392
+ },
1393
+ {
1394
+ "epoch": 20.178807947019866,
1395
+ "grad_norm": 8.718758583068848,
1396
+ "learning_rate": 4.412222222222223e-05,
1397
+ "loss": 0.5734,
1398
+ "step": 1029
1399
+ },
1400
+ {
1401
+ "epoch": 20.31788079470199,
1402
+ "grad_norm": 6.671544551849365,
1403
+ "learning_rate": 4.404444444444445e-05,
1404
+ "loss": 0.5834,
1405
+ "step": 1036
1406
+ },
1407
+ {
1408
+ "epoch": 20.456953642384107,
1409
+ "grad_norm": 6.937793731689453,
1410
+ "learning_rate": 4.396666666666667e-05,
1411
+ "loss": 0.5894,
1412
+ "step": 1043
1413
+ },
1414
+ {
1415
+ "epoch": 20.596026490066226,
1416
+ "grad_norm": 8.0573091506958,
1417
+ "learning_rate": 4.388888888888889e-05,
1418
+ "loss": 0.6071,
1419
+ "step": 1050
1420
+ },
1421
+ {
1422
+ "epoch": 20.735099337748345,
1423
+ "grad_norm": 6.314826965332031,
1424
+ "learning_rate": 4.3811111111111114e-05,
1425
+ "loss": 0.6011,
1426
+ "step": 1057
1427
+ },
1428
+ {
1429
+ "epoch": 20.874172185430464,
1430
+ "grad_norm": 6.716008186340332,
1431
+ "learning_rate": 4.373333333333334e-05,
1432
+ "loss": 0.6057,
1433
+ "step": 1064
1434
+ },
1435
+ {
1436
+ "epoch": 21.0,
1437
+ "grad_norm": 5.2278666496276855,
1438
+ "learning_rate": 4.3655555555555554e-05,
1439
+ "loss": 0.5018,
1440
+ "step": 1071
1441
+ },
1442
+ {
1443
+ "epoch": 21.0,
1444
+ "eval_accuracy": 0.7827868852459017,
1445
+ "eval_f1_macro": 0.7756399022374255,
1446
+ "eval_f1_micro": 0.7827868852459017,
1447
+ "eval_f1_weighted": 0.7791432066610973,
1448
+ "eval_loss": 0.7969875931739807,
1449
+ "eval_precision_macro": 0.8050563048063047,
1450
+ "eval_precision_micro": 0.7827868852459017,
1451
+ "eval_precision_weighted": 0.8058397818028965,
1452
+ "eval_recall_macro": 0.7773869047619048,
1453
+ "eval_recall_micro": 0.7827868852459017,
1454
+ "eval_recall_weighted": 0.7827868852459017,
1455
+ "eval_runtime": 8.0625,
1456
+ "eval_samples_per_second": 151.318,
1457
+ "eval_steps_per_second": 2.481,
1458
+ "step": 1071
1459
+ },
1460
+ {
1461
+ "epoch": 21.13907284768212,
1462
+ "grad_norm": 8.252735137939453,
1463
+ "learning_rate": 4.357777777777778e-05,
1464
+ "loss": 0.504,
1465
+ "step": 1078
1466
+ },
1467
+ {
1468
+ "epoch": 21.278145695364238,
1469
+ "grad_norm": 4.934821128845215,
1470
+ "learning_rate": 4.35e-05,
1471
+ "loss": 0.5043,
1472
+ "step": 1085
1473
+ },
1474
+ {
1475
+ "epoch": 21.417218543046356,
1476
+ "grad_norm": 8.361172676086426,
1477
+ "learning_rate": 4.3422222222222224e-05,
1478
+ "loss": 0.5401,
1479
+ "step": 1092
1480
+ },
1481
+ {
1482
+ "epoch": 21.556291390728475,
1483
+ "grad_norm": 8.164180755615234,
1484
+ "learning_rate": 4.334444444444445e-05,
1485
+ "loss": 0.556,
1486
+ "step": 1099
1487
+ },
1488
+ {
1489
+ "epoch": 21.695364238410598,
1490
+ "grad_norm": 8.75143814086914,
1491
+ "learning_rate": 4.3266666666666664e-05,
1492
+ "loss": 0.6266,
1493
+ "step": 1106
1494
+ },
1495
+ {
1496
+ "epoch": 21.834437086092716,
1497
+ "grad_norm": 9.500467300415039,
1498
+ "learning_rate": 4.318888888888889e-05,
1499
+ "loss": 0.497,
1500
+ "step": 1113
1501
+ },
1502
+ {
1503
+ "epoch": 21.973509933774835,
1504
+ "grad_norm": 8.917155265808105,
1505
+ "learning_rate": 4.311111111111111e-05,
1506
+ "loss": 0.5441,
1507
+ "step": 1120
1508
+ },
1509
+ {
1510
+ "epoch": 22.0,
1511
+ "eval_accuracy": 0.7836065573770492,
1512
+ "eval_f1_macro": 0.7740025198829379,
1513
+ "eval_f1_micro": 0.7836065573770492,
1514
+ "eval_f1_weighted": 0.776360052190446,
1515
+ "eval_loss": 0.815737247467041,
1516
+ "eval_precision_macro": 0.7979354256854256,
1517
+ "eval_precision_micro": 0.7836065573770492,
1518
+ "eval_precision_weighted": 0.7986912450027204,
1519
+ "eval_recall_macro": 0.7795238095238095,
1520
+ "eval_recall_micro": 0.7836065573770492,
1521
+ "eval_recall_weighted": 0.7836065573770492,
1522
+ "eval_runtime": 8.4975,
1523
+ "eval_samples_per_second": 143.572,
1524
+ "eval_steps_per_second": 2.354,
1525
+ "step": 1122
1526
+ },
1527
+ {
1528
+ "epoch": 22.09933774834437,
1529
+ "grad_norm": 7.294015884399414,
1530
+ "learning_rate": 4.3033333333333334e-05,
1531
+ "loss": 0.4939,
1532
+ "step": 1127
1533
+ },
1534
+ {
1535
+ "epoch": 22.23841059602649,
1536
+ "grad_norm": 5.934714317321777,
1537
+ "learning_rate": 4.295555555555556e-05,
1538
+ "loss": 0.5275,
1539
+ "step": 1134
1540
+ },
1541
+ {
1542
+ "epoch": 22.37748344370861,
1543
+ "grad_norm": 7.307860851287842,
1544
+ "learning_rate": 4.287777777777778e-05,
1545
+ "loss": 0.5805,
1546
+ "step": 1141
1547
+ },
1548
+ {
1549
+ "epoch": 22.516556291390728,
1550
+ "grad_norm": 6.267979621887207,
1551
+ "learning_rate": 4.2800000000000004e-05,
1552
+ "loss": 0.4981,
1553
+ "step": 1148
1554
+ },
1555
+ {
1556
+ "epoch": 22.655629139072847,
1557
+ "grad_norm": 9.028493881225586,
1558
+ "learning_rate": 4.272222222222223e-05,
1559
+ "loss": 0.5054,
1560
+ "step": 1155
1561
+ },
1562
+ {
1563
+ "epoch": 22.794701986754966,
1564
+ "grad_norm": 6.913817405700684,
1565
+ "learning_rate": 4.264444444444445e-05,
1566
+ "loss": 0.5445,
1567
+ "step": 1162
1568
+ },
1569
+ {
1570
+ "epoch": 22.933774834437084,
1571
+ "grad_norm": 5.680942058563232,
1572
+ "learning_rate": 4.2566666666666674e-05,
1573
+ "loss": 0.521,
1574
+ "step": 1169
1575
+ },
1576
+ {
1577
+ "epoch": 23.0,
1578
+ "eval_accuracy": 0.7729508196721312,
1579
+ "eval_f1_macro": 0.7663257041805803,
1580
+ "eval_f1_micro": 0.7729508196721312,
1581
+ "eval_f1_weighted": 0.7691782293636708,
1582
+ "eval_loss": 0.824141800403595,
1583
+ "eval_precision_macro": 0.7993881673881674,
1584
+ "eval_precision_micro": 0.7729508196721312,
1585
+ "eval_precision_weighted": 0.8004997870981478,
1586
+ "eval_recall_macro": 0.769375,
1587
+ "eval_recall_micro": 0.7729508196721312,
1588
+ "eval_recall_weighted": 0.7729508196721312,
1589
+ "eval_runtime": 8.204,
1590
+ "eval_samples_per_second": 148.708,
1591
+ "eval_steps_per_second": 2.438,
1592
+ "step": 1173
1593
+ },
1594
+ {
1595
+ "epoch": 23.05960264900662,
1596
+ "grad_norm": 10.308310508728027,
1597
+ "learning_rate": 4.248888888888889e-05,
1598
+ "loss": 0.5383,
1599
+ "step": 1176
1600
+ },
1601
+ {
1602
+ "epoch": 23.198675496688743,
1603
+ "grad_norm": 7.397233009338379,
1604
+ "learning_rate": 4.2411111111111114e-05,
1605
+ "loss": 0.4616,
1606
+ "step": 1183
1607
+ },
1608
+ {
1609
+ "epoch": 23.337748344370862,
1610
+ "grad_norm": 7.977766036987305,
1611
+ "learning_rate": 4.233333333333334e-05,
1612
+ "loss": 0.4552,
1613
+ "step": 1190
1614
+ },
1615
+ {
1616
+ "epoch": 23.47682119205298,
1617
+ "grad_norm": 7.122145175933838,
1618
+ "learning_rate": 4.225555555555556e-05,
1619
+ "loss": 0.5357,
1620
+ "step": 1197
1621
+ },
1622
+ {
1623
+ "epoch": 23.6158940397351,
1624
+ "grad_norm": 8.030558586120605,
1625
+ "learning_rate": 4.217777777777778e-05,
1626
+ "loss": 0.4766,
1627
+ "step": 1204
1628
+ },
1629
+ {
1630
+ "epoch": 23.75496688741722,
1631
+ "grad_norm": 6.913261890411377,
1632
+ "learning_rate": 4.21e-05,
1633
+ "loss": 0.4749,
1634
+ "step": 1211
1635
+ },
1636
+ {
1637
+ "epoch": 23.894039735099337,
1638
+ "grad_norm": 8.414008140563965,
1639
+ "learning_rate": 4.2022222222222223e-05,
1640
+ "loss": 0.6132,
1641
+ "step": 1218
1642
+ },
1643
+ {
1644
+ "epoch": 24.0,
1645
+ "eval_accuracy": 0.7745901639344263,
1646
+ "eval_f1_macro": 0.768003412566106,
1647
+ "eval_f1_micro": 0.7745901639344263,
1648
+ "eval_f1_weighted": 0.7714303129776662,
1649
+ "eval_loss": 0.820447564125061,
1650
+ "eval_precision_macro": 0.8005151637578107,
1651
+ "eval_precision_micro": 0.7745901639344263,
1652
+ "eval_precision_weighted": 0.8034235941118776,
1653
+ "eval_recall_macro": 0.7705833333333334,
1654
+ "eval_recall_micro": 0.7745901639344263,
1655
+ "eval_recall_weighted": 0.7745901639344263,
1656
+ "eval_runtime": 7.8325,
1657
+ "eval_samples_per_second": 155.762,
1658
+ "eval_steps_per_second": 2.553,
1659
+ "step": 1224
1660
+ },
1661
+ {
1662
+ "epoch": 24.019867549668874,
1663
+ "grad_norm": 7.957177639007568,
1664
+ "learning_rate": 4.194444444444445e-05,
1665
+ "loss": 0.4539,
1666
+ "step": 1225
1667
+ },
1668
+ {
1669
+ "epoch": 24.158940397350992,
1670
+ "grad_norm": 8.860937118530273,
1671
+ "learning_rate": 4.186666666666667e-05,
1672
+ "loss": 0.5149,
1673
+ "step": 1232
1674
+ },
1675
+ {
1676
+ "epoch": 24.29801324503311,
1677
+ "grad_norm": 7.115077495574951,
1678
+ "learning_rate": 4.178888888888889e-05,
1679
+ "loss": 0.4416,
1680
+ "step": 1239
1681
+ },
1682
+ {
1683
+ "epoch": 24.437086092715234,
1684
+ "grad_norm": 6.419886112213135,
1685
+ "learning_rate": 4.171111111111111e-05,
1686
+ "loss": 0.434,
1687
+ "step": 1246
1688
+ },
1689
+ {
1690
+ "epoch": 24.576158940397352,
1691
+ "grad_norm": 8.631376266479492,
1692
+ "learning_rate": 4.1633333333333333e-05,
1693
+ "loss": 0.4709,
1694
+ "step": 1253
1695
+ },
1696
+ {
1697
+ "epoch": 24.71523178807947,
1698
+ "grad_norm": 5.748884201049805,
1699
+ "learning_rate": 4.155555555555556e-05,
1700
+ "loss": 0.4158,
1701
+ "step": 1260
1702
+ },
1703
+ {
1704
+ "epoch": 24.85430463576159,
1705
+ "grad_norm": 6.351589202880859,
1706
+ "learning_rate": 4.147777777777778e-05,
1707
+ "loss": 0.4453,
1708
+ "step": 1267
1709
+ },
1710
+ {
1711
+ "epoch": 24.99337748344371,
1712
+ "grad_norm": 6.419159412384033,
1713
+ "learning_rate": 4.14e-05,
1714
+ "loss": 0.5556,
1715
+ "step": 1274
1716
+ },
1717
+ {
1718
+ "epoch": 25.0,
1719
+ "eval_accuracy": 0.7844262295081967,
1720
+ "eval_f1_macro": 0.7757715160656338,
1721
+ "eval_f1_micro": 0.7844262295081967,
1722
+ "eval_f1_weighted": 0.779143908165123,
1723
+ "eval_loss": 0.7968999147415161,
1724
+ "eval_precision_macro": 0.8028276290702762,
1725
+ "eval_precision_micro": 0.7844262295081967,
1726
+ "eval_precision_weighted": 0.8051517111678635,
1727
+ "eval_recall_macro": 0.7802916666666665,
1728
+ "eval_recall_micro": 0.7844262295081967,
1729
+ "eval_recall_weighted": 0.7844262295081967,
1730
+ "eval_runtime": 8.302,
1731
+ "eval_samples_per_second": 146.952,
1732
+ "eval_steps_per_second": 2.409,
1733
+ "step": 1275
1734
+ }
1735
+ ],
1736
+ "logging_steps": 7,
1737
+ "max_steps": 5000,
1738
+ "num_input_tokens_seen": 0,
1739
+ "num_train_epochs": 100,
1740
+ "save_steps": 7,
1741
+ "stateful_callbacks": {
1742
+ "EarlyStoppingCallback": {
1743
+ "args": {
1744
+ "early_stopping_patience": 5,
1745
+ "early_stopping_threshold": 0.01
1746
+ },
1747
+ "attributes": {
1748
+ "early_stopping_patience_counter": 4
1749
+ }
1750
+ },
1751
+ "TrainerControl": {
1752
+ "args": {
1753
+ "should_epoch_stop": false,
1754
+ "should_evaluate": false,
1755
+ "should_log": false,
1756
+ "should_save": true,
1757
+ "should_training_stop": false
1758
+ },
1759
+ "attributes": {}
1760
+ }
1761
+ },
1762
+ "total_flos": 3.0073437480489984e+18,
1763
+ "train_batch_size": 32,
1764
+ "trial_name": null,
1765
+ "trial_params": null
1766
+ }
checkpoint-1275/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4954f53b9b1b38b0121eed683aa922ddfa43b3acd0b7daa2f4e2a8f145eb1fd2
3
+ size 5304
config.json ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/swin-tiny-patch4-window7-224",
3
+ "_num_labels": 200,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 6,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 96,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 768,
20
+ "id2label": {
21
+ "0": "acadian_flycatcher",
22
+ "1": "american_crow",
23
+ "2": "american_goldfinch",
24
+ "3": "american_pipit",
25
+ "4": "american_redstart",
26
+ "5": "american_three_toed_woodpecker",
27
+ "6": "anna_hummingbird",
28
+ "7": "artic_tern",
29
+ "8": "baird_sparrow",
30
+ "9": "baltimore_oriole",
31
+ "10": "bank_swallow",
32
+ "11": "barn_swallow",
33
+ "12": "bay_breasted_warbler",
34
+ "13": "belted_kingfisher",
35
+ "14": "bewick_wren",
36
+ "15": "black_and_white_warbler",
37
+ "16": "black_billed_cuckoo",
38
+ "17": "black_capped_vireo",
39
+ "18": "black_footed_albatross",
40
+ "19": "black_tern",
41
+ "20": "black_throated_blue_warbler",
42
+ "21": "black_throated_sparrow",
43
+ "22": "blue_grosbeak",
44
+ "23": "blue_headed_vireo",
45
+ "24": "blue_jay",
46
+ "25": "blue_winged_warbler",
47
+ "26": "boat_tailed_grackle",
48
+ "27": "bobolink",
49
+ "28": "bohemian_waxwing",
50
+ "29": "brandt_cormorant",
51
+ "30": "brewer_blackbird",
52
+ "31": "brewer_sparrow",
53
+ "32": "bronzed_cowbird",
54
+ "33": "brown_creeper",
55
+ "34": "brown_pelican",
56
+ "35": "brown_thrasher",
57
+ "36": "cactus_wren",
58
+ "37": "california_gull",
59
+ "38": "canada_warbler",
60
+ "39": "cape_glossy_starling",
61
+ "40": "cape_may_warbler",
62
+ "41": "cardinal",
63
+ "42": "carolina_wren",
64
+ "43": "caspian_tern",
65
+ "44": "cedar_waxwing",
66
+ "45": "cerulean_warbler",
67
+ "46": "chestnut_sided_warbler",
68
+ "47": "chipping_sparrow",
69
+ "48": "chuck_will_widow",
70
+ "49": "clark_nutcracker",
71
+ "50": "clay_colored_sparrow",
72
+ "51": "cliff_swallow",
73
+ "52": "common_raven",
74
+ "53": "common_tern",
75
+ "54": "common_yellowthroat",
76
+ "55": "crested_auklet",
77
+ "56": "dark_eyed_junco",
78
+ "57": "downy_woodpecker",
79
+ "58": "eared_grebe",
80
+ "59": "eastern_towhee",
81
+ "60": "elegant_tern",
82
+ "61": "european_goldfinch",
83
+ "62": "evening_grosbeak",
84
+ "63": "field_sparrow",
85
+ "64": "fish_crow",
86
+ "65": "florida_jay",
87
+ "66": "forsters_tern",
88
+ "67": "fox_sparrow",
89
+ "68": "frigatebird",
90
+ "69": "gadwall",
91
+ "70": "geococcyx",
92
+ "71": "glaucous_winged_gull",
93
+ "72": "golden_winged_warbler",
94
+ "73": "grasshopper_sparrow",
95
+ "74": "gray_catbird",
96
+ "75": "gray_crowned_rosy_finch",
97
+ "76": "gray_kingbird",
98
+ "77": "great_crested_flycatcher",
99
+ "78": "great_grey_shrike",
100
+ "79": "green_jay",
101
+ "80": "green_kingfisher",
102
+ "81": "green_tailed_towhee",
103
+ "82": "green_violetear",
104
+ "83": "groove_billed_ani",
105
+ "84": "harris_sparrow",
106
+ "85": "heermann_gull",
107
+ "86": "henslow_sparrow",
108
+ "87": "herring_gull",
109
+ "88": "hooded_merganser",
110
+ "89": "hooded_oriole",
111
+ "90": "hooded_warbler",
112
+ "91": "horned_grebe",
113
+ "92": "horned_lark",
114
+ "93": "horned_puffin",
115
+ "94": "house_sparrow",
116
+ "95": "house_wren",
117
+ "96": "indigo_bunting",
118
+ "97": "ivory_gull",
119
+ "98": "kentucky_warbler",
120
+ "99": "laysan_albatross",
121
+ "100": "lazuli_bunting",
122
+ "101": "le_conte_sparrow",
123
+ "102": "least_auklet",
124
+ "103": "least_flycatcher",
125
+ "104": "least_tern",
126
+ "105": "lincoln_sparrow",
127
+ "106": "loggerhead_shrike",
128
+ "107": "long_tailed_jaeger",
129
+ "108": "louisiana_waterthrush",
130
+ "109": "magnolia_warbler",
131
+ "110": "mallard",
132
+ "111": "mangrove_cuckoo",
133
+ "112": "marsh_wren",
134
+ "113": "mockingbird",
135
+ "114": "mourning_warbler",
136
+ "115": "myrtle_warbler",
137
+ "116": "nashville_warbler",
138
+ "117": "nelson_sharp_tailed_sparrow",
139
+ "118": "nighthawk",
140
+ "119": "northern_flicker",
141
+ "120": "northern_fulmar",
142
+ "121": "northern_waterthrush",
143
+ "122": "olive_sided_flycatcher",
144
+ "123": "orange_crowned_warbler",
145
+ "124": "orchard_oriole",
146
+ "125": "ovenbird",
147
+ "126": "pacific_loon",
148
+ "127": "painted_bunting",
149
+ "128": "palm_warbler",
150
+ "129": "parakeet_auklet",
151
+ "130": "pelagic_cormorant",
152
+ "131": "philadelphia_vireo",
153
+ "132": "pied_billed_grebe",
154
+ "133": "pied_kingfisher",
155
+ "134": "pigeon_guillemot",
156
+ "135": "pileated_woodpecker",
157
+ "136": "pine_grosbeak",
158
+ "137": "pine_warbler",
159
+ "138": "pomarine_jaeger",
160
+ "139": "prairie_warbler",
161
+ "140": "prothonotary_warbler",
162
+ "141": "purple_finch",
163
+ "142": "red_bellied_woodpecker",
164
+ "143": "red_breasted_merganser",
165
+ "144": "red_cockaded_woodpecker",
166
+ "145": "red_eyed_vireo",
167
+ "146": "red_faced_cormorant",
168
+ "147": "red_headed_woodpecker",
169
+ "148": "red_legged_kittiwake",
170
+ "149": "red_winged_blackbird",
171
+ "150": "rhinoceros_auklet",
172
+ "151": "ring_billed_gull",
173
+ "152": "ringed_kingfisher",
174
+ "153": "rock_wren",
175
+ "154": "rose_breasted_grosbeak",
176
+ "155": "ruby_throated_hummingbird",
177
+ "156": "rufous_hummingbird",
178
+ "157": "rusty_blackbird",
179
+ "158": "sage_thrasher",
180
+ "159": "savannah_sparrow",
181
+ "160": "sayornis",
182
+ "161": "scarlet_tanager",
183
+ "162": "scissor_tailed_flycatcher",
184
+ "163": "scott_oriole",
185
+ "164": "seaside_sparrow",
186
+ "165": "shiny_cowbird",
187
+ "166": "slaty_backed_gull",
188
+ "167": "song_sparrow",
189
+ "168": "sooty_albatross",
190
+ "169": "spotted_catbird",
191
+ "170": "summer_tanager",
192
+ "171": "swainson_warbler",
193
+ "172": "tennessee_warbler",
194
+ "173": "tree_sparrow",
195
+ "174": "tree_swallow",
196
+ "175": "tropical_kingbird",
197
+ "176": "vermilion_flycatcher",
198
+ "177": "vesper_sparrow",
199
+ "178": "warbling_vireo",
200
+ "179": "western_grebe",
201
+ "180": "western_gull",
202
+ "181": "western_meadowlark",
203
+ "182": "western_wood_pewee",
204
+ "183": "whip_poor_will",
205
+ "184": "white_breasted_kingfisher",
206
+ "185": "white_breasted_nuthatch",
207
+ "186": "white_crowned_sparrow",
208
+ "187": "white_eyed_vireo",
209
+ "188": "white_necked_raven",
210
+ "189": "white_pelican",
211
+ "190": "white_throated_sparrow",
212
+ "191": "wilson_warbler",
213
+ "192": "winter_wren",
214
+ "193": "worm_eating_warbler",
215
+ "194": "yellow_bellied_flycatcher",
216
+ "195": "yellow_billed_cuckoo",
217
+ "196": "yellow_breasted_chat",
218
+ "197": "yellow_headed_blackbird",
219
+ "198": "yellow_throated_vireo",
220
+ "199": "yellow_warbler"
221
+ },
222
+ "image_size": 224,
223
+ "initializer_range": 0.02,
224
+ "label2id": {
225
+ "acadian_flycatcher": 0,
226
+ "american_crow": 1,
227
+ "american_goldfinch": 2,
228
+ "american_pipit": 3,
229
+ "american_redstart": 4,
230
+ "american_three_toed_woodpecker": 5,
231
+ "anna_hummingbird": 6,
232
+ "artic_tern": 7,
233
+ "baird_sparrow": 8,
234
+ "baltimore_oriole": 9,
235
+ "bank_swallow": 10,
236
+ "barn_swallow": 11,
237
+ "bay_breasted_warbler": 12,
238
+ "belted_kingfisher": 13,
239
+ "bewick_wren": 14,
240
+ "black_and_white_warbler": 15,
241
+ "black_billed_cuckoo": 16,
242
+ "black_capped_vireo": 17,
243
+ "black_footed_albatross": 18,
244
+ "black_tern": 19,
245
+ "black_throated_blue_warbler": 20,
246
+ "black_throated_sparrow": 21,
247
+ "blue_grosbeak": 22,
248
+ "blue_headed_vireo": 23,
249
+ "blue_jay": 24,
250
+ "blue_winged_warbler": 25,
251
+ "boat_tailed_grackle": 26,
252
+ "bobolink": 27,
253
+ "bohemian_waxwing": 28,
254
+ "brandt_cormorant": 29,
255
+ "brewer_blackbird": 30,
256
+ "brewer_sparrow": 31,
257
+ "bronzed_cowbird": 32,
258
+ "brown_creeper": 33,
259
+ "brown_pelican": 34,
260
+ "brown_thrasher": 35,
261
+ "cactus_wren": 36,
262
+ "california_gull": 37,
263
+ "canada_warbler": 38,
264
+ "cape_glossy_starling": 39,
265
+ "cape_may_warbler": 40,
266
+ "cardinal": 41,
267
+ "carolina_wren": 42,
268
+ "caspian_tern": 43,
269
+ "cedar_waxwing": 44,
270
+ "cerulean_warbler": 45,
271
+ "chestnut_sided_warbler": 46,
272
+ "chipping_sparrow": 47,
273
+ "chuck_will_widow": 48,
274
+ "clark_nutcracker": 49,
275
+ "clay_colored_sparrow": 50,
276
+ "cliff_swallow": 51,
277
+ "common_raven": 52,
278
+ "common_tern": 53,
279
+ "common_yellowthroat": 54,
280
+ "crested_auklet": 55,
281
+ "dark_eyed_junco": 56,
282
+ "downy_woodpecker": 57,
283
+ "eared_grebe": 58,
284
+ "eastern_towhee": 59,
285
+ "elegant_tern": 60,
286
+ "european_goldfinch": 61,
287
+ "evening_grosbeak": 62,
288
+ "field_sparrow": 63,
289
+ "fish_crow": 64,
290
+ "florida_jay": 65,
291
+ "forsters_tern": 66,
292
+ "fox_sparrow": 67,
293
+ "frigatebird": 68,
294
+ "gadwall": 69,
295
+ "geococcyx": 70,
296
+ "glaucous_winged_gull": 71,
297
+ "golden_winged_warbler": 72,
298
+ "grasshopper_sparrow": 73,
299
+ "gray_catbird": 74,
300
+ "gray_crowned_rosy_finch": 75,
301
+ "gray_kingbird": 76,
302
+ "great_crested_flycatcher": 77,
303
+ "great_grey_shrike": 78,
304
+ "green_jay": 79,
305
+ "green_kingfisher": 80,
306
+ "green_tailed_towhee": 81,
307
+ "green_violetear": 82,
308
+ "groove_billed_ani": 83,
309
+ "harris_sparrow": 84,
310
+ "heermann_gull": 85,
311
+ "henslow_sparrow": 86,
312
+ "herring_gull": 87,
313
+ "hooded_merganser": 88,
314
+ "hooded_oriole": 89,
315
+ "hooded_warbler": 90,
316
+ "horned_grebe": 91,
317
+ "horned_lark": 92,
318
+ "horned_puffin": 93,
319
+ "house_sparrow": 94,
320
+ "house_wren": 95,
321
+ "indigo_bunting": 96,
322
+ "ivory_gull": 97,
323
+ "kentucky_warbler": 98,
324
+ "laysan_albatross": 99,
325
+ "lazuli_bunting": 100,
326
+ "le_conte_sparrow": 101,
327
+ "least_auklet": 102,
328
+ "least_flycatcher": 103,
329
+ "least_tern": 104,
330
+ "lincoln_sparrow": 105,
331
+ "loggerhead_shrike": 106,
332
+ "long_tailed_jaeger": 107,
333
+ "louisiana_waterthrush": 108,
334
+ "magnolia_warbler": 109,
335
+ "mallard": 110,
336
+ "mangrove_cuckoo": 111,
337
+ "marsh_wren": 112,
338
+ "mockingbird": 113,
339
+ "mourning_warbler": 114,
340
+ "myrtle_warbler": 115,
341
+ "nashville_warbler": 116,
342
+ "nelson_sharp_tailed_sparrow": 117,
343
+ "nighthawk": 118,
344
+ "northern_flicker": 119,
345
+ "northern_fulmar": 120,
346
+ "northern_waterthrush": 121,
347
+ "olive_sided_flycatcher": 122,
348
+ "orange_crowned_warbler": 123,
349
+ "orchard_oriole": 124,
350
+ "ovenbird": 125,
351
+ "pacific_loon": 126,
352
+ "painted_bunting": 127,
353
+ "palm_warbler": 128,
354
+ "parakeet_auklet": 129,
355
+ "pelagic_cormorant": 130,
356
+ "philadelphia_vireo": 131,
357
+ "pied_billed_grebe": 132,
358
+ "pied_kingfisher": 133,
359
+ "pigeon_guillemot": 134,
360
+ "pileated_woodpecker": 135,
361
+ "pine_grosbeak": 136,
362
+ "pine_warbler": 137,
363
+ "pomarine_jaeger": 138,
364
+ "prairie_warbler": 139,
365
+ "prothonotary_warbler": 140,
366
+ "purple_finch": 141,
367
+ "red_bellied_woodpecker": 142,
368
+ "red_breasted_merganser": 143,
369
+ "red_cockaded_woodpecker": 144,
370
+ "red_eyed_vireo": 145,
371
+ "red_faced_cormorant": 146,
372
+ "red_headed_woodpecker": 147,
373
+ "red_legged_kittiwake": 148,
374
+ "red_winged_blackbird": 149,
375
+ "rhinoceros_auklet": 150,
376
+ "ring_billed_gull": 151,
377
+ "ringed_kingfisher": 152,
378
+ "rock_wren": 153,
379
+ "rose_breasted_grosbeak": 154,
380
+ "ruby_throated_hummingbird": 155,
381
+ "rufous_hummingbird": 156,
382
+ "rusty_blackbird": 157,
383
+ "sage_thrasher": 158,
384
+ "savannah_sparrow": 159,
385
+ "sayornis": 160,
386
+ "scarlet_tanager": 161,
387
+ "scissor_tailed_flycatcher": 162,
388
+ "scott_oriole": 163,
389
+ "seaside_sparrow": 164,
390
+ "shiny_cowbird": 165,
391
+ "slaty_backed_gull": 166,
392
+ "song_sparrow": 167,
393
+ "sooty_albatross": 168,
394
+ "spotted_catbird": 169,
395
+ "summer_tanager": 170,
396
+ "swainson_warbler": 171,
397
+ "tennessee_warbler": 172,
398
+ "tree_sparrow": 173,
399
+ "tree_swallow": 174,
400
+ "tropical_kingbird": 175,
401
+ "vermilion_flycatcher": 176,
402
+ "vesper_sparrow": 177,
403
+ "warbling_vireo": 178,
404
+ "western_grebe": 179,
405
+ "western_gull": 180,
406
+ "western_meadowlark": 181,
407
+ "western_wood_pewee": 182,
408
+ "whip_poor_will": 183,
409
+ "white_breasted_kingfisher": 184,
410
+ "white_breasted_nuthatch": 185,
411
+ "white_crowned_sparrow": 186,
412
+ "white_eyed_vireo": 187,
413
+ "white_necked_raven": 188,
414
+ "white_pelican": 189,
415
+ "white_throated_sparrow": 190,
416
+ "wilson_warbler": 191,
417
+ "winter_wren": 192,
418
+ "worm_eating_warbler": 193,
419
+ "yellow_bellied_flycatcher": 194,
420
+ "yellow_billed_cuckoo": 195,
421
+ "yellow_breasted_chat": 196,
422
+ "yellow_headed_blackbird": 197,
423
+ "yellow_throated_vireo": 198,
424
+ "yellow_warbler": 199
425
+ },
426
+ "layer_norm_eps": 1e-05,
427
+ "mlp_ratio": 4.0,
428
+ "model_type": "swin",
429
+ "num_channels": 3,
430
+ "num_heads": [
431
+ 3,
432
+ 6,
433
+ 12,
434
+ 24
435
+ ],
436
+ "num_layers": 4,
437
+ "out_features": [
438
+ "stage4"
439
+ ],
440
+ "out_indices": [
441
+ 4
442
+ ],
443
+ "patch_size": 4,
444
+ "path_norm": true,
445
+ "problem_type": "single_label_classification",
446
+ "qkv_bias": true,
447
+ "stage_names": [
448
+ "stem",
449
+ "stage1",
450
+ "stage2",
451
+ "stage3",
452
+ "stage4"
453
+ ],
454
+ "torch_dtype": "float32",
455
+ "transformers_version": "4.48.0",
456
+ "use_absolute_embeddings": false,
457
+ "window_size": 7
458
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21b381ba710a311f309e6b501293213b5910f400bcebc10f9a471a481931b1e2
3
+ size 110951928
preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
runs/Oct22_10-31-28_dld-laptop/events.out.tfevents.1761100289.dld-laptop.33032.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200f6e2c2b2d3f29d6c667ef513a10c0688c6321b939ea1feeaaa8cc5d6c6031
3
+ size 78790
runs/Oct22_10-31-28_dld-laptop/events.out.tfevents.1761101940.dld-laptop.33032.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca4719a81fd1d9bbad55b438a159db47df51dccf000209cd3985578ec19e4b17
3
+ size 921
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4954f53b9b1b38b0121eed683aa922ddfa43b3acd0b7daa2f4e2a8f145eb1fd2
3
+ size 5304
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "Model-Swin-Tiny-/autotrain-data",
3
+ "model": "microsoft/swin-tiny-patch4-window7-224",
4
+ "username": "local",
5
+ "lr": 0.00005,
6
+ "epochs": 100,
7
+ "batch_size": 32,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 3,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.01,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "logging_steps": -1,
18
+ "project_name": "Model-Swin-Tiny-",
19
+ "auto_find_batch_size": false,
20
+ "mixed_precision": "bf16",
21
+ "save_total_limit": 1,
22
+ "token": null,
23
+ "push_to_hub": true,
24
+ "eval_strategy": "epoch",
25
+ "image_column": "autotrain_image",
26
+ "target_column": "autotrain_label",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }