VinayHajare committed on
Commit
2f11503
·
verified ·
1 Parent(s): 719a382

doodle-dash

Browse files
README.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: apple/mobilevit-small
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ model-index:
10
+ - name: results
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # results
18
+
19
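+ This model is a fine-tuned version of [apple/mobilevit-small](https://huggingface.co/apple/mobilevit-small) on a dataset that was not recorded by the Trainer (the 345 class labels and 28×28 single-channel input in `config.json` suggest the Quick, Draw! doodle dataset — confirm and name the dataset here).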
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.9863
22
+ - Accuracy: 0.7512
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 0.0008
42
+ - train_batch_size: 256
43
+ - eval_batch_size: 256
44
+ - seed: 42
45
+ - optimizer: AdamW, fused PyTorch implementation (`OptimizerNames.ADAMW_TORCH_FUSED`), with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
46
+ - lr_scheduler_type: linear
47
+ - num_epochs: 5
48
+ - mixed_precision_training: Native AMP
49
+
50
+ ### Training results
51
+
52
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
+ |:-------------:|:------:|:-----:|:---------------:|:--------:|
54
+ | 1.4609 | 0.2844 | 5000 | 1.3989 | 0.6532 |
55
+ | 1.3211 | 0.5689 | 10000 | 1.2739 | 0.6803 |
56
+ | 1.2531 | 0.8533 | 15000 | 1.2132 | 0.6942 |
57
+ | 1.1875 | 1.1377 | 20000 | 1.1762 | 0.7041 |
58
+ | 1.1570 | 1.4222 | 25000 | 1.1460 | 0.7111 |
59
+ | 1.1440 | 1.7066 | 30000 | 1.1184 | 0.7163 |
60
+ | 1.1217 | 1.9910 | 35000 | 1.0880 | 0.7247 |
61
+ | 1.0831 | 2.2754 | 40000 | 1.0729 | 0.7280 |
62
+ | 1.0761 | 2.5599 | 45000 | 1.0593 | 0.7312 |
63
+ | 1.0565 | 2.8443 | 50000 | 1.0480 | 0.7346 |
64
+ | 1.0149 | 3.1287 | 55000 | 1.0356 | 0.7380 |
65
+ | 1.0102 | 3.4132 | 60000 | 1.0263 | 0.7401 |
66
+ | 1.0014 | 3.6976 | 65000 | 1.0122 | 0.7437 |
67
+ | 0.9972 | 3.9820 | 70000 | 1.0028 | 0.7459 |
68
+ | 0.9556 | 4.2665 | 75000 | 0.9971 | 0.7474 |
69
+ | 0.9606 | 4.5509 | 80000 | 0.9904 | 0.7496 |
70
+ | 0.9544 | 4.8353 | 85000 | 0.9842 | 0.7507 |
71
+
72
+
73
+ ### Framework versions
74
+
75
+ - Transformers 4.57.3
76
+ - Pytorch 2.9.0+cu126
77
+ - Datasets 4.0.0
78
+ - Tokenizers 0.22.1
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.751164,
4
+ "eval_loss": 0.9863214492797852,
5
+ "eval_runtime": 104.5218,
6
+ "eval_samples_per_second": 2391.845,
7
+ "eval_steps_per_second": 9.347,
8
+ "total_flos": 5.4597447576e+17,
9
+ "train_loss": 1.1272559640920097,
10
+ "train_runtime": 10316.2205,
11
+ "train_samples_per_second": 2181.031,
12
+ "train_steps_per_second": 8.52
13
+ }
config.json ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MobileViTForImageClassification"
4
+ ],
5
+ "aspp_dropout_prob": 0.1,
6
+ "aspp_out_channels": 256,
7
+ "atrous_rates": [
8
+ 6,
9
+ 12,
10
+ 18
11
+ ],
12
+ "attention_probs_dropout_prob": 0.0,
13
+ "classifier_dropout_prob": 0.1,
14
+ "conv_kernel_size": 3,
15
+ "dtype": "float32",
16
+ "expand_ratio": 4.0,
17
+ "hidden_act": "silu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_sizes": [
20
+ 144,
21
+ 192,
22
+ 240
23
+ ],
24
+ "id2label": {
25
+ "0": "aircraft carrier",
26
+ "1": "airplane",
27
+ "10": "asparagus",
28
+ "100": "dumbbell",
29
+ "101": "ear",
30
+ "102": "elbow",
31
+ "103": "elephant",
32
+ "104": "envelope",
33
+ "105": "eraser",
34
+ "106": "eye",
35
+ "107": "eyeglasses",
36
+ "108": "face",
37
+ "109": "fan",
38
+ "11": "axe",
39
+ "110": "feather",
40
+ "111": "fence",
41
+ "112": "finger",
42
+ "113": "fire hydrant",
43
+ "114": "fireplace",
44
+ "115": "firetruck",
45
+ "116": "fish",
46
+ "117": "flamingo",
47
+ "118": "flashlight",
48
+ "119": "flip flops",
49
+ "12": "backpack",
50
+ "120": "floor lamp",
51
+ "121": "flower",
52
+ "122": "flying saucer",
53
+ "123": "foot",
54
+ "124": "fork",
55
+ "125": "frog",
56
+ "126": "frying pan",
57
+ "127": "garden hose",
58
+ "128": "garden",
59
+ "129": "giraffe",
60
+ "13": "banana",
61
+ "130": "goatee",
62
+ "131": "golf club",
63
+ "132": "grapes",
64
+ "133": "grass",
65
+ "134": "guitar",
66
+ "135": "hamburger",
67
+ "136": "hammer",
68
+ "137": "hand",
69
+ "138": "harp",
70
+ "139": "hat",
71
+ "14": "bandage",
72
+ "140": "headphones",
73
+ "141": "hedgehog",
74
+ "142": "helicopter",
75
+ "143": "helmet",
76
+ "144": "hexagon",
77
+ "145": "hockey puck",
78
+ "146": "hockey stick",
79
+ "147": "horse",
80
+ "148": "hospital",
81
+ "149": "hot air balloon",
82
+ "15": "barn",
83
+ "150": "hot dog",
84
+ "151": "hot tub",
85
+ "152": "hourglass",
86
+ "153": "house plant",
87
+ "154": "house",
88
+ "155": "hurricane",
89
+ "156": "ice cream",
90
+ "157": "jacket",
91
+ "158": "jail",
92
+ "159": "kangaroo",
93
+ "16": "baseball bat",
94
+ "160": "key",
95
+ "161": "keyboard",
96
+ "162": "knee",
97
+ "163": "knife",
98
+ "164": "ladder",
99
+ "165": "lantern",
100
+ "166": "laptop",
101
+ "167": "leaf",
102
+ "168": "leg",
103
+ "169": "light bulb",
104
+ "17": "baseball",
105
+ "170": "lighter",
106
+ "171": "lighthouse",
107
+ "172": "lightning",
108
+ "173": "line",
109
+ "174": "lion",
110
+ "175": "lipstick",
111
+ "176": "lobster",
112
+ "177": "lollipop",
113
+ "178": "mailbox",
114
+ "179": "map",
115
+ "18": "basket",
116
+ "180": "marker",
117
+ "181": "matches",
118
+ "182": "megaphone",
119
+ "183": "mermaid",
120
+ "184": "microphone",
121
+ "185": "microwave",
122
+ "186": "monkey",
123
+ "187": "moon",
124
+ "188": "mosquito",
125
+ "189": "motorbike",
126
+ "19": "basketball",
127
+ "190": "mountain",
128
+ "191": "mouse",
129
+ "192": "moustache",
130
+ "193": "mouth",
131
+ "194": "mug",
132
+ "195": "mushroom",
133
+ "196": "nail",
134
+ "197": "necklace",
135
+ "198": "nose",
136
+ "199": "ocean",
137
+ "2": "alarm clock",
138
+ "20": "bat",
139
+ "200": "octagon",
140
+ "201": "octopus",
141
+ "202": "onion",
142
+ "203": "oven",
143
+ "204": "owl",
144
+ "205": "paint can",
145
+ "206": "paintbrush",
146
+ "207": "palm tree",
147
+ "208": "panda",
148
+ "209": "pants",
149
+ "21": "bathtub",
150
+ "210": "paper clip",
151
+ "211": "parachute",
152
+ "212": "parrot",
153
+ "213": "passport",
154
+ "214": "peanut",
155
+ "215": "pear",
156
+ "216": "peas",
157
+ "217": "pencil",
158
+ "218": "penguin",
159
+ "219": "piano",
160
+ "22": "beach",
161
+ "220": "pickup truck",
162
+ "221": "picture frame",
163
+ "222": "pig",
164
+ "223": "pillow",
165
+ "224": "pineapple",
166
+ "225": "pizza",
167
+ "226": "pliers",
168
+ "227": "police car",
169
+ "228": "pond",
170
+ "229": "pool",
171
+ "23": "bear",
172
+ "230": "popsicle",
173
+ "231": "postcard",
174
+ "232": "potato",
175
+ "233": "power outlet",
176
+ "234": "purse",
177
+ "235": "rabbit",
178
+ "236": "raccoon",
179
+ "237": "radio",
180
+ "238": "rain",
181
+ "239": "rainbow",
182
+ "24": "beard",
183
+ "240": "rake",
184
+ "241": "remote control",
185
+ "242": "rhinoceros",
186
+ "243": "rifle",
187
+ "244": "river",
188
+ "245": "roller coaster",
189
+ "246": "rollerskates",
190
+ "247": "sailboat",
191
+ "248": "sandwich",
192
+ "249": "saw",
193
+ "25": "bed",
194
+ "250": "saxophone",
195
+ "251": "school bus",
196
+ "252": "scissors",
197
+ "253": "scorpion",
198
+ "254": "screwdriver",
199
+ "255": "sea turtle",
200
+ "256": "see saw",
201
+ "257": "shark",
202
+ "258": "sheep",
203
+ "259": "shoe",
204
+ "26": "bee",
205
+ "260": "shorts",
206
+ "261": "shovel",
207
+ "262": "sink",
208
+ "263": "skateboard",
209
+ "264": "skull",
210
+ "265": "skyscraper",
211
+ "266": "sleeping bag",
212
+ "267": "smiley face",
213
+ "268": "snail",
214
+ "269": "snake",
215
+ "27": "belt",
216
+ "270": "snorkel",
217
+ "271": "snowflake",
218
+ "272": "snowman",
219
+ "273": "soccer ball",
220
+ "274": "sock",
221
+ "275": "speedboat",
222
+ "276": "spider",
223
+ "277": "spoon",
224
+ "278": "spreadsheet",
225
+ "279": "square",
226
+ "28": "bench",
227
+ "280": "squiggle",
228
+ "281": "squirrel",
229
+ "282": "stairs",
230
+ "283": "star",
231
+ "284": "steak",
232
+ "285": "stereo",
233
+ "286": "stethoscope",
234
+ "287": "stitches",
235
+ "288": "stop sign",
236
+ "289": "stove",
237
+ "29": "bicycle",
238
+ "290": "strawberry",
239
+ "291": "streetlight",
240
+ "292": "string bean",
241
+ "293": "submarine",
242
+ "294": "suitcase",
243
+ "295": "sun",
244
+ "296": "swan",
245
+ "297": "sweater",
246
+ "298": "swing set",
247
+ "299": "sword",
248
+ "3": "ambulance",
249
+ "30": "binoculars",
250
+ "300": "syringe",
251
+ "301": "t-shirt",
252
+ "302": "table",
253
+ "303": "teapot",
254
+ "304": "teddy-bear",
255
+ "305": "telephone",
256
+ "306": "television",
257
+ "307": "tennis racquet",
258
+ "308": "tent",
259
+ "309": "The Eiffel Tower",
260
+ "31": "bird",
261
+ "310": "The Great Wall of China",
262
+ "311": "The Mona Lisa",
263
+ "312": "tiger",
264
+ "313": "toaster",
265
+ "314": "toe",
266
+ "315": "toilet",
267
+ "316": "tooth",
268
+ "317": "toothbrush",
269
+ "318": "toothpaste",
270
+ "319": "tornado",
271
+ "32": "birthday cake",
272
+ "320": "tractor",
273
+ "321": "traffic light",
274
+ "322": "train",
275
+ "323": "tree",
276
+ "324": "triangle",
277
+ "325": "trombone",
278
+ "326": "truck",
279
+ "327": "trumpet",
280
+ "328": "umbrella",
281
+ "329": "underwear",
282
+ "33": "blackberry",
283
+ "330": "van",
284
+ "331": "vase",
285
+ "332": "violin",
286
+ "333": "washing machine",
287
+ "334": "watermelon",
288
+ "335": "waterslide",
289
+ "336": "whale",
290
+ "337": "wheel",
291
+ "338": "windmill",
292
+ "339": "wine bottle",
293
+ "34": "blueberry",
294
+ "340": "wine glass",
295
+ "341": "wristwatch",
296
+ "342": "yoga",
297
+ "343": "zebra",
298
+ "344": "zigzag",
299
+ "35": "book",
300
+ "36": "boomerang",
301
+ "37": "bottlecap",
302
+ "38": "bowtie",
303
+ "39": "bracelet",
304
+ "4": "angel",
305
+ "40": "brain",
306
+ "41": "bread",
307
+ "42": "bridge",
308
+ "43": "broccoli",
309
+ "44": "broom",
310
+ "45": "bucket",
311
+ "46": "bulldozer",
312
+ "47": "bus",
313
+ "48": "bush",
314
+ "49": "butterfly",
315
+ "5": "animal migration",
316
+ "50": "cactus",
317
+ "51": "cake",
318
+ "52": "calculator",
319
+ "53": "calendar",
320
+ "54": "camel",
321
+ "55": "camera",
322
+ "56": "camouflage",
323
+ "57": "campfire",
324
+ "58": "candle",
325
+ "59": "cannon",
326
+ "6": "ant",
327
+ "60": "canoe",
328
+ "61": "car",
329
+ "62": "carrot",
330
+ "63": "castle",
331
+ "64": "cat",
332
+ "65": "ceiling fan",
333
+ "66": "cell phone",
334
+ "67": "cello",
335
+ "68": "chair",
336
+ "69": "chandelier",
337
+ "7": "anvil",
338
+ "70": "church",
339
+ "71": "circle",
340
+ "72": "clarinet",
341
+ "73": "clock",
342
+ "74": "cloud",
343
+ "75": "coffee cup",
344
+ "76": "compass",
345
+ "77": "computer",
346
+ "78": "cookie",
347
+ "79": "cooler",
348
+ "8": "apple",
349
+ "80": "couch",
350
+ "81": "cow",
351
+ "82": "crab",
352
+ "83": "crayon",
353
+ "84": "crocodile",
354
+ "85": "crown",
355
+ "86": "cruise ship",
356
+ "87": "cup",
357
+ "88": "diamond",
358
+ "89": "dishwasher",
359
+ "9": "arm",
360
+ "90": "diving board",
361
+ "91": "dog",
362
+ "92": "dolphin",
363
+ "93": "donut",
364
+ "94": "door",
365
+ "95": "dragon",
366
+ "96": "dresser",
367
+ "97": "drill",
368
+ "98": "drums",
369
+ "99": "duck"
370
+ },
371
+ "image_size": 28,
372
+ "initializer_range": 0.02,
373
+ "label2id": {
374
+ "The Eiffel Tower": "309",
375
+ "The Great Wall of China": "310",
376
+ "The Mona Lisa": "311",
377
+ "aircraft carrier": "0",
378
+ "airplane": "1",
379
+ "alarm clock": "2",
380
+ "ambulance": "3",
381
+ "angel": "4",
382
+ "animal migration": "5",
383
+ "ant": "6",
384
+ "anvil": "7",
385
+ "apple": "8",
386
+ "arm": "9",
387
+ "asparagus": "10",
388
+ "axe": "11",
389
+ "backpack": "12",
390
+ "banana": "13",
391
+ "bandage": "14",
392
+ "barn": "15",
393
+ "baseball": "17",
394
+ "baseball bat": "16",
395
+ "basket": "18",
396
+ "basketball": "19",
397
+ "bat": "20",
398
+ "bathtub": "21",
399
+ "beach": "22",
400
+ "bear": "23",
401
+ "beard": "24",
402
+ "bed": "25",
403
+ "bee": "26",
404
+ "belt": "27",
405
+ "bench": "28",
406
+ "bicycle": "29",
407
+ "binoculars": "30",
408
+ "bird": "31",
409
+ "birthday cake": "32",
410
+ "blackberry": "33",
411
+ "blueberry": "34",
412
+ "book": "35",
413
+ "boomerang": "36",
414
+ "bottlecap": "37",
415
+ "bowtie": "38",
416
+ "bracelet": "39",
417
+ "brain": "40",
418
+ "bread": "41",
419
+ "bridge": "42",
420
+ "broccoli": "43",
421
+ "broom": "44",
422
+ "bucket": "45",
423
+ "bulldozer": "46",
424
+ "bus": "47",
425
+ "bush": "48",
426
+ "butterfly": "49",
427
+ "cactus": "50",
428
+ "cake": "51",
429
+ "calculator": "52",
430
+ "calendar": "53",
431
+ "camel": "54",
432
+ "camera": "55",
433
+ "camouflage": "56",
434
+ "campfire": "57",
435
+ "candle": "58",
436
+ "cannon": "59",
437
+ "canoe": "60",
438
+ "car": "61",
439
+ "carrot": "62",
440
+ "castle": "63",
441
+ "cat": "64",
442
+ "ceiling fan": "65",
443
+ "cell phone": "66",
444
+ "cello": "67",
445
+ "chair": "68",
446
+ "chandelier": "69",
447
+ "church": "70",
448
+ "circle": "71",
449
+ "clarinet": "72",
450
+ "clock": "73",
451
+ "cloud": "74",
452
+ "coffee cup": "75",
453
+ "compass": "76",
454
+ "computer": "77",
455
+ "cookie": "78",
456
+ "cooler": "79",
457
+ "couch": "80",
458
+ "cow": "81",
459
+ "crab": "82",
460
+ "crayon": "83",
461
+ "crocodile": "84",
462
+ "crown": "85",
463
+ "cruise ship": "86",
464
+ "cup": "87",
465
+ "diamond": "88",
466
+ "dishwasher": "89",
467
+ "diving board": "90",
468
+ "dog": "91",
469
+ "dolphin": "92",
470
+ "donut": "93",
471
+ "door": "94",
472
+ "dragon": "95",
473
+ "dresser": "96",
474
+ "drill": "97",
475
+ "drums": "98",
476
+ "duck": "99",
477
+ "dumbbell": "100",
478
+ "ear": "101",
479
+ "elbow": "102",
480
+ "elephant": "103",
481
+ "envelope": "104",
482
+ "eraser": "105",
483
+ "eye": "106",
484
+ "eyeglasses": "107",
485
+ "face": "108",
486
+ "fan": "109",
487
+ "feather": "110",
488
+ "fence": "111",
489
+ "finger": "112",
490
+ "fire hydrant": "113",
491
+ "fireplace": "114",
492
+ "firetruck": "115",
493
+ "fish": "116",
494
+ "flamingo": "117",
495
+ "flashlight": "118",
496
+ "flip flops": "119",
497
+ "floor lamp": "120",
498
+ "flower": "121",
499
+ "flying saucer": "122",
500
+ "foot": "123",
501
+ "fork": "124",
502
+ "frog": "125",
503
+ "frying pan": "126",
504
+ "garden": "128",
505
+ "garden hose": "127",
506
+ "giraffe": "129",
507
+ "goatee": "130",
508
+ "golf club": "131",
509
+ "grapes": "132",
510
+ "grass": "133",
511
+ "guitar": "134",
512
+ "hamburger": "135",
513
+ "hammer": "136",
514
+ "hand": "137",
515
+ "harp": "138",
516
+ "hat": "139",
517
+ "headphones": "140",
518
+ "hedgehog": "141",
519
+ "helicopter": "142",
520
+ "helmet": "143",
521
+ "hexagon": "144",
522
+ "hockey puck": "145",
523
+ "hockey stick": "146",
524
+ "horse": "147",
525
+ "hospital": "148",
526
+ "hot air balloon": "149",
527
+ "hot dog": "150",
528
+ "hot tub": "151",
529
+ "hourglass": "152",
530
+ "house": "154",
531
+ "house plant": "153",
532
+ "hurricane": "155",
533
+ "ice cream": "156",
534
+ "jacket": "157",
535
+ "jail": "158",
536
+ "kangaroo": "159",
537
+ "key": "160",
538
+ "keyboard": "161",
539
+ "knee": "162",
540
+ "knife": "163",
541
+ "ladder": "164",
542
+ "lantern": "165",
543
+ "laptop": "166",
544
+ "leaf": "167",
545
+ "leg": "168",
546
+ "light bulb": "169",
547
+ "lighter": "170",
548
+ "lighthouse": "171",
549
+ "lightning": "172",
550
+ "line": "173",
551
+ "lion": "174",
552
+ "lipstick": "175",
553
+ "lobster": "176",
554
+ "lollipop": "177",
555
+ "mailbox": "178",
556
+ "map": "179",
557
+ "marker": "180",
558
+ "matches": "181",
559
+ "megaphone": "182",
560
+ "mermaid": "183",
561
+ "microphone": "184",
562
+ "microwave": "185",
563
+ "monkey": "186",
564
+ "moon": "187",
565
+ "mosquito": "188",
566
+ "motorbike": "189",
567
+ "mountain": "190",
568
+ "mouse": "191",
569
+ "moustache": "192",
570
+ "mouth": "193",
571
+ "mug": "194",
572
+ "mushroom": "195",
573
+ "nail": "196",
574
+ "necklace": "197",
575
+ "nose": "198",
576
+ "ocean": "199",
577
+ "octagon": "200",
578
+ "octopus": "201",
579
+ "onion": "202",
580
+ "oven": "203",
581
+ "owl": "204",
582
+ "paint can": "205",
583
+ "paintbrush": "206",
584
+ "palm tree": "207",
585
+ "panda": "208",
586
+ "pants": "209",
587
+ "paper clip": "210",
588
+ "parachute": "211",
589
+ "parrot": "212",
590
+ "passport": "213",
591
+ "peanut": "214",
592
+ "pear": "215",
593
+ "peas": "216",
594
+ "pencil": "217",
595
+ "penguin": "218",
596
+ "piano": "219",
597
+ "pickup truck": "220",
598
+ "picture frame": "221",
599
+ "pig": "222",
600
+ "pillow": "223",
601
+ "pineapple": "224",
602
+ "pizza": "225",
603
+ "pliers": "226",
604
+ "police car": "227",
605
+ "pond": "228",
606
+ "pool": "229",
607
+ "popsicle": "230",
608
+ "postcard": "231",
609
+ "potato": "232",
610
+ "power outlet": "233",
611
+ "purse": "234",
612
+ "rabbit": "235",
613
+ "raccoon": "236",
614
+ "radio": "237",
615
+ "rain": "238",
616
+ "rainbow": "239",
617
+ "rake": "240",
618
+ "remote control": "241",
619
+ "rhinoceros": "242",
620
+ "rifle": "243",
621
+ "river": "244",
622
+ "roller coaster": "245",
623
+ "rollerskates": "246",
624
+ "sailboat": "247",
625
+ "sandwich": "248",
626
+ "saw": "249",
627
+ "saxophone": "250",
628
+ "school bus": "251",
629
+ "scissors": "252",
630
+ "scorpion": "253",
631
+ "screwdriver": "254",
632
+ "sea turtle": "255",
633
+ "see saw": "256",
634
+ "shark": "257",
635
+ "sheep": "258",
636
+ "shoe": "259",
637
+ "shorts": "260",
638
+ "shovel": "261",
639
+ "sink": "262",
640
+ "skateboard": "263",
641
+ "skull": "264",
642
+ "skyscraper": "265",
643
+ "sleeping bag": "266",
644
+ "smiley face": "267",
645
+ "snail": "268",
646
+ "snake": "269",
647
+ "snorkel": "270",
648
+ "snowflake": "271",
649
+ "snowman": "272",
650
+ "soccer ball": "273",
651
+ "sock": "274",
652
+ "speedboat": "275",
653
+ "spider": "276",
654
+ "spoon": "277",
655
+ "spreadsheet": "278",
656
+ "square": "279",
657
+ "squiggle": "280",
658
+ "squirrel": "281",
659
+ "stairs": "282",
660
+ "star": "283",
661
+ "steak": "284",
662
+ "stereo": "285",
663
+ "stethoscope": "286",
664
+ "stitches": "287",
665
+ "stop sign": "288",
666
+ "stove": "289",
667
+ "strawberry": "290",
668
+ "streetlight": "291",
669
+ "string bean": "292",
670
+ "submarine": "293",
671
+ "suitcase": "294",
672
+ "sun": "295",
673
+ "swan": "296",
674
+ "sweater": "297",
675
+ "swing set": "298",
676
+ "sword": "299",
677
+ "syringe": "300",
678
+ "t-shirt": "301",
679
+ "table": "302",
680
+ "teapot": "303",
681
+ "teddy-bear": "304",
682
+ "telephone": "305",
683
+ "television": "306",
684
+ "tennis racquet": "307",
685
+ "tent": "308",
686
+ "tiger": "312",
687
+ "toaster": "313",
688
+ "toe": "314",
689
+ "toilet": "315",
690
+ "tooth": "316",
691
+ "toothbrush": "317",
692
+ "toothpaste": "318",
693
+ "tornado": "319",
694
+ "tractor": "320",
695
+ "traffic light": "321",
696
+ "train": "322",
697
+ "tree": "323",
698
+ "triangle": "324",
699
+ "trombone": "325",
700
+ "truck": "326",
701
+ "trumpet": "327",
702
+ "umbrella": "328",
703
+ "underwear": "329",
704
+ "van": "330",
705
+ "vase": "331",
706
+ "violin": "332",
707
+ "washing machine": "333",
708
+ "watermelon": "334",
709
+ "waterslide": "335",
710
+ "whale": "336",
711
+ "wheel": "337",
712
+ "windmill": "338",
713
+ "wine bottle": "339",
714
+ "wine glass": "340",
715
+ "wristwatch": "341",
716
+ "yoga": "342",
717
+ "zebra": "343",
718
+ "zigzag": "344"
719
+ },
720
+ "layer_norm_eps": 1e-05,
721
+ "mlp_ratio": 2.0,
722
+ "model_type": "mobilevit",
723
+ "neck_hidden_sizes": [
724
+ 16,
725
+ 32,
726
+ 64,
727
+ 96,
728
+ 128,
729
+ 160,
730
+ 640
731
+ ],
732
+ "num_attention_heads": 4,
733
+ "num_channels": 1,
734
+ "output_stride": 32,
735
+ "patch_size": 2,
736
+ "problem_type": "single_label_classification",
737
+ "qkv_bias": true,
738
+ "semantic_loss_ignore_index": 255,
739
+ "transformers_version": "4.57.3"
740
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eeb0411c28598c38c3b093f41626f73884a0e0c80556bd8d0490abb4bf09f03
3
+ size 20730036
preprocessor_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 28,
4
+ "width": 28
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": false,
8
+ "do_flip_channel_order": false,
9
+ "do_reduce_labels": false,
10
+ "do_rescale": true,
11
+ "do_resize": true,
12
+ "image_processor_type": "MobileViTImageProcessor",
13
+ "resample": 2,
14
+ "rescale_factor": 0.00392156862745098,
15
+ "size": {
16
+ "shortest_edge": 28
17
+ }
18
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.751164,
4
+ "eval_loss": 0.9863214492797852,
5
+ "eval_runtime": 104.5218,
6
+ "eval_samples_per_second": 2391.845,
7
+ "eval_steps_per_second": 9.347
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 5.4597447576e+17,
4
+ "train_loss": 1.1272559640920097,
5
+ "train_runtime": 10316.2205,
6
+ "train_samples_per_second": 2181.031,
7
+ "train_steps_per_second": 8.52
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,805 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
+ "eval_steps": 5000,
7
+ "global_step": 87895,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.05688605722737357,
14
+ "grad_norm": 2.3711910247802734,
15
+ "learning_rate": 0.0007909073326127766,
16
+ "loss": 2.6366,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 0.11377211445474714,
21
+ "grad_norm": 2.2273147106170654,
22
+ "learning_rate": 0.0007818055634563969,
23
+ "loss": 1.7361,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 0.17065817168212072,
28
+ "grad_norm": 2.9114110469818115,
29
+ "learning_rate": 0.000772703794300017,
30
+ "loss": 1.5903,
31
+ "step": 3000
32
+ },
33
+ {
34
+ "epoch": 0.22754422890949427,
35
+ "grad_norm": 1.7726603746414185,
36
+ "learning_rate": 0.0007636020251436373,
37
+ "loss": 1.5127,
38
+ "step": 4000
39
+ },
40
+ {
41
+ "epoch": 0.2844302861368678,
42
+ "grad_norm": 1.8174991607666016,
43
+ "learning_rate": 0.0007545002559872575,
44
+ "loss": 1.4609,
45
+ "step": 5000
46
+ },
47
+ {
48
+ "epoch": 0.2844302861368678,
49
+ "eval_accuracy": 0.653196,
50
+ "eval_loss": 1.3989018201828003,
51
+ "eval_runtime": 65.7885,
52
+ "eval_samples_per_second": 3800.055,
53
+ "eval_steps_per_second": 14.851,
54
+ "step": 5000
55
+ },
56
+ {
57
+ "epoch": 0.34131634336424144,
58
+ "grad_norm": 1.7002882957458496,
59
+ "learning_rate": 0.0007453984868308778,
60
+ "loss": 1.4214,
61
+ "step": 6000
62
+ },
63
+ {
64
+ "epoch": 0.398202400591615,
65
+ "grad_norm": 1.6060094833374023,
66
+ "learning_rate": 0.0007362967176744981,
67
+ "loss": 1.3803,
68
+ "step": 7000
69
+ },
70
+ {
71
+ "epoch": 0.45508845781898855,
72
+ "grad_norm": 2.100240468978882,
73
+ "learning_rate": 0.0007271949485181182,
74
+ "loss": 1.358,
75
+ "step": 8000
76
+ },
77
+ {
78
+ "epoch": 0.5119745150463622,
79
+ "grad_norm": 1.507076621055603,
80
+ "learning_rate": 0.0007180931793617385,
81
+ "loss": 1.3392,
82
+ "step": 9000
83
+ },
84
+ {
85
+ "epoch": 0.5688605722737357,
86
+ "grad_norm": 1.8028790950775146,
87
+ "learning_rate": 0.0007089914102053587,
88
+ "loss": 1.3211,
89
+ "step": 10000
90
+ },
91
+ {
92
+ "epoch": 0.5688605722737357,
93
+ "eval_accuracy": 0.680348,
94
+ "eval_loss": 1.2739007472991943,
95
+ "eval_runtime": 64.9042,
96
+ "eval_samples_per_second": 3851.83,
97
+ "eval_steps_per_second": 15.053,
98
+ "step": 10000
99
+ },
100
+ {
101
+ "epoch": 0.6257466295011093,
102
+ "grad_norm": 1.699574589729309,
103
+ "learning_rate": 0.000699889641048979,
104
+ "loss": 1.3131,
105
+ "step": 11000
106
+ },
107
+ {
108
+ "epoch": 0.6826326867284829,
109
+ "grad_norm": 1.6491554975509644,
110
+ "learning_rate": 0.0006907878718925991,
111
+ "loss": 1.2837,
112
+ "step": 12000
113
+ },
114
+ {
115
+ "epoch": 0.7395187439558564,
116
+ "grad_norm": 1.8563138246536255,
117
+ "learning_rate": 0.0006816861027362194,
118
+ "loss": 1.276,
119
+ "step": 13000
120
+ },
121
+ {
122
+ "epoch": 0.79640480118323,
123
+ "grad_norm": 1.5511844158172607,
124
+ "learning_rate": 0.0006725843335798396,
125
+ "loss": 1.2678,
126
+ "step": 14000
127
+ },
128
+ {
129
+ "epoch": 0.8532908584106036,
130
+ "grad_norm": 1.3686333894729614,
131
+ "learning_rate": 0.0006634825644234599,
132
+ "loss": 1.2531,
133
+ "step": 15000
134
+ },
135
+ {
136
+ "epoch": 0.8532908584106036,
137
+ "eval_accuracy": 0.694232,
138
+ "eval_loss": 1.2132482528686523,
139
+ "eval_runtime": 64.8716,
140
+ "eval_samples_per_second": 3853.765,
141
+ "eval_steps_per_second": 15.061,
142
+ "step": 15000
143
+ },
144
+ {
145
+ "epoch": 0.9101769156379771,
146
+ "grad_norm": 1.958629846572876,
147
+ "learning_rate": 0.00065438079526708,
148
+ "loss": 1.2457,
149
+ "step": 16000
150
+ },
151
+ {
152
+ "epoch": 0.9670629728653507,
153
+ "grad_norm": 1.528414011001587,
154
+ "learning_rate": 0.0006452790261107003,
155
+ "loss": 1.2338,
156
+ "step": 17000
157
+ },
158
+ {
159
+ "epoch": 1.0239490300927243,
160
+ "grad_norm": 1.2693781852722168,
161
+ "learning_rate": 0.0006361772569543205,
162
+ "loss": 1.2142,
163
+ "step": 18000
164
+ },
165
+ {
166
+ "epoch": 1.0808350873200978,
167
+ "grad_norm": 1.4573434591293335,
168
+ "learning_rate": 0.0006270754877979408,
169
+ "loss": 1.19,
170
+ "step": 19000
171
+ },
172
+ {
173
+ "epoch": 1.1377211445474713,
174
+ "grad_norm": 1.236939787864685,
175
+ "learning_rate": 0.0006179737186415609,
176
+ "loss": 1.1875,
177
+ "step": 20000
178
+ },
179
+ {
180
+ "epoch": 1.1377211445474713,
181
+ "eval_accuracy": 0.704068,
182
+ "eval_loss": 1.1761754751205444,
183
+ "eval_runtime": 65.8177,
184
+ "eval_samples_per_second": 3798.369,
185
+ "eval_steps_per_second": 14.844,
186
+ "step": 20000
187
+ },
188
+ {
189
+ "epoch": 1.194607201774845,
190
+ "grad_norm": 1.241289496421814,
191
+ "learning_rate": 0.0006088719494851812,
192
+ "loss": 1.1814,
193
+ "step": 21000
194
+ },
195
+ {
196
+ "epoch": 1.2514932590022185,
197
+ "grad_norm": 1.483782410621643,
198
+ "learning_rate": 0.0005997701803288014,
199
+ "loss": 1.1822,
200
+ "step": 22000
201
+ },
202
+ {
203
+ "epoch": 1.3083793162295922,
204
+ "grad_norm": 1.5755152702331543,
205
+ "learning_rate": 0.0005906684111724217,
206
+ "loss": 1.1767,
207
+ "step": 23000
208
+ },
209
+ {
210
+ "epoch": 1.3652653734569657,
211
+ "grad_norm": 1.333516001701355,
212
+ "learning_rate": 0.0005815666420160419,
213
+ "loss": 1.1731,
214
+ "step": 24000
215
+ },
216
+ {
217
+ "epoch": 1.4221514306843392,
218
+ "grad_norm": 1.8660708665847778,
219
+ "learning_rate": 0.0005724648728596621,
220
+ "loss": 1.157,
221
+ "step": 25000
222
+ },
223
+ {
224
+ "epoch": 1.4221514306843392,
225
+ "eval_accuracy": 0.711072,
226
+ "eval_loss": 1.145967960357666,
227
+ "eval_runtime": 63.5002,
228
+ "eval_samples_per_second": 3936.992,
229
+ "eval_steps_per_second": 15.386,
230
+ "step": 25000
231
+ },
232
+ {
233
+ "epoch": 1.4790374879117127,
234
+ "grad_norm": 1.3808480501174927,
235
+ "learning_rate": 0.0005633631037032824,
236
+ "loss": 1.1574,
237
+ "step": 26000
238
+ },
239
+ {
240
+ "epoch": 1.5359235451390862,
241
+ "grad_norm": 1.1691391468048096,
242
+ "learning_rate": 0.0005542613345469026,
243
+ "loss": 1.1554,
244
+ "step": 27000
245
+ },
246
+ {
247
+ "epoch": 1.59280960236646,
248
+ "grad_norm": 1.4390947818756104,
249
+ "learning_rate": 0.0005451595653905228,
250
+ "loss": 1.1497,
251
+ "step": 28000
252
+ },
253
+ {
254
+ "epoch": 1.6496956595938337,
255
+ "grad_norm": 1.3637901544570923,
256
+ "learning_rate": 0.000536057796234143,
257
+ "loss": 1.1452,
258
+ "step": 29000
259
+ },
260
+ {
261
+ "epoch": 1.7065817168212072,
262
+ "grad_norm": 1.2076903581619263,
263
+ "learning_rate": 0.0005269560270777633,
264
+ "loss": 1.144,
265
+ "step": 30000
266
+ },
267
+ {
268
+ "epoch": 1.7065817168212072,
269
+ "eval_accuracy": 0.716336,
270
+ "eval_loss": 1.11836576461792,
271
+ "eval_runtime": 64.1718,
272
+ "eval_samples_per_second": 3895.791,
273
+ "eval_steps_per_second": 15.225,
274
+ "step": 30000
275
+ },
276
+ {
277
+ "epoch": 1.7634677740485807,
278
+ "grad_norm": 1.349098801612854,
279
+ "learning_rate": 0.0005178542579213835,
280
+ "loss": 1.1383,
281
+ "step": 31000
282
+ },
283
+ {
284
+ "epoch": 1.8203538312759542,
285
+ "grad_norm": 1.4453612565994263,
286
+ "learning_rate": 0.0005087524887650037,
287
+ "loss": 1.1391,
288
+ "step": 32000
289
+ },
290
+ {
291
+ "epoch": 1.8772398885033277,
292
+ "grad_norm": 1.0392345190048218,
293
+ "learning_rate": 0.0004996507196086239,
294
+ "loss": 1.1328,
295
+ "step": 33000
296
+ },
297
+ {
298
+ "epoch": 1.9341259457307014,
299
+ "grad_norm": 1.1520024538040161,
300
+ "learning_rate": 0.0004905489504522442,
301
+ "loss": 1.1238,
302
+ "step": 34000
303
+ },
304
+ {
305
+ "epoch": 1.9910120029580751,
306
+ "grad_norm": 1.515512228012085,
307
+ "learning_rate": 0.0004814471812958644,
308
+ "loss": 1.1217,
309
+ "step": 35000
310
+ },
311
+ {
312
+ "epoch": 1.9910120029580751,
313
+ "eval_accuracy": 0.724676,
314
+ "eval_loss": 1.0880111455917358,
315
+ "eval_runtime": 64.3813,
316
+ "eval_samples_per_second": 3883.115,
317
+ "eval_steps_per_second": 15.175,
318
+ "step": 35000
319
+ },
320
+ {
321
+ "epoch": 2.0478980601854486,
322
+ "grad_norm": 1.4771007299423218,
323
+ "learning_rate": 0.00047234541213948464,
324
+ "loss": 1.0919,
325
+ "step": 36000
326
+ },
327
+ {
328
+ "epoch": 2.104784117412822,
329
+ "grad_norm": 1.3845994472503662,
330
+ "learning_rate": 0.00046324364298310487,
331
+ "loss": 1.0838,
332
+ "step": 37000
333
+ },
334
+ {
335
+ "epoch": 2.1616701746401956,
336
+ "grad_norm": 1.250450611114502,
337
+ "learning_rate": 0.00045414187382672515,
338
+ "loss": 1.0785,
339
+ "step": 38000
340
+ },
341
+ {
342
+ "epoch": 2.218556231867569,
343
+ "grad_norm": 1.5783060789108276,
344
+ "learning_rate": 0.0004450401046703453,
345
+ "loss": 1.0753,
346
+ "step": 39000
347
+ },
348
+ {
349
+ "epoch": 2.2754422890949426,
350
+ "grad_norm": 1.7228904962539673,
351
+ "learning_rate": 0.0004359383355139656,
352
+ "loss": 1.0831,
353
+ "step": 40000
354
+ },
355
+ {
356
+ "epoch": 2.2754422890949426,
357
+ "eval_accuracy": 0.727968,
358
+ "eval_loss": 1.0728965997695923,
359
+ "eval_runtime": 64.3156,
360
+ "eval_samples_per_second": 3887.084,
361
+ "eval_steps_per_second": 15.191,
362
+ "step": 40000
363
+ },
364
+ {
365
+ "epoch": 2.3323283463223166,
366
+ "grad_norm": 1.333543062210083,
367
+ "learning_rate": 0.00042683656635758577,
368
+ "loss": 1.0798,
369
+ "step": 41000
370
+ },
371
+ {
372
+ "epoch": 2.38921440354969,
373
+ "grad_norm": 1.3213781118392944,
374
+ "learning_rate": 0.00041773479720120594,
375
+ "loss": 1.0804,
376
+ "step": 42000
377
+ },
378
+ {
379
+ "epoch": 2.4461004607770636,
380
+ "grad_norm": 1.43584406375885,
381
+ "learning_rate": 0.0004086330280448262,
382
+ "loss": 1.0713,
383
+ "step": 43000
384
+ },
385
+ {
386
+ "epoch": 2.502986518004437,
387
+ "grad_norm": 1.2614803314208984,
388
+ "learning_rate": 0.0003995312588884465,
389
+ "loss": 1.0697,
390
+ "step": 44000
391
+ },
392
+ {
393
+ "epoch": 2.5598725752318106,
394
+ "grad_norm": 1.1319971084594727,
395
+ "learning_rate": 0.0003904294897320667,
396
+ "loss": 1.0761,
397
+ "step": 45000
398
+ },
399
+ {
400
+ "epoch": 2.5598725752318106,
401
+ "eval_accuracy": 0.731168,
402
+ "eval_loss": 1.0593221187591553,
403
+ "eval_runtime": 64.6765,
404
+ "eval_samples_per_second": 3865.393,
405
+ "eval_steps_per_second": 15.106,
406
+ "step": 45000
407
+ },
408
+ {
409
+ "epoch": 2.6167586324591845,
410
+ "grad_norm": 1.2045773267745972,
411
+ "learning_rate": 0.00038132772057568694,
412
+ "loss": 1.0723,
413
+ "step": 46000
414
+ },
415
+ {
416
+ "epoch": 2.673644689686558,
417
+ "grad_norm": 1.3462469577789307,
418
+ "learning_rate": 0.00037222595141930717,
419
+ "loss": 1.067,
420
+ "step": 47000
421
+ },
422
+ {
423
+ "epoch": 2.7305307469139315,
424
+ "grad_norm": 1.3573272228240967,
425
+ "learning_rate": 0.0003631241822629274,
426
+ "loss": 1.0636,
427
+ "step": 48000
428
+ },
429
+ {
430
+ "epoch": 2.787416804141305,
431
+ "grad_norm": 1.2870041131973267,
432
+ "learning_rate": 0.0003540224131065476,
433
+ "loss": 1.0655,
434
+ "step": 49000
435
+ },
436
+ {
437
+ "epoch": 2.8443028613686785,
438
+ "grad_norm": 1.3287382125854492,
439
+ "learning_rate": 0.0003449206439501678,
440
+ "loss": 1.0565,
441
+ "step": 50000
442
+ },
443
+ {
444
+ "epoch": 2.8443028613686785,
445
+ "eval_accuracy": 0.734552,
446
+ "eval_loss": 1.0479968786239624,
447
+ "eval_runtime": 65.2161,
448
+ "eval_samples_per_second": 3833.412,
449
+ "eval_steps_per_second": 14.981,
450
+ "step": 50000
451
+ },
452
+ {
453
+ "epoch": 2.901188918596052,
454
+ "grad_norm": 1.384717345237732,
455
+ "learning_rate": 0.000335818874793788,
456
+ "loss": 1.0529,
457
+ "step": 51000
458
+ },
459
+ {
460
+ "epoch": 2.9580749758234255,
461
+ "grad_norm": 1.1834776401519775,
462
+ "learning_rate": 0.0003267171056374083,
463
+ "loss": 1.0608,
464
+ "step": 52000
465
+ },
466
+ {
467
+ "epoch": 3.0149610330507994,
468
+ "grad_norm": 1.0646686553955078,
469
+ "learning_rate": 0.0003176153364810285,
470
+ "loss": 1.0417,
471
+ "step": 53000
472
+ },
473
+ {
474
+ "epoch": 3.071847090278173,
475
+ "grad_norm": 1.348777174949646,
476
+ "learning_rate": 0.00030851356732464874,
477
+ "loss": 1.0168,
478
+ "step": 54000
479
+ },
480
+ {
481
+ "epoch": 3.1287331475055464,
482
+ "grad_norm": 1.2929068803787231,
483
+ "learning_rate": 0.00029941179816826897,
484
+ "loss": 1.0149,
485
+ "step": 55000
486
+ },
487
+ {
488
+ "epoch": 3.1287331475055464,
489
+ "eval_accuracy": 0.73796,
490
+ "eval_loss": 1.0355563163757324,
491
+ "eval_runtime": 66.0157,
492
+ "eval_samples_per_second": 3786.979,
493
+ "eval_steps_per_second": 14.8,
494
+ "step": 55000
495
+ },
496
+ {
497
+ "epoch": 3.18561920473292,
498
+ "grad_norm": 1.3426847457885742,
499
+ "learning_rate": 0.0002903100290118892,
500
+ "loss": 1.0145,
501
+ "step": 56000
502
+ },
503
+ {
504
+ "epoch": 3.2425052619602934,
505
+ "grad_norm": 1.3112365007400513,
506
+ "learning_rate": 0.0002812082598555094,
507
+ "loss": 1.013,
508
+ "step": 57000
509
+ },
510
+ {
511
+ "epoch": 3.299391319187667,
512
+ "grad_norm": 1.3956024646759033,
513
+ "learning_rate": 0.00027210649069912964,
514
+ "loss": 1.0117,
515
+ "step": 58000
516
+ },
517
+ {
518
+ "epoch": 3.356277376415041,
519
+ "grad_norm": 1.2679752111434937,
520
+ "learning_rate": 0.00026300472154274987,
521
+ "loss": 1.0155,
522
+ "step": 59000
523
+ },
524
+ {
525
+ "epoch": 3.4131634336424144,
526
+ "grad_norm": 1.5014774799346924,
527
+ "learning_rate": 0.0002539029523863701,
528
+ "loss": 1.0102,
529
+ "step": 60000
530
+ },
531
+ {
532
+ "epoch": 3.4131634336424144,
533
+ "eval_accuracy": 0.74012,
534
+ "eval_loss": 1.0263450145721436,
535
+ "eval_runtime": 64.1919,
536
+ "eval_samples_per_second": 3894.574,
537
+ "eval_steps_per_second": 15.22,
538
+ "step": 60000
539
+ },
540
+ {
541
+ "epoch": 3.470049490869788,
542
+ "grad_norm": 1.4669406414031982,
543
+ "learning_rate": 0.0002448011832299904,
544
+ "loss": 1.0145,
545
+ "step": 61000
546
+ },
547
+ {
548
+ "epoch": 3.5269355480971614,
549
+ "grad_norm": 1.3615577220916748,
550
+ "learning_rate": 0.00023569941407361057,
551
+ "loss": 1.0173,
552
+ "step": 62000
553
+ },
554
+ {
555
+ "epoch": 3.583821605324535,
556
+ "grad_norm": 1.126437783241272,
557
+ "learning_rate": 0.00022659764491723082,
558
+ "loss": 1.0125,
559
+ "step": 63000
560
+ },
561
+ {
562
+ "epoch": 3.6407076625519084,
563
+ "grad_norm": 1.2467857599258423,
564
+ "learning_rate": 0.00021749587576085105,
565
+ "loss": 1.0133,
566
+ "step": 64000
567
+ },
568
+ {
569
+ "epoch": 3.697593719779282,
570
+ "grad_norm": 1.3474713563919067,
571
+ "learning_rate": 0.00020839410660447127,
572
+ "loss": 1.0014,
573
+ "step": 65000
574
+ },
575
+ {
576
+ "epoch": 3.697593719779282,
577
+ "eval_accuracy": 0.743688,
578
+ "eval_loss": 1.0122489929199219,
579
+ "eval_runtime": 64.6438,
580
+ "eval_samples_per_second": 3867.347,
581
+ "eval_steps_per_second": 15.114,
582
+ "step": 65000
583
+ },
584
+ {
585
+ "epoch": 3.754479777006656,
586
+ "grad_norm": 1.3319435119628906,
587
+ "learning_rate": 0.00019929233744809147,
588
+ "loss": 1.0034,
589
+ "step": 66000
590
+ },
591
+ {
592
+ "epoch": 3.8113658342340293,
593
+ "grad_norm": 1.9685286283493042,
594
+ "learning_rate": 0.00019019056829171172,
595
+ "loss": 0.995,
596
+ "step": 67000
597
+ },
598
+ {
599
+ "epoch": 3.868251891461403,
600
+ "grad_norm": 1.2180532217025757,
601
+ "learning_rate": 0.00018108879913533195,
602
+ "loss": 1.0069,
603
+ "step": 68000
604
+ },
605
+ {
606
+ "epoch": 3.9251379486887763,
607
+ "grad_norm": 1.3233805894851685,
608
+ "learning_rate": 0.00017198702997895217,
609
+ "loss": 0.9983,
610
+ "step": 69000
611
+ },
612
+ {
613
+ "epoch": 3.98202400591615,
614
+ "grad_norm": 1.7491425275802612,
615
+ "learning_rate": 0.0001628852608225724,
616
+ "loss": 0.9972,
617
+ "step": 70000
618
+ },
619
+ {
620
+ "epoch": 3.98202400591615,
621
+ "eval_accuracy": 0.745936,
622
+ "eval_loss": 1.0027811527252197,
623
+ "eval_runtime": 65.7257,
624
+ "eval_samples_per_second": 3803.688,
625
+ "eval_steps_per_second": 14.865,
626
+ "step": 70000
627
+ },
628
+ {
629
+ "epoch": 4.038910063143524,
630
+ "grad_norm": 1.1467124223709106,
631
+ "learning_rate": 0.00015378349166619262,
632
+ "loss": 0.9752,
633
+ "step": 71000
634
+ },
635
+ {
636
+ "epoch": 4.095796120370897,
637
+ "grad_norm": 1.2129188776016235,
638
+ "learning_rate": 0.00014468172250981285,
639
+ "loss": 0.9652,
640
+ "step": 72000
641
+ },
642
+ {
643
+ "epoch": 4.152682177598271,
644
+ "grad_norm": 1.3177002668380737,
645
+ "learning_rate": 0.00013557995335343307,
646
+ "loss": 0.9615,
647
+ "step": 73000
648
+ },
649
+ {
650
+ "epoch": 4.209568234825644,
651
+ "grad_norm": 1.1324489116668701,
652
+ "learning_rate": 0.0001264781841970533,
653
+ "loss": 0.9629,
654
+ "step": 74000
655
+ },
656
+ {
657
+ "epoch": 4.266454292053018,
658
+ "grad_norm": 1.2428852319717407,
659
+ "learning_rate": 0.00011737641504067354,
660
+ "loss": 0.9556,
661
+ "step": 75000
662
+ },
663
+ {
664
+ "epoch": 4.266454292053018,
665
+ "eval_accuracy": 0.747436,
666
+ "eval_loss": 0.9971279501914978,
667
+ "eval_runtime": 66.3789,
668
+ "eval_samples_per_second": 3766.258,
669
+ "eval_steps_per_second": 14.719,
670
+ "step": 75000
671
+ },
672
+ {
673
+ "epoch": 4.323340349280391,
674
+ "grad_norm": 1.4413901567459106,
675
+ "learning_rate": 0.00010827464588429376,
676
+ "loss": 0.9616,
677
+ "step": 76000
678
+ },
679
+ {
680
+ "epoch": 4.380226406507765,
681
+ "grad_norm": 1.312136173248291,
682
+ "learning_rate": 9.917287672791399e-05,
683
+ "loss": 0.9657,
684
+ "step": 77000
685
+ },
686
+ {
687
+ "epoch": 4.437112463735138,
688
+ "grad_norm": 1.3660274744033813,
689
+ "learning_rate": 9.007110757153423e-05,
690
+ "loss": 0.9613,
691
+ "step": 78000
692
+ },
693
+ {
694
+ "epoch": 4.493998520962512,
695
+ "grad_norm": 1.4278331995010376,
696
+ "learning_rate": 8.096933841515445e-05,
697
+ "loss": 0.9576,
698
+ "step": 79000
699
+ },
700
+ {
701
+ "epoch": 4.550884578189885,
702
+ "grad_norm": 1.20628821849823,
703
+ "learning_rate": 7.186756925877468e-05,
704
+ "loss": 0.9606,
705
+ "step": 80000
706
+ },
707
+ {
708
+ "epoch": 4.550884578189885,
709
+ "eval_accuracy": 0.749644,
710
+ "eval_loss": 0.990385890007019,
711
+ "eval_runtime": 65.0093,
712
+ "eval_samples_per_second": 3845.605,
713
+ "eval_steps_per_second": 15.029,
714
+ "step": 80000
715
+ },
716
+ {
717
+ "epoch": 4.607770635417259,
718
+ "grad_norm": 1.8617701530456543,
719
+ "learning_rate": 6.27658001023949e-05,
720
+ "loss": 0.954,
721
+ "step": 81000
722
+ },
723
+ {
724
+ "epoch": 4.664656692644633,
725
+ "grad_norm": 1.352597951889038,
726
+ "learning_rate": 5.366403094601513e-05,
727
+ "loss": 0.957,
728
+ "step": 82000
729
+ },
730
+ {
731
+ "epoch": 4.721542749872007,
732
+ "grad_norm": 1.4314864873886108,
733
+ "learning_rate": 4.4562261789635364e-05,
734
+ "loss": 0.9541,
735
+ "step": 83000
736
+ },
737
+ {
738
+ "epoch": 4.77842880709938,
739
+ "grad_norm": 1.2464176416397095,
740
+ "learning_rate": 3.5460492633255596e-05,
741
+ "loss": 0.9545,
742
+ "step": 84000
743
+ },
744
+ {
745
+ "epoch": 4.835314864326754,
746
+ "grad_norm": 1.4721029996871948,
747
+ "learning_rate": 2.6358723476875817e-05,
748
+ "loss": 0.9544,
749
+ "step": 85000
750
+ },
751
+ {
752
+ "epoch": 4.835314864326754,
753
+ "eval_accuracy": 0.750732,
754
+ "eval_loss": 0.9842203259468079,
755
+ "eval_runtime": 65.3657,
756
+ "eval_samples_per_second": 3824.637,
757
+ "eval_steps_per_second": 14.947,
758
+ "step": 85000
759
+ },
760
+ {
761
+ "epoch": 4.892200921554127,
762
+ "grad_norm": 1.383285403251648,
763
+ "learning_rate": 1.7256954320496046e-05,
764
+ "loss": 0.9556,
765
+ "step": 86000
766
+ },
767
+ {
768
+ "epoch": 4.949086978781501,
769
+ "grad_norm": 1.3051174879074097,
770
+ "learning_rate": 8.155185164116276e-06,
771
+ "loss": 0.9503,
772
+ "step": 87000
773
+ },
774
+ {
775
+ "epoch": 5.0,
776
+ "step": 87895,
777
+ "total_flos": 5.4597447576e+17,
778
+ "train_loss": 1.1272559640920097,
779
+ "train_runtime": 10316.2205,
780
+ "train_samples_per_second": 2181.031,
781
+ "train_steps_per_second": 8.52
782
+ }
783
+ ],
784
+ "logging_steps": 1000,
785
+ "max_steps": 87895,
786
+ "num_input_tokens_seen": 0,
787
+ "num_train_epochs": 5,
788
+ "save_steps": 5000,
789
+ "stateful_callbacks": {
790
+ "TrainerControl": {
791
+ "args": {
792
+ "should_epoch_stop": false,
793
+ "should_evaluate": false,
794
+ "should_log": false,
795
+ "should_save": true,
796
+ "should_training_stop": true
797
+ },
798
+ "attributes": {}
799
+ }
800
+ },
801
+ "total_flos": 5.4597447576e+17,
802
+ "train_batch_size": 256,
803
+ "trial_name": null,
804
+ "trial_params": null
805
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d535fe1935962c1e372c8e1956b7a46c429d0be2f757a49936cc4a84f2e878ee
3
+ size 5777