| { | |
| "_name_or_path": "dandelin/vilt-b32-mlm", | |
| "architectures": [ | |
| "ViltSNNForQuestionAnswering" | |
| ], | |
| "attention_probs_dropout_prob": 0.0, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "4", | |
| "1": "walking", | |
| "2": "shadows", | |
| "3": "6", | |
| "4": "not sure", | |
| "5": "out", | |
| "6": "crown", | |
| "7": "brown", | |
| "8": "car", | |
| "9": "hawaii", | |
| "10": "donut", | |
| "11": "they aren't", | |
| "12": "7", | |
| "13": "many", | |
| "14": "at table", | |
| "15": "shrimp", | |
| "16": "air", | |
| "17": "leather", | |
| "18": "yellow", | |
| "19": "wedding", | |
| "20": "8", | |
| "21": "black", | |
| "22": "watching", | |
| "23": "hat", | |
| "24": "tired", | |
| "25": "train", | |
| "26": "2", | |
| "27": "1", | |
| "28": "human", | |
| "29": "0", | |
| "30": "skateboard", | |
| "31": "no", | |
| "32": "3", | |
| "33": "cup", | |
| "34": "forest", | |
| "35": "picnic table", | |
| "36": "boy", | |
| "37": "woman", | |
| "38": "blue and white", | |
| "39": "right", | |
| "40": "outside", | |
| "41": "chair", | |
| "42": "suv", | |
| "43": "wine", | |
| "44": "yes", | |
| "45": "red and yellow", | |
| "46": "jeep", | |
| "47": "white", | |
| "48": "backpack", | |
| "49": "fashion", | |
| "50": "red and blue", | |
| "51": "bedroom", | |
| "52": "table", | |
| "53": "park", | |
| "54": "full", | |
| "55": "gray", | |
| "56": "sky", | |
| "57": "beige", | |
| "58": "sun", | |
| "59": "doughnut", | |
| "60": "plastic", | |
| "61": "king", | |
| "62": "shadow", | |
| "63": "wall", | |
| "64": "crossing", | |
| "65": "girl", | |
| "66": "double", | |
| "67": "5", | |
| "68": "red", | |
| "69": "pink", | |
| "70": "bus", | |
| "71": "wine tasting", | |
| "72": "ice cream", | |
| "73": "dog", | |
| "74": "little girl", | |
| "75": "white and black", | |
| "76": "birthday", | |
| "77": "down", | |
| "78": "resting", | |
| "79": "cat", | |
| "80": "chopsticks", | |
| "81": "blonde", | |
| "82": "green", | |
| "83": "style", | |
| "84": "queen", | |
| "85": "woods", | |
| "86": "white and blue", | |
| "87": "10" | |
| }, | |
| "image_size": 384, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "label2id": { | |
| "0": 29, | |
| "1": 27, | |
| "10": 87, | |
| "2": 26, | |
| "3": 32, | |
| "4": 0, | |
| "5": 67, | |
| "6": 3, | |
| "7": 12, | |
| "8": 20, | |
| "air": 16, | |
| "at table": 14, | |
| "backpack": 48, | |
| "bedroom": 51, | |
| "beige": 57, | |
| "birthday": 76, | |
| "black": 21, | |
| "blonde": 81, | |
| "blue and white": 38, | |
| "boy": 36, | |
| "brown": 7, | |
| "bus": 70, | |
| "car": 8, | |
| "cat": 79, | |
| "chair": 41, | |
| "chopsticks": 80, | |
| "crossing": 64, | |
| "crown": 6, | |
| "cup": 33, | |
| "dog": 73, | |
| "donut": 10, | |
| "double": 66, | |
| "doughnut": 59, | |
| "down": 77, | |
| "fashion": 49, | |
| "forest": 34, | |
| "full": 54, | |
| "girl": 65, | |
| "gray": 55, | |
| "green": 82, | |
| "hat": 23, | |
| "hawaii": 9, | |
| "human": 28, | |
| "ice cream": 72, | |
| "jeep": 46, | |
| "king": 61, | |
| "leather": 17, | |
| "little girl": 74, | |
| "many": 13, | |
| "no": 31, | |
| "not sure": 4, | |
| "out": 5, | |
| "outside": 40, | |
| "park": 53, | |
| "picnic table": 35, | |
| "pink": 69, | |
| "plastic": 60, | |
| "queen": 84, | |
| "red": 68, | |
| "red and blue": 50, | |
| "red and yellow": 45, | |
| "resting": 78, | |
| "right": 39, | |
| "shadow": 62, | |
| "shadows": 2, | |
| "shrimp": 15, | |
| "skateboard": 30, | |
| "sky": 56, | |
| "style": 83, | |
| "sun": 58, | |
| "suv": 42, | |
| "table": 52, | |
| "they aren't": 11, | |
| "tired": 24, | |
| "train": 25, | |
| "walking": 1, | |
| "wall": 63, | |
| "watching": 22, | |
| "wedding": 19, | |
| "white": 47, | |
| "white and black": 75, | |
| "white and blue": 86, | |
| "wine": 43, | |
| "wine tasting": 71, | |
| "woman": 37, | |
| "woods": 85, | |
| "yellow": 18, | |
| "yes": 44 | |
| }, | |
| "layer_norm_eps": 1e-12, | |
| "max_image_length": -1, | |
| "max_position_embeddings": 40, | |
| "modality_type_vocab_size": 2, | |
| "model_type": "vilt", | |
| "num_attention_heads": 12, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "num_images": -1, | |
| "patch_size": 32, | |
| "qkv_bias": true, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.46.2", | |
| "type_vocab_size": 2, | |
| "vocab_size": 30522 | |
| } | |