| { | |
| "_name_or_path": "dandelin/vilt-b32-finetuned-vqa", | |
| "architectures": [ | |
| "CustomViltForVQA" | |
| ], | |
| "attention_probs_dropout_prob": 0.0, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "boat", | |
| "1": "spoon", | |
| "2": "bench", | |
| "3": "banana", | |
| "4": "cat", | |
| "5": "oven", | |
| "6": "bottle", | |
| "7": "No", | |
| "8": "Yes", | |
| "9": "knife", | |
| "10": "clock", | |
| "11": "5", | |
| "12": "mouse", | |
| "13": "cup", | |
| "14": "sheep", | |
| "15": "dining table", | |
| "16": "fork", | |
| "17": "refrigerator", | |
| "18": "zebra", | |
| "19": "broccoli", | |
| "20": "dog", | |
| "21": "skateboard", | |
| "22": "Black", | |
| "23": "bed", | |
| "24": "motorcycle", | |
| "25": "pizza", | |
| "26": "donut", | |
| "27": "skis", | |
| "28": "chair", | |
| "29": "tennis racket", | |
| "30": "bird", | |
| "31": "potted plant", | |
| "32": "sports ball", | |
| "33": "Brown", | |
| "34": "laptop", | |
| "35": "elephant", | |
| "36": "horse", | |
| "37": "Blue", | |
| "38": "suitcase", | |
| "39": "hot dog", | |
| "40": "4", | |
| "41": "Orange", | |
| "42": "Purple", | |
| "43": "handbag", | |
| "44": "cow", | |
| "45": "fire hydrant", | |
| "46": "snowboard", | |
| "47": "toothbrush", | |
| "48": "Below", | |
| "49": "parking meter", | |
| "50": "Front", | |
| "51": "Right", | |
| "52": "cake", | |
| "53": "tv", | |
| "54": "9", | |
| "55": "tie", | |
| "56": "orange", | |
| "57": "wine glass", | |
| "58": "cell phone", | |
| "59": "stop sign", | |
| "60": "right", | |
| "61": "Pink", | |
| "62": "giraffe", | |
| "63": "scissors", | |
| "64": "1", | |
| "65": "7", | |
| "66": "keyboard", | |
| "67": "Yellow", | |
| "68": "3", | |
| "69": "remote", | |
| "70": "bear", | |
| "71": "car", | |
| "72": "truck", | |
| "73": "surfboard", | |
| "74": "traffic light", | |
| "75": "left", | |
| "76": "bus", | |
| "77": "frisbee", | |
| "78": "couch", | |
| "79": "Red", | |
| "80": "Left", | |
| "81": "6", | |
| "82": "toilet", | |
| "83": "airplane", | |
| "84": "Grey", | |
| "85": "8", | |
| "86": "above", | |
| "87": "baseball glove", | |
| "88": "vase", | |
| "89": "kite", | |
| "90": "bowl", | |
| "91": "0", | |
| "92": "2", | |
| "93": "White", | |
| "94": "Behind", | |
| "95": "Above", | |
| "96": "baseball bat", | |
| "97": "Green", | |
| "98": "person", | |
| "99": "sandwich", | |
| "100": "sink", | |
| "101": "book", | |
| "102": "train", | |
| "103": "umbrella", | |
| "104": "carrot", | |
| "105": "bicycle", | |
| "106": "apple", | |
| "107": "teddy bear" | |
| }, | |
| "image_size": 384, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "label2id": { | |
| "0": 91, | |
| "1": 64, | |
| "2": 92, | |
| "3": 68, | |
| "4": 40, | |
| "5": 11, | |
| "6": 81, | |
| "7": 65, | |
| "8": 85, | |
| "9": 54, | |
| "Above": 95, | |
| "Behind": 94, | |
| "Below": 48, | |
| "Black": 22, | |
| "Blue": 37, | |
| "Brown": 33, | |
| "Front": 50, | |
| "Green": 97, | |
| "Grey": 84, | |
| "Left": 80, | |
| "No": 7, | |
| "Orange": 41, | |
| "Pink": 61, | |
| "Purple": 42, | |
| "Red": 79, | |
| "Right": 51, | |
| "White": 93, | |
| "Yellow": 67, | |
| "Yes": 8, | |
| "above": 86, | |
| "airplane": 83, | |
| "apple": 106, | |
| "banana": 3, | |
| "baseball bat": 96, | |
| "baseball glove": 87, | |
| "bear": 70, | |
| "bed": 23, | |
| "bench": 2, | |
| "bicycle": 105, | |
| "bird": 30, | |
| "boat": 0, | |
| "book": 101, | |
| "bottle": 6, | |
| "bowl": 90, | |
| "broccoli": 19, | |
| "bus": 76, | |
| "cake": 52, | |
| "car": 71, | |
| "carrot": 104, | |
| "cat": 4, | |
| "cell phone": 58, | |
| "chair": 28, | |
| "clock": 10, | |
| "couch": 78, | |
| "cow": 44, | |
| "cup": 13, | |
| "dining table": 15, | |
| "dog": 20, | |
| "donut": 26, | |
| "elephant": 35, | |
| "fire hydrant": 45, | |
| "fork": 16, | |
| "frisbee": 77, | |
| "giraffe": 62, | |
| "handbag": 43, | |
| "horse": 36, | |
| "hot dog": 39, | |
| "keyboard": 66, | |
| "kite": 89, | |
| "knife": 9, | |
| "laptop": 34, | |
| "left": 75, | |
| "motorcycle": 24, | |
| "mouse": 12, | |
| "orange": 56, | |
| "oven": 5, | |
| "parking meter": 49, | |
| "person": 98, | |
| "pizza": 25, | |
| "potted plant": 31, | |
| "refrigerator": 17, | |
| "remote": 69, | |
| "right": 60, | |
| "sandwich": 99, | |
| "scissors": 63, | |
| "sheep": 14, | |
| "sink": 100, | |
| "skateboard": 21, | |
| "skis": 27, | |
| "snowboard": 46, | |
| "spoon": 1, | |
| "sports ball": 32, | |
| "stop sign": 59, | |
| "suitcase": 38, | |
| "surfboard": 73, | |
| "teddy bear": 107, | |
| "tennis racket": 29, | |
| "tie": 55, | |
| "toilet": 82, | |
| "toothbrush": 47, | |
| "traffic light": 74, | |
| "train": 102, | |
| "truck": 72, | |
| "tv": 53, | |
| "umbrella": 103, | |
| "vase": 88, | |
| "wine glass": 57, | |
| "zebra": 18 | |
| }, | |
| "layer_norm_eps": 1e-12, | |
| "max_image_length": -1, | |
| "max_position_embeddings": 40, | |
| "modality_type_vocab_size": 2, | |
| "model_type": "vilt", | |
| "num_attention_heads": 12, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "num_images": -1, | |
| "patch_size": 32, | |
| "qkv_bias": true, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.47.0", | |
| "type_vocab_size": 2, | |
| "vocab_size": 30522 | |
| } | |