{ "_name_or_path": "dandelin/vilt-b32-finetuned-vqa", "architectures": [ "CustomViltForVQA" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "boat", "1": "above", "2": "apple", "3": "clock", "4": "9", "5": "wine glass", "6": "mouse", "7": "2", "8": "sheep", "9": "couch", "10": "snowboard", "11": "Yes", "12": "spoon", "13": "sports ball", "14": "handbag", "15": "Blue", "16": "5", "17": "laptop", "18": "toothbrush", "19": "carrot", "20": "cake", "21": "dog", "22": "White", "23": "motorcycle", "24": "chair", "25": "train", "26": "zebra", "27": "6", "28": "person", "29": "parking meter", "30": "Brown", "31": "car", "32": "sink", "33": "right", "34": "bear", "35": "Right", "36": "sandwich", "37": "Grey", "38": "Red", "39": "surfboard", "40": "bowl", "41": "airplane", "42": "potted plant", "43": "7", "44": "tv", "45": "suitcase", "46": "kite", "47": "tennis racket", "48": "bench", "49": "0", "50": "bed", "51": "dining table", "52": "No", "53": "refrigerator", "54": "giraffe", "55": "Purple", "56": "hot dog", "57": "truck", "58": "vase", "59": "Orange", "60": "tie", "61": "broccoli", "62": "umbrella", "63": "Green", "64": "left", "65": "stop sign", "66": "cat", "67": "teddy bear", "68": "bicycle", "69": "orange", "70": "3", "71": "scissors", "72": "baseball glove", "73": "frisbee", "74": "4", "75": "keyboard", "76": "banana", "77": "bus", "78": "1", "79": "Black", "80": "8", "81": "fork", "82": "baseball bat", "83": "donut", "84": "book", "85": "Above", "86": "skis", "87": "oven", "88": "Below", "89": "cell phone", "90": "Left", "91": "fire hydrant", "92": "Behind", "93": "cup", "94": "elephant", "95": "pizza", "96": "bird", "97": "Pink", "98": "knife", "99": "skateboard", "100": "horse", "101": "Front", "102": "Yellow", "103": "traffic light", "104": "remote", "105": "cow", "106": "toilet", "107": "bottle" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 49, "1": 78, "2": 7, "3": 70, "4": 74, "5": 16, "6": 27, "7": 43, "8": 80, "9": 4, "Above": 85, "Behind": 92, "Below": 88, "Black": 79, "Blue": 15, "Brown": 30, "Front": 101, "Green": 63, "Grey": 37, "Left": 90, "No": 52, "Orange": 59, "Pink": 97, "Purple": 55, "Red": 38, "Right": 35, "White": 22, "Yellow": 102, "Yes": 11, "above": 1, "airplane": 41, "apple": 2, "banana": 76, "baseball bat": 82, "baseball glove": 72, "bear": 34, "bed": 50, "bench": 48, "bicycle": 68, "bird": 96, "boat": 0, "book": 84, "bottle": 107, "bowl": 40, "broccoli": 61, "bus": 77, "cake": 20, "car": 31, "carrot": 19, "cat": 66, "cell phone": 89, "chair": 24, "clock": 3, "couch": 9, "cow": 105, "cup": 93, "dining table": 51, "dog": 21, "donut": 83, "elephant": 94, "fire hydrant": 91, "fork": 81, "frisbee": 73, "giraffe": 54, "handbag": 14, "horse": 100, "hot dog": 56, "keyboard": 75, "kite": 46, "knife": 98, "laptop": 17, "left": 64, "motorcycle": 23, "mouse": 6, "orange": 69, "oven": 87, "parking meter": 29, "person": 28, "pizza": 95, "potted plant": 42, "refrigerator": 53, "remote": 104, "right": 33, "sandwich": 36, "scissors": 71, "sheep": 8, "sink": 32, "skateboard": 99, "skis": 86, "snowboard": 10, "spoon": 12, "sports ball": 13, "stop sign": 65, "suitcase": 45, "surfboard": 39, "teddy bear": 67, "tennis racket": 47, "tie": 60, "toilet": 106, "toothbrush": 18, "traffic light": 103, "train": 25, "truck": 57, "tv": 44, "umbrella": 62, "vase": 58, "wine glass": 5, "zebra": 26 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.35.2", "type_vocab_size": 2, "vocab_size": 30522 }