| { |
| "_name_or_path": "dandelin/vilt-b32-finetuned-vqa", |
| "architectures": [ |
| "CustomViltForVQA" |
| ], |
| "attention_probs_dropout_prob": 0.0, |
| "hidden_act": "gelu", |
| "hidden_dropout_prob": 0.0, |
| "hidden_size": 768, |
| "id2label": { |
| "0": "boat", |
| "1": "above", |
| "2": "apple", |
| "3": "clock", |
| "4": "9", |
| "5": "wine glass", |
| "6": "mouse", |
| "7": "2", |
| "8": "sheep", |
| "9": "couch", |
| "10": "snowboard", |
| "11": "Yes", |
| "12": "spoon", |
| "13": "sports ball", |
| "14": "handbag", |
| "15": "Blue", |
| "16": "5", |
| "17": "laptop", |
| "18": "toothbrush", |
| "19": "carrot", |
| "20": "cake", |
| "21": "dog", |
| "22": "White", |
| "23": "motorcycle", |
| "24": "chair", |
| "25": "train", |
| "26": "zebra", |
| "27": "6", |
| "28": "person", |
| "29": "parking meter", |
| "30": "Brown", |
| "31": "car", |
| "32": "sink", |
| "33": "right", |
| "34": "bear", |
| "35": "Right", |
| "36": "sandwich", |
| "37": "Grey", |
| "38": "Red", |
| "39": "surfboard", |
| "40": "bowl", |
| "41": "airplane", |
| "42": "potted plant", |
| "43": "7", |
| "44": "tv", |
| "45": "suitcase", |
| "46": "kite", |
| "47": "tennis racket", |
| "48": "bench", |
| "49": "0", |
| "50": "bed", |
| "51": "dining table", |
| "52": "No", |
| "53": "refrigerator", |
| "54": "giraffe", |
| "55": "Purple", |
| "56": "hot dog", |
| "57": "truck", |
| "58": "vase", |
| "59": "Orange", |
| "60": "tie", |
| "61": "broccoli", |
| "62": "umbrella", |
| "63": "Green", |
| "64": "left", |
| "65": "stop sign", |
| "66": "cat", |
| "67": "teddy bear", |
| "68": "bicycle", |
| "69": "orange", |
| "70": "3", |
| "71": "scissors", |
| "72": "baseball glove", |
| "73": "frisbee", |
| "74": "4", |
| "75": "keyboard", |
| "76": "banana", |
| "77": "bus", |
| "78": "1", |
| "79": "Black", |
| "80": "8", |
| "81": "fork", |
| "82": "baseball bat", |
| "83": "donut", |
| "84": "book", |
| "85": "Above", |
| "86": "skis", |
| "87": "oven", |
| "88": "Below", |
| "89": "cell phone", |
| "90": "Left", |
| "91": "fire hydrant", |
| "92": "Behind", |
| "93": "cup", |
| "94": "elephant", |
| "95": "pizza", |
| "96": "bird", |
| "97": "Pink", |
| "98": "knife", |
| "99": "skateboard", |
| "100": "horse", |
| "101": "Front", |
| "102": "Yellow", |
| "103": "traffic light", |
| "104": "remote", |
| "105": "cow", |
| "106": "toilet", |
| "107": "bottle" |
| }, |
| "image_size": 384, |
| "initializer_range": 0.02, |
| "intermediate_size": 3072, |
| "label2id": { |
| "0": 49, |
| "1": 78, |
| "2": 7, |
| "3": 70, |
| "4": 74, |
| "5": 16, |
| "6": 27, |
| "7": 43, |
| "8": 80, |
| "9": 4, |
| "Above": 85, |
| "Behind": 92, |
| "Below": 88, |
| "Black": 79, |
| "Blue": 15, |
| "Brown": 30, |
| "Front": 101, |
| "Green": 63, |
| "Grey": 37, |
| "Left": 90, |
| "No": 52, |
| "Orange": 59, |
| "Pink": 97, |
| "Purple": 55, |
| "Red": 38, |
| "Right": 35, |
| "White": 22, |
| "Yellow": 102, |
| "Yes": 11, |
| "above": 1, |
| "airplane": 41, |
| "apple": 2, |
| "banana": 76, |
| "baseball bat": 82, |
| "baseball glove": 72, |
| "bear": 34, |
| "bed": 50, |
| "bench": 48, |
| "bicycle": 68, |
| "bird": 96, |
| "boat": 0, |
| "book": 84, |
| "bottle": 107, |
| "bowl": 40, |
| "broccoli": 61, |
| "bus": 77, |
| "cake": 20, |
| "car": 31, |
| "carrot": 19, |
| "cat": 66, |
| "cell phone": 89, |
| "chair": 24, |
| "clock": 3, |
| "couch": 9, |
| "cow": 105, |
| "cup": 93, |
| "dining table": 51, |
| "dog": 21, |
| "donut": 83, |
| "elephant": 94, |
| "fire hydrant": 91, |
| "fork": 81, |
| "frisbee": 73, |
| "giraffe": 54, |
| "handbag": 14, |
| "horse": 100, |
| "hot dog": 56, |
| "keyboard": 75, |
| "kite": 46, |
| "knife": 98, |
| "laptop": 17, |
| "left": 64, |
| "motorcycle": 23, |
| "mouse": 6, |
| "orange": 69, |
| "oven": 87, |
| "parking meter": 29, |
| "person": 28, |
| "pizza": 95, |
| "potted plant": 42, |
| "refrigerator": 53, |
| "remote": 104, |
| "right": 33, |
| "sandwich": 36, |
| "scissors": 71, |
| "sheep": 8, |
| "sink": 32, |
| "skateboard": 99, |
| "skis": 86, |
| "snowboard": 10, |
| "spoon": 12, |
| "sports ball": 13, |
| "stop sign": 65, |
| "suitcase": 45, |
| "surfboard": 39, |
| "teddy bear": 67, |
| "tennis racket": 47, |
| "tie": 60, |
| "toilet": 106, |
| "toothbrush": 18, |
| "traffic light": 103, |
| "train": 25, |
| "truck": 57, |
| "tv": 44, |
| "umbrella": 62, |
| "vase": 58, |
| "wine glass": 5, |
| "zebra": 26 |
| }, |
| "layer_norm_eps": 1e-12, |
| "max_image_length": -1, |
| "max_position_embeddings": 40, |
| "modality_type_vocab_size": 2, |
| "model_type": "vilt", |
| "num_attention_heads": 12, |
| "num_channels": 3, |
| "num_hidden_layers": 12, |
| "num_images": -1, |
| "patch_size": 32, |
| "qkv_bias": true, |
| "tie_word_embeddings": false, |
| "torch_dtype": "float32", |
| "transformers_version": "4.35.2", |
| "type_vocab_size": 2, |
| "vocab_size": 30522 |
| } |
|
|