| { | |
| "_name_or_path": "phonghoccode/vilt-vqa-finetune-pytorch", | |
| "architectures": [ | |
| "CustomViltForVQA" | |
| ], | |
| "attention_probs_dropout_prob": 0.0, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "donut", | |
| "1": "Orange", | |
| "2": "bottle", | |
| "3": "laptop", | |
| "4": "toilet", | |
| "5": "car", | |
| "6": "fork", | |
| "7": "bus", | |
| "8": "keyboard", | |
| "9": "0", | |
| "10": "Green", | |
| "11": "vase", | |
| "12": "bear", | |
| "13": "4", | |
| "14": "3", | |
| "15": "bird", | |
| "16": "Yellow", | |
| "17": "Grey", | |
| "18": "suitcase", | |
| "19": "skateboard", | |
| "20": "tv", | |
| "21": "Red", | |
| "22": "Behind", | |
| "23": "spoon", | |
| "24": "bicycle", | |
| "25": "7", | |
| "26": "remote", | |
| "27": "kite", | |
| "28": "orange", | |
| "29": "cow", | |
| "30": "Below", | |
| "31": "parking meter", | |
| "32": "right", | |
| "33": "bowl", | |
| "34": "sheep", | |
| "35": "handbag", | |
| "36": "potted plant", | |
| "37": "left", | |
| "38": "airplane", | |
| "39": "2", | |
| "40": "6", | |
| "41": "elephant", | |
| "42": "skis", | |
| "43": "bench", | |
| "44": "dog", | |
| "45": "truck", | |
| "46": "Left", | |
| "47": "umbrella", | |
| "48": "motorcycle", | |
| "49": "5", | |
| "50": "tennis racket", | |
| "51": "cake", | |
| "52": "Front", | |
| "53": "clock", | |
| "54": "teddy bear", | |
| "55": "hot dog", | |
| "56": "oven", | |
| "57": "toothbrush", | |
| "58": "Black", | |
| "59": "book", | |
| "60": "1", | |
| "61": "tie", | |
| "62": "couch", | |
| "63": "mouse", | |
| "64": "Brown", | |
| "65": "dining table", | |
| "66": "Pink", | |
| "67": "carrot", | |
| "68": "surfboard", | |
| "69": "pizza", | |
| "70": "bed", | |
| "71": "cell phone", | |
| "72": "broccoli", | |
| "73": "scissors", | |
| "74": "Purple", | |
| "75": "boat", | |
| "76": "Yes", | |
| "77": "apple", | |
| "78": "Blue", | |
| "79": "stop sign", | |
| "80": "8", | |
| "81": "frisbee", | |
| "82": "sports ball", | |
| "83": "9", | |
| "84": "fire hydrant", | |
| "85": "wine glass", | |
| "86": "sink", | |
| "87": "baseball glove", | |
| "88": "cat", | |
| "89": "train", | |
| "90": "banana", | |
| "91": "horse", | |
| "92": "above", | |
| "93": "White", | |
| "94": "traffic light", | |
| "95": "snowboard", | |
| "96": "No", | |
| "97": "baseball bat", | |
| "98": "person", | |
| "99": "refrigerator", | |
| "100": "zebra", | |
| "101": "chair", | |
| "102": "cup", | |
| "103": "giraffe", | |
| "104": "knife", | |
| "105": "Right", | |
| "106": "Above", | |
| "107": "sandwich" | |
| }, | |
| "image_size": 384, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "label2id": { | |
| "0": 9, | |
| "1": 60, | |
| "2": 39, | |
| "3": 14, | |
| "4": 13, | |
| "5": 49, | |
| "6": 40, | |
| "7": 25, | |
| "8": 80, | |
| "9": 83, | |
| "Above": 106, | |
| "Behind": 22, | |
| "Below": 30, | |
| "Black": 58, | |
| "Blue": 78, | |
| "Brown": 64, | |
| "Front": 52, | |
| "Green": 10, | |
| "Grey": 17, | |
| "Left": 46, | |
| "No": 96, | |
| "Orange": 1, | |
| "Pink": 66, | |
| "Purple": 74, | |
| "Red": 21, | |
| "Right": 105, | |
| "White": 93, | |
| "Yellow": 16, | |
| "Yes": 76, | |
| "above": 92, | |
| "airplane": 38, | |
| "apple": 77, | |
| "banana": 90, | |
| "baseball bat": 97, | |
| "baseball glove": 87, | |
| "bear": 12, | |
| "bed": 70, | |
| "bench": 43, | |
| "bicycle": 24, | |
| "bird": 15, | |
| "boat": 75, | |
| "book": 59, | |
| "bottle": 2, | |
| "bowl": 33, | |
| "broccoli": 72, | |
| "bus": 7, | |
| "cake": 51, | |
| "car": 5, | |
| "carrot": 67, | |
| "cat": 88, | |
| "cell phone": 71, | |
| "chair": 101, | |
| "clock": 53, | |
| "couch": 62, | |
| "cow": 29, | |
| "cup": 102, | |
| "dining table": 65, | |
| "dog": 44, | |
| "donut": 0, | |
| "elephant": 41, | |
| "fire hydrant": 84, | |
| "fork": 6, | |
| "frisbee": 81, | |
| "giraffe": 103, | |
| "handbag": 35, | |
| "horse": 91, | |
| "hot dog": 55, | |
| "keyboard": 8, | |
| "kite": 27, | |
| "knife": 104, | |
| "laptop": 3, | |
| "left": 37, | |
| "motorcycle": 48, | |
| "mouse": 63, | |
| "orange": 28, | |
| "oven": 56, | |
| "parking meter": 31, | |
| "person": 98, | |
| "pizza": 69, | |
| "potted plant": 36, | |
| "refrigerator": 99, | |
| "remote": 26, | |
| "right": 32, | |
| "sandwich": 107, | |
| "scissors": 73, | |
| "sheep": 34, | |
| "sink": 86, | |
| "skateboard": 19, | |
| "skis": 42, | |
| "snowboard": 95, | |
| "spoon": 23, | |
| "sports ball": 82, | |
| "stop sign": 79, | |
| "suitcase": 18, | |
| "surfboard": 68, | |
| "teddy bear": 54, | |
| "tennis racket": 50, | |
| "tie": 61, | |
| "toilet": 4, | |
| "toothbrush": 57, | |
| "traffic light": 94, | |
| "train": 89, | |
| "truck": 45, | |
| "tv": 20, | |
| "umbrella": 47, | |
| "vase": 11, | |
| "wine glass": 85, | |
| "zebra": 100 | |
| }, | |
| "layer_norm_eps": 1e-12, | |
| "max_image_length": -1, | |
| "max_position_embeddings": 40, | |
| "modality_type_vocab_size": 2, | |
| "model_type": "vilt", | |
| "num_attention_heads": 12, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "num_images": -1, | |
| "patch_size": 32, | |
| "qkv_bias": true, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.35.2", | |
| "type_vocab_size": 2, | |
| "vocab_size": 30522 | |
| } | |