| { |
| "_name_or_path": "phonghoccode/vilt-vqa-finetune-pytorch", |
| "architectures": [ |
| "CustomViltForVQA" |
| ], |
| "attention_probs_dropout_prob": 0.0, |
| "hidden_act": "gelu", |
| "hidden_dropout_prob": 0.0, |
| "hidden_size": 768, |
| "id2label": { |
| "0": "donut", |
| "1": "Orange", |
| "2": "bottle", |
| "3": "laptop", |
| "4": "toilet", |
| "5": "car", |
| "6": "fork", |
| "7": "bus", |
| "8": "keyboard", |
| "9": "0", |
| "10": "Green", |
| "11": "vase", |
| "12": "bear", |
| "13": "4", |
| "14": "3", |
| "15": "bird", |
| "16": "Yellow", |
| "17": "Grey", |
| "18": "suitcase", |
| "19": "skateboard", |
| "20": "tv", |
| "21": "Red", |
| "22": "Behind", |
| "23": "spoon", |
| "24": "bicycle", |
| "25": "7", |
| "26": "remote", |
| "27": "kite", |
| "28": "orange", |
| "29": "cow", |
| "30": "Below", |
| "31": "parking meter", |
| "32": "right", |
| "33": "bowl", |
| "34": "sheep", |
| "35": "handbag", |
| "36": "potted plant", |
| "37": "left", |
| "38": "airplane", |
| "39": "2", |
| "40": "6", |
| "41": "elephant", |
| "42": "skis", |
| "43": "bench", |
| "44": "dog", |
| "45": "truck", |
| "46": "Left", |
| "47": "umbrella", |
| "48": "motorcycle", |
| "49": "5", |
| "50": "tennis racket", |
| "51": "cake", |
| "52": "Front", |
| "53": "clock", |
| "54": "teddy bear", |
| "55": "hot dog", |
| "56": "oven", |
| "57": "toothbrush", |
| "58": "Black", |
| "59": "book", |
| "60": "1", |
| "61": "tie", |
| "62": "couch", |
| "63": "mouse", |
| "64": "Brown", |
| "65": "dining table", |
| "66": "Pink", |
| "67": "carrot", |
| "68": "surfboard", |
| "69": "pizza", |
| "70": "bed", |
| "71": "cell phone", |
| "72": "broccoli", |
| "73": "scissors", |
| "74": "Purple", |
| "75": "boat", |
| "76": "Yes", |
| "77": "apple", |
| "78": "Blue", |
| "79": "stop sign", |
| "80": "8", |
| "81": "frisbee", |
| "82": "sports ball", |
| "83": "9", |
| "84": "fire hydrant", |
| "85": "wine glass", |
| "86": "sink", |
| "87": "baseball glove", |
| "88": "cat", |
| "89": "train", |
| "90": "banana", |
| "91": "horse", |
| "92": "above", |
| "93": "White", |
| "94": "traffic light", |
| "95": "snowboard", |
| "96": "No", |
| "97": "baseball bat", |
| "98": "person", |
| "99": "refrigerator", |
| "100": "zebra", |
| "101": "chair", |
| "102": "cup", |
| "103": "giraffe", |
| "104": "knife", |
| "105": "Right", |
| "106": "Above", |
| "107": "sandwich" |
| }, |
| "image_size": 384, |
| "initializer_range": 0.02, |
| "intermediate_size": 3072, |
| "label2id": { |
| "0": 9, |
| "1": 60, |
| "2": 39, |
| "3": 14, |
| "4": 13, |
| "5": 49, |
| "6": 40, |
| "7": 25, |
| "8": 80, |
| "9": 83, |
| "Above": 106, |
| "Behind": 22, |
| "Below": 30, |
| "Black": 58, |
| "Blue": 78, |
| "Brown": 64, |
| "Front": 52, |
| "Green": 10, |
| "Grey": 17, |
| "Left": 46, |
| "No": 96, |
| "Orange": 1, |
| "Pink": 66, |
| "Purple": 74, |
| "Red": 21, |
| "Right": 105, |
| "White": 93, |
| "Yellow": 16, |
| "Yes": 76, |
| "above": 92, |
| "airplane": 38, |
| "apple": 77, |
| "banana": 90, |
| "baseball bat": 97, |
| "baseball glove": 87, |
| "bear": 12, |
| "bed": 70, |
| "bench": 43, |
| "bicycle": 24, |
| "bird": 15, |
| "boat": 75, |
| "book": 59, |
| "bottle": 2, |
| "bowl": 33, |
| "broccoli": 72, |
| "bus": 7, |
| "cake": 51, |
| "car": 5, |
| "carrot": 67, |
| "cat": 88, |
| "cell phone": 71, |
| "chair": 101, |
| "clock": 53, |
| "couch": 62, |
| "cow": 29, |
| "cup": 102, |
| "dining table": 65, |
| "dog": 44, |
| "donut": 0, |
| "elephant": 41, |
| "fire hydrant": 84, |
| "fork": 6, |
| "frisbee": 81, |
| "giraffe": 103, |
| "handbag": 35, |
| "horse": 91, |
| "hot dog": 55, |
| "keyboard": 8, |
| "kite": 27, |
| "knife": 104, |
| "laptop": 3, |
| "left": 37, |
| "motorcycle": 48, |
| "mouse": 63, |
| "orange": 28, |
| "oven": 56, |
| "parking meter": 31, |
| "person": 98, |
| "pizza": 69, |
| "potted plant": 36, |
| "refrigerator": 99, |
| "remote": 26, |
| "right": 32, |
| "sandwich": 107, |
| "scissors": 73, |
| "sheep": 34, |
| "sink": 86, |
| "skateboard": 19, |
| "skis": 42, |
| "snowboard": 95, |
| "spoon": 23, |
| "sports ball": 82, |
| "stop sign": 79, |
| "suitcase": 18, |
| "surfboard": 68, |
| "teddy bear": 54, |
| "tennis racket": 50, |
| "tie": 61, |
| "toilet": 4, |
| "toothbrush": 57, |
| "traffic light": 94, |
| "train": 89, |
| "truck": 45, |
| "tv": 20, |
| "umbrella": 47, |
| "vase": 11, |
| "wine glass": 85, |
| "zebra": 100 |
| }, |
| "layer_norm_eps": 1e-12, |
| "max_image_length": -1, |
| "max_position_embeddings": 40, |
| "modality_type_vocab_size": 2, |
| "model_type": "vilt", |
| "num_attention_heads": 12, |
| "num_channels": 3, |
| "num_hidden_layers": 12, |
| "num_images": -1, |
| "patch_size": 32, |
| "qkv_bias": true, |
| "tie_word_embeddings": false, |
| "torch_dtype": "float32", |
| "transformers_version": "4.35.2", |
| "type_vocab_size": 2, |
| "vocab_size": 30522 |
| } |
|
|