vilt-vqa-finetune / config.json
phonghoccode's picture
Upload CustomViltForVQA
56de355 verified
raw
history blame
4.81 kB
{
"_name_or_path": "dandelin/vilt-b32-finetuned-vqa",
"architectures": [
"CustomViltForVQA"
],
"attention_probs_dropout_prob": 0.0,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "boat",
"1": "above",
"2": "apple",
"3": "clock",
"4": "9",
"5": "wine glass",
"6": "mouse",
"7": "2",
"8": "sheep",
"9": "couch",
"10": "snowboard",
"11": "Yes",
"12": "spoon",
"13": "sports ball",
"14": "handbag",
"15": "Blue",
"16": "5",
"17": "laptop",
"18": "toothbrush",
"19": "carrot",
"20": "cake",
"21": "dog",
"22": "White",
"23": "motorcycle",
"24": "chair",
"25": "train",
"26": "zebra",
"27": "6",
"28": "person",
"29": "parking meter",
"30": "Brown",
"31": "car",
"32": "sink",
"33": "right",
"34": "bear",
"35": "Right",
"36": "sandwich",
"37": "Grey",
"38": "Red",
"39": "surfboard",
"40": "bowl",
"41": "airplane",
"42": "potted plant",
"43": "7",
"44": "tv",
"45": "suitcase",
"46": "kite",
"47": "tennis racket",
"48": "bench",
"49": "0",
"50": "bed",
"51": "dining table",
"52": "No",
"53": "refrigerator",
"54": "giraffe",
"55": "Purple",
"56": "hot dog",
"57": "truck",
"58": "vase",
"59": "Orange",
"60": "tie",
"61": "broccoli",
"62": "umbrella",
"63": "Green",
"64": "left",
"65": "stop sign",
"66": "cat",
"67": "teddy bear",
"68": "bicycle",
"69": "orange",
"70": "3",
"71": "scissors",
"72": "baseball glove",
"73": "frisbee",
"74": "4",
"75": "keyboard",
"76": "banana",
"77": "bus",
"78": "1",
"79": "Black",
"80": "8",
"81": "fork",
"82": "baseball bat",
"83": "donut",
"84": "book",
"85": "Above",
"86": "skis",
"87": "oven",
"88": "Below",
"89": "cell phone",
"90": "Left",
"91": "fire hydrant",
"92": "Behind",
"93": "cup",
"94": "elephant",
"95": "pizza",
"96": "bird",
"97": "Pink",
"98": "knife",
"99": "skateboard",
"100": "horse",
"101": "Front",
"102": "Yellow",
"103": "traffic light",
"104": "remote",
"105": "cow",
"106": "toilet",
"107": "bottle"
},
"image_size": 384,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"0": 49,
"1": 78,
"2": 7,
"3": 70,
"4": 74,
"5": 16,
"6": 27,
"7": 43,
"8": 80,
"9": 4,
"Above": 85,
"Behind": 92,
"Below": 88,
"Black": 79,
"Blue": 15,
"Brown": 30,
"Front": 101,
"Green": 63,
"Grey": 37,
"Left": 90,
"No": 52,
"Orange": 59,
"Pink": 97,
"Purple": 55,
"Red": 38,
"Right": 35,
"White": 22,
"Yellow": 102,
"Yes": 11,
"above": 1,
"airplane": 41,
"apple": 2,
"banana": 76,
"baseball bat": 82,
"baseball glove": 72,
"bear": 34,
"bed": 50,
"bench": 48,
"bicycle": 68,
"bird": 96,
"boat": 0,
"book": 84,
"bottle": 107,
"bowl": 40,
"broccoli": 61,
"bus": 77,
"cake": 20,
"car": 31,
"carrot": 19,
"cat": 66,
"cell phone": 89,
"chair": 24,
"clock": 3,
"couch": 9,
"cow": 105,
"cup": 93,
"dining table": 51,
"dog": 21,
"donut": 83,
"elephant": 94,
"fire hydrant": 91,
"fork": 81,
"frisbee": 73,
"giraffe": 54,
"handbag": 14,
"horse": 100,
"hot dog": 56,
"keyboard": 75,
"kite": 46,
"knife": 98,
"laptop": 17,
"left": 64,
"motorcycle": 23,
"mouse": 6,
"orange": 69,
"oven": 87,
"parking meter": 29,
"person": 28,
"pizza": 95,
"potted plant": 42,
"refrigerator": 53,
"remote": 104,
"right": 33,
"sandwich": 36,
"scissors": 71,
"sheep": 8,
"sink": 32,
"skateboard": 99,
"skis": 86,
"snowboard": 10,
"spoon": 12,
"sports ball": 13,
"stop sign": 65,
"suitcase": 45,
"surfboard": 39,
"teddy bear": 67,
"tennis racket": 47,
"tie": 60,
"toilet": 106,
"toothbrush": 18,
"traffic light": 103,
"train": 25,
"truck": 57,
"tv": 44,
"umbrella": 62,
"vase": 58,
"wine glass": 5,
"zebra": 26
},
"layer_norm_eps": 1e-12,
"max_image_length": -1,
"max_position_embeddings": 40,
"modality_type_vocab_size": 2,
"model_type": "vilt",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"num_images": -1,
"patch_size": 32,
"qkv_bias": true,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.35.2",
"type_vocab_size": 2,
"vocab_size": 30522
}