phonghoccode's picture
Upload CustomViltForVQA
f7ac1f0 verified
{
"_name_or_path": "dandelin/vilt-b32-finetuned-vqa",
"architectures": [
"CustomViltForVQA"
],
"attention_probs_dropout_prob": 0.0,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "boat",
"1": "spoon",
"2": "bench",
"3": "banana",
"4": "cat",
"5": "oven",
"6": "bottle",
"7": "No",
"8": "Yes",
"9": "knife",
"10": "clock",
"11": "5",
"12": "mouse",
"13": "cup",
"14": "sheep",
"15": "dining table",
"16": "fork",
"17": "refrigerator",
"18": "zebra",
"19": "broccoli",
"20": "dog",
"21": "skateboard",
"22": "Black",
"23": "bed",
"24": "motorcycle",
"25": "pizza",
"26": "donut",
"27": "skis",
"28": "chair",
"29": "tennis racket",
"30": "bird",
"31": "potted plant",
"32": "sports ball",
"33": "Brown",
"34": "laptop",
"35": "elephant",
"36": "horse",
"37": "Blue",
"38": "suitcase",
"39": "hot dog",
"40": "4",
"41": "Orange",
"42": "Purple",
"43": "handbag",
"44": "cow",
"45": "fire hydrant",
"46": "snowboard",
"47": "toothbrush",
"48": "Below",
"49": "parking meter",
"50": "Front",
"51": "Right",
"52": "cake",
"53": "tv",
"54": "9",
"55": "tie",
"56": "orange",
"57": "wine glass",
"58": "cell phone",
"59": "stop sign",
"60": "right",
"61": "Pink",
"62": "giraffe",
"63": "scissors",
"64": "1",
"65": "7",
"66": "keyboard",
"67": "Yellow",
"68": "3",
"69": "remote",
"70": "bear",
"71": "car",
"72": "truck",
"73": "surfboard",
"74": "traffic light",
"75": "left",
"76": "bus",
"77": "frisbee",
"78": "couch",
"79": "Red",
"80": "Left",
"81": "6",
"82": "toilet",
"83": "airplane",
"84": "Grey",
"85": "8",
"86": "above",
"87": "baseball glove",
"88": "vase",
"89": "kite",
"90": "bowl",
"91": "0",
"92": "2",
"93": "White",
"94": "Behind",
"95": "Above",
"96": "baseball bat",
"97": "Green",
"98": "person",
"99": "sandwich",
"100": "sink",
"101": "book",
"102": "train",
"103": "umbrella",
"104": "carrot",
"105": "bicycle",
"106": "apple",
"107": "teddy bear"
},
"image_size": 384,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"0": 91,
"1": 64,
"2": 92,
"3": 68,
"4": 40,
"5": 11,
"6": 81,
"7": 65,
"8": 85,
"9": 54,
"Above": 95,
"Behind": 94,
"Below": 48,
"Black": 22,
"Blue": 37,
"Brown": 33,
"Front": 50,
"Green": 97,
"Grey": 84,
"Left": 80,
"No": 7,
"Orange": 41,
"Pink": 61,
"Purple": 42,
"Red": 79,
"Right": 51,
"White": 93,
"Yellow": 67,
"Yes": 8,
"above": 86,
"airplane": 83,
"apple": 106,
"banana": 3,
"baseball bat": 96,
"baseball glove": 87,
"bear": 70,
"bed": 23,
"bench": 2,
"bicycle": 105,
"bird": 30,
"boat": 0,
"book": 101,
"bottle": 6,
"bowl": 90,
"broccoli": 19,
"bus": 76,
"cake": 52,
"car": 71,
"carrot": 104,
"cat": 4,
"cell phone": 58,
"chair": 28,
"clock": 10,
"couch": 78,
"cow": 44,
"cup": 13,
"dining table": 15,
"dog": 20,
"donut": 26,
"elephant": 35,
"fire hydrant": 45,
"fork": 16,
"frisbee": 77,
"giraffe": 62,
"handbag": 43,
"horse": 36,
"hot dog": 39,
"keyboard": 66,
"kite": 89,
"knife": 9,
"laptop": 34,
"left": 75,
"motorcycle": 24,
"mouse": 12,
"orange": 56,
"oven": 5,
"parking meter": 49,
"person": 98,
"pizza": 25,
"potted plant": 31,
"refrigerator": 17,
"remote": 69,
"right": 60,
"sandwich": 99,
"scissors": 63,
"sheep": 14,
"sink": 100,
"skateboard": 21,
"skis": 27,
"snowboard": 46,
"spoon": 1,
"sports ball": 32,
"stop sign": 59,
"suitcase": 38,
"surfboard": 73,
"teddy bear": 107,
"tennis racket": 29,
"tie": 55,
"toilet": 82,
"toothbrush": 47,
"traffic light": 74,
"train": 102,
"truck": 72,
"tv": 53,
"umbrella": 103,
"vase": 88,
"wine glass": 57,
"zebra": 18
},
"layer_norm_eps": 1e-12,
"max_image_length": -1,
"max_position_embeddings": 40,
"modality_type_vocab_size": 2,
"model_type": "vilt",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"num_images": -1,
"patch_size": 32,
"qkv_bias": true,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.47.0",
"type_vocab_size": 2,
"vocab_size": 30522
}