phonghoccode's picture
Upload CustomViltForVQA
07b4a5a verified
{
"_name_or_path": "dandelin/vilt-b32-finetuned-vqa",
"architectures": [
"CustomViltForVQA"
],
"attention_probs_dropout_prob": 0.0,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "5",
"1": "zebra",
"2": "couch",
"3": "sink",
"4": "apple",
"5": "umbrella",
"6": "bus",
"7": "dog",
"8": "boat",
"9": "Grey",
"10": "suitcase",
"11": "2",
"12": "bottle",
"13": "spoon",
"14": "3",
"15": "toilet",
"16": "8",
"17": "7",
"18": "knife",
"19": "Pink",
"20": "skis",
"21": "4",
"22": "toothbrush",
"23": "surfboard",
"24": "Behind",
"25": "person",
"26": "orange",
"27": "chair",
"28": "handbag",
"29": "cow",
"30": "1",
"31": "fire hydrant",
"32": "oven",
"33": "tennis racket",
"34": "Yellow",
"35": "remote",
"36": "9",
"37": "No",
"38": "0",
"39": "carrot",
"40": "Above",
"41": "sports ball",
"42": "Purple",
"43": "snowboard",
"44": "parking meter",
"45": "mouse",
"46": "White",
"47": "clock",
"48": "dining table",
"49": "wine glass",
"50": "car",
"51": "teddy bear",
"52": "bicycle",
"53": "scissors",
"54": "keyboard",
"55": "Green",
"56": "Below",
"57": "Left",
"58": "book",
"59": "Front",
"60": "fork",
"61": "broccoli",
"62": "giraffe",
"63": "baseball glove",
"64": "Red",
"65": "Orange",
"66": "Black",
"67": "frisbee",
"68": "potted plant",
"69": "sandwich",
"70": "cup",
"71": "right",
"72": "cake",
"73": "Yes",
"74": "Right",
"75": "bed",
"76": "cell phone",
"77": "skateboard",
"78": "bowl",
"79": "truck",
"80": "donut",
"81": "above",
"82": "Brown",
"83": "kite",
"84": "cat",
"85": "traffic light",
"86": "pizza",
"87": "sheep",
"88": "elephant",
"89": "laptop",
"90": "refrigerator",
"91": "6",
"92": "banana",
"93": "Blue",
"94": "hot dog",
"95": "bear",
"96": "bird",
"97": "motorcycle",
"98": "horse",
"99": "tv",
"100": "tie",
"101": "left",
"102": "vase",
"103": "train",
"104": "baseball bat",
"105": "stop sign",
"106": "airplane",
"107": "bench"
},
"image_size": 384,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"0": 38,
"1": 30,
"2": 11,
"3": 14,
"4": 21,
"5": 0,
"6": 91,
"7": 17,
"8": 16,
"9": 36,
"Above": 40,
"Behind": 24,
"Below": 56,
"Black": 66,
"Blue": 93,
"Brown": 82,
"Front": 59,
"Green": 55,
"Grey": 9,
"Left": 57,
"No": 37,
"Orange": 65,
"Pink": 19,
"Purple": 42,
"Red": 64,
"Right": 74,
"White": 46,
"Yellow": 34,
"Yes": 73,
"above": 81,
"airplane": 106,
"apple": 4,
"banana": 92,
"baseball bat": 104,
"baseball glove": 63,
"bear": 95,
"bed": 75,
"bench": 107,
"bicycle": 52,
"bird": 96,
"boat": 8,
"book": 58,
"bottle": 12,
"bowl": 78,
"broccoli": 61,
"bus": 6,
"cake": 72,
"car": 50,
"carrot": 39,
"cat": 84,
"cell phone": 76,
"chair": 27,
"clock": 47,
"couch": 2,
"cow": 29,
"cup": 70,
"dining table": 48,
"dog": 7,
"donut": 80,
"elephant": 88,
"fire hydrant": 31,
"fork": 60,
"frisbee": 67,
"giraffe": 62,
"handbag": 28,
"horse": 98,
"hot dog": 94,
"keyboard": 54,
"kite": 83,
"knife": 18,
"laptop": 89,
"left": 101,
"motorcycle": 97,
"mouse": 45,
"orange": 26,
"oven": 32,
"parking meter": 44,
"person": 25,
"pizza": 86,
"potted plant": 68,
"refrigerator": 90,
"remote": 35,
"right": 71,
"sandwich": 69,
"scissors": 53,
"sheep": 87,
"sink": 3,
"skateboard": 77,
"skis": 20,
"snowboard": 43,
"spoon": 13,
"sports ball": 41,
"stop sign": 105,
"suitcase": 10,
"surfboard": 23,
"teddy bear": 51,
"tennis racket": 33,
"tie": 100,
"toilet": 15,
"toothbrush": 22,
"traffic light": 85,
"train": 103,
"truck": 79,
"tv": 99,
"umbrella": 5,
"vase": 102,
"wine glass": 49,
"zebra": 1
},
"layer_norm_eps": 1e-12,
"max_image_length": -1,
"max_position_embeddings": 40,
"modality_type_vocab_size": 2,
"model_type": "vilt",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"num_images": -1,
"patch_size": 32,
"qkv_bias": true,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.49.0",
"type_vocab_size": 2,
"vocab_size": 30522
}