| { | |
| "_name_or_path": "Salesforce/blip-vqa-base", | |
| "architectures": [ | |
| "ViltForQuestionAnswering" | |
| ], | |
| "attention_probs_dropout_prob": 0.0, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "birthday", | |
| "1": "walking", | |
| "2": "talking on phone", | |
| "3": "woman", | |
| "4": "white and blue", | |
| "5": "net", | |
| "6": "window", | |
| "7": "women", | |
| "8": "out", | |
| "9": "blue and white", | |
| "10": "wine", | |
| "11": "small", | |
| "12": "dirt", | |
| "13": "king", | |
| "14": "girl", | |
| "15": "9:35", | |
| "16": "snowboarding", | |
| "17": "full", | |
| "18": "shrimp", | |
| "19": "rack", | |
| "20": "red", | |
| "21": "red and yellow", | |
| "22": "not there", | |
| "23": "necklace", | |
| "24": "unknown", | |
| "25": "crown", | |
| "26": "clear", | |
| "27": "bikes", | |
| "28": "right", | |
| "29": "ground", | |
| "30": "4", | |
| "31": "skier", | |
| "32": "skiing", | |
| "33": "7:35", | |
| "34": "down", | |
| "35": "hat", | |
| "36": "skateboard", | |
| "37": "tired", | |
| "38": "screen", | |
| "39": "plain", | |
| "40": "name tag", | |
| "41": "8", | |
| "42": "snowboarder", | |
| "43": "cloudy", | |
| "44": "zoo", | |
| "45": "boy", | |
| "46": "style", | |
| "47": "0", | |
| "48": "wine tasting", | |
| "49": "7", | |
| "50": "ice cream", | |
| "51": "smiling", | |
| "52": "blue", | |
| "53": "watching", | |
| "54": "neon", | |
| "55": "beige", | |
| "56": "can't tell", | |
| "57": "bicycle", | |
| "58": "doughnut", | |
| "59": "calico", | |
| "60": "giraffes", | |
| "61": "10", | |
| "62": "jeep", | |
| "63": "6", | |
| "64": "cage", | |
| "65": "outside", | |
| "66": "tower", | |
| "67": "gray", | |
| "68": "happy", | |
| "69": "chair", | |
| "70": "5", | |
| "71": "sky", | |
| "72": "station", | |
| "73": "lanyard", | |
| "74": "cat", | |
| "75": "solid", | |
| "76": "suv", | |
| "77": "picnic table", | |
| "78": "little girl", | |
| "79": "lying down", | |
| "80": "plastic", | |
| "81": "snow", | |
| "82": "queen", | |
| "83": "hair", | |
| "84": "big ben", | |
| "85": "windows", | |
| "86": "human", | |
| "87": "smile", | |
| "88": "park", | |
| "89": "in car", | |
| "90": "canopy", | |
| "91": "on street", | |
| "92": "curtain", | |
| "93": "green", | |
| "94": "low", | |
| "95": "white", | |
| "96": "person", | |
| "97": "orange", | |
| "98": "bike rack", | |
| "99": "french", | |
| "100": "lg", | |
| "101": "resting", | |
| "102": "backpack", | |
| "103": "bus", | |
| "104": "roof", | |
| "105": "church", | |
| "106": "fence", | |
| "107": "cup", | |
| "108": "cross", | |
| "109": "wall", | |
| "110": "forest", | |
| "111": "ball", | |
| "112": "snowboard", | |
| "113": "chopsticks", | |
| "114": "arrow", | |
| "115": "plate", | |
| "116": "bricks", | |
| "117": "clock", | |
| "118": "wedding", | |
| "119": "3", | |
| "120": "black and white", | |
| "121": "don't know", | |
| "122": "soccer", | |
| "123": "lady", | |
| "124": "platform", | |
| "125": "train", | |
| "126": "brick", | |
| "127": "leather", | |
| "128": "pink", | |
| "129": "giraffe", | |
| "130": "camera", | |
| "131": "yellow", | |
| "132": "purple", | |
| "133": "hawaii", | |
| "134": "tan", | |
| "135": "man", | |
| "136": "no", | |
| "137": "crossing", | |
| "138": "blonde", | |
| "139": "gray and black", | |
| "140": "africa", | |
| "141": "door", | |
| "142": "at table", | |
| "143": "laying down", | |
| "144": "2", | |
| "145": "8:35", | |
| "146": "table", | |
| "147": "curtains", | |
| "148": "photographer", | |
| "149": "beagle", | |
| "150": "talking", | |
| "151": "2010", | |
| "152": "nothing", | |
| "153": "1", | |
| "154": "double", | |
| "155": "yes", | |
| "156": "bedroom", | |
| "157": "2013", | |
| "158": "street", | |
| "159": "sidewalk", | |
| "160": "fashion", | |
| "161": "tent", | |
| "162": "dog", | |
| "163": "shade", | |
| "164": "shadows", | |
| "165": "black", | |
| "166": "exit", | |
| "167": "red and blue", | |
| "168": "donut", | |
| "169": "car", | |
| "170": "7:45", | |
| "171": "many", | |
| "172": "soccer ball", | |
| "173": "tv", | |
| "174": "desert", | |
| "175": "large", | |
| "176": "monitor", | |
| "177": "brown", | |
| "178": "stripes", | |
| "179": "tabby", | |
| "180": "sun", | |
| "181": "not sure", | |
| "182": "protection", | |
| "183": "sleeping", | |
| "184": "clock tower", | |
| "185": "bicycles", | |
| "186": "they aren't", | |
| "187": "woods", | |
| "188": "2000", | |
| "189": "on road", | |
| "190": "stand", | |
| "191": "shadow", | |
| "192": "white and black", | |
| "193": "trees", | |
| "194": "security", | |
| "195": "air", | |
| "196": "natural", | |
| "197": "shelter", | |
| "198": "skateboarding" | |
| }, | |
| "image_size": 384, | |
| "image_text_hidden_size": 256, | |
| "initializer_factor": 1.0, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "label2id": { | |
| "0": 47, | |
| "1": 153, | |
| "10": 61, | |
| "2": 144, | |
| "2000": 188, | |
| "2010": 151, | |
| "2013": 157, | |
| "3": 119, | |
| "4": 30, | |
| "5": 70, | |
| "6": 63, | |
| "7": 49, | |
| "7:35": 33, | |
| "7:45": 170, | |
| "8": 41, | |
| "8:35": 145, | |
| "9:35": 15, | |
| "africa": 140, | |
| "air": 195, | |
| "arrow": 114, | |
| "at table": 142, | |
| "backpack": 102, | |
| "ball": 111, | |
| "beagle": 149, | |
| "bedroom": 156, | |
| "beige": 55, | |
| "bicycle": 57, | |
| "bicycles": 185, | |
| "big ben": 84, | |
| "bike rack": 98, | |
| "bikes": 27, | |
| "birthday": 0, | |
| "black": 165, | |
| "black and white": 120, | |
| "blonde": 138, | |
| "blue": 52, | |
| "blue and white": 9, | |
| "boy": 45, | |
| "brick": 126, | |
| "bricks": 116, | |
| "brown": 177, | |
| "bus": 103, | |
| "cage": 64, | |
| "calico": 59, | |
| "camera": 130, | |
| "can't tell": 56, | |
| "canopy": 90, | |
| "car": 169, | |
| "cat": 74, | |
| "chair": 69, | |
| "chopsticks": 113, | |
| "church": 105, | |
| "clear": 26, | |
| "clock": 117, | |
| "clock tower": 184, | |
| "cloudy": 43, | |
| "cross": 108, | |
| "crossing": 137, | |
| "crown": 25, | |
| "cup": 107, | |
| "curtain": 92, | |
| "curtains": 147, | |
| "desert": 174, | |
| "dirt": 12, | |
| "dog": 162, | |
| "don't know": 121, | |
| "donut": 168, | |
| "door": 141, | |
| "double": 154, | |
| "doughnut": 58, | |
| "down": 34, | |
| "exit": 166, | |
| "fashion": 160, | |
| "fence": 106, | |
| "forest": 110, | |
| "french": 99, | |
| "full": 17, | |
| "giraffe": 129, | |
| "giraffes": 60, | |
| "girl": 14, | |
| "gray": 67, | |
| "gray and black": 139, | |
| "green": 93, | |
| "ground": 29, | |
| "hair": 83, | |
| "happy": 68, | |
| "hat": 35, | |
| "hawaii": 133, | |
| "human": 86, | |
| "ice cream": 50, | |
| "in car": 89, | |
| "jeep": 62, | |
| "king": 13, | |
| "lady": 123, | |
| "lanyard": 73, | |
| "large": 175, | |
| "laying down": 143, | |
| "leather": 127, | |
| "lg": 100, | |
| "little girl": 78, | |
| "low": 94, | |
| "lying down": 79, | |
| "man": 135, | |
| "many": 171, | |
| "monitor": 176, | |
| "name tag": 40, | |
| "natural": 196, | |
| "necklace": 23, | |
| "neon": 54, | |
| "net": 5, | |
| "no": 136, | |
| "not sure": 181, | |
| "not there": 22, | |
| "nothing": 152, | |
| "on road": 189, | |
| "on street": 91, | |
| "orange": 97, | |
| "out": 8, | |
| "outside": 65, | |
| "park": 88, | |
| "person": 96, | |
| "photographer": 148, | |
| "picnic table": 77, | |
| "pink": 128, | |
| "plain": 39, | |
| "plastic": 80, | |
| "plate": 115, | |
| "platform": 124, | |
| "protection": 182, | |
| "purple": 132, | |
| "queen": 82, | |
| "rack": 19, | |
| "red": 20, | |
| "red and blue": 167, | |
| "red and yellow": 21, | |
| "resting": 101, | |
| "right": 28, | |
| "roof": 104, | |
| "screen": 38, | |
| "security": 194, | |
| "shade": 163, | |
| "shadow": 191, | |
| "shadows": 164, | |
| "shelter": 197, | |
| "shrimp": 18, | |
| "sidewalk": 159, | |
| "skateboard": 36, | |
| "skateboarding": 198, | |
| "skier": 31, | |
| "skiing": 32, | |
| "sky": 71, | |
| "sleeping": 183, | |
| "small": 11, | |
| "smile": 87, | |
| "smiling": 51, | |
| "snow": 81, | |
| "snowboard": 112, | |
| "snowboarder": 42, | |
| "snowboarding": 16, | |
| "soccer": 122, | |
| "soccer ball": 172, | |
| "solid": 75, | |
| "stand": 190, | |
| "station": 72, | |
| "street": 158, | |
| "stripes": 178, | |
| "style": 46, | |
| "sun": 180, | |
| "suv": 76, | |
| "tabby": 179, | |
| "table": 146, | |
| "talking": 150, | |
| "talking on phone": 2, | |
| "tan": 134, | |
| "tent": 161, | |
| "they aren't": 186, | |
| "tired": 37, | |
| "tower": 66, | |
| "train": 125, | |
| "trees": 193, | |
| "tv": 173, | |
| "unknown": 24, | |
| "walking": 1, | |
| "wall": 109, | |
| "watching": 53, | |
| "wedding": 118, | |
| "white": 95, | |
| "white and black": 192, | |
| "white and blue": 4, | |
| "window": 6, | |
| "windows": 85, | |
| "wine": 10, | |
| "wine tasting": 48, | |
| "woman": 3, | |
| "women": 7, | |
| "woods": 187, | |
| "yellow": 131, | |
| "yes": 155, | |
| "zoo": 44 | |
| }, | |
| "layer_norm_eps": 1e-12, | |
| "logit_scale_init_value": 2.6592, | |
| "max_image_length": -1, | |
| "max_position_embeddings": 40, | |
| "modality_type_vocab_size": 2, | |
| "model_type": "vilt", | |
| "num_attention_heads": 12, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "num_images": -1, | |
| "patch_size": 32, | |
| "projection_dim": 512, | |
| "qkv_bias": true, | |
| "text_config": { | |
| "_name_or_path": "", | |
| "add_cross_attention": false, | |
| "architectures": null, | |
| "attention_probs_dropout_prob": 0.0, | |
| "bad_words_ids": null, | |
| "begin_suppress_tokens": null, | |
| "bos_token_id": 30522, | |
| "chunk_size_feed_forward": 0, | |
| "cross_attention_hidden_size": null, | |
| "decoder_start_token_id": null, | |
| "diversity_penalty": 0.0, | |
| "do_sample": false, | |
| "early_stopping": false, | |
| "encoder_no_repeat_ngram_size": 0, | |
| "eos_token_id": 2, | |
| "exponential_decay_length_penalty": null, | |
| "finetuning_task": null, | |
| "forced_bos_token_id": null, | |
| "forced_eos_token_id": null, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "LABEL_0", | |
| "1": "LABEL_1" | |
| }, | |
| "initializer_factor": 1.0, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "is_decoder": true, | |
| "is_encoder_decoder": false, | |
| "label2id": { | |
| "LABEL_0": 0, | |
| "LABEL_1": 1 | |
| }, | |
| "layer_norm_eps": 1e-12, | |
| "length_penalty": 1.0, | |
| "max_length": 20, | |
| "max_position_embeddings": 512, | |
| "min_length": 0, | |
| "model_type": "blip_text_model", | |
| "no_repeat_ngram_size": 0, | |
| "num_attention_heads": 12, | |
| "num_beam_groups": 1, | |
| "num_beams": 1, | |
| "num_hidden_layers": 12, | |
| "num_return_sequences": 1, | |
| "output_attentions": false, | |
| "output_hidden_states": false, | |
| "output_scores": false, | |
| "pad_token_id": 0, | |
| "prefix": null, | |
| "problem_type": null, | |
| "projection_dim": 768, | |
| "pruned_heads": {}, | |
| "remove_invalid_values": false, | |
| "repetition_penalty": 1.0, | |
| "return_dict": true, | |
| "return_dict_in_generate": false, | |
| "sep_token_id": 102, | |
| "suppress_tokens": null, | |
| "task_specific_params": null, | |
| "temperature": 1.0, | |
| "tf_legacy_loss": false, | |
| "tie_encoder_decoder": false, | |
| "tie_word_embeddings": true, | |
| "tokenizer_class": null, | |
| "top_k": 50, | |
| "top_p": 1.0, | |
| "torch_dtype": null, | |
| "torchscript": false, | |
| "transformers_version": "4.26.0.dev0", | |
| "typical_p": 1.0, | |
| "use_bfloat16": false, | |
| "use_cache": true, | |
| "vocab_size": 30524 | |
| }, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.42.4", | |
| "type_vocab_size": 2, | |
| "vision_config": { | |
| "_name_or_path": "", | |
| "add_cross_attention": false, | |
| "architectures": null, | |
| "attention_dropout": 0.0, | |
| "bad_words_ids": null, | |
| "begin_suppress_tokens": null, | |
| "bos_token_id": null, | |
| "chunk_size_feed_forward": 0, | |
| "cross_attention_hidden_size": null, | |
| "decoder_start_token_id": null, | |
| "diversity_penalty": 0.0, | |
| "do_sample": false, | |
| "dropout": 0.0, | |
| "early_stopping": false, | |
| "encoder_no_repeat_ngram_size": 0, | |
| "eos_token_id": null, | |
| "exponential_decay_length_penalty": null, | |
| "finetuning_task": null, | |
| "forced_bos_token_id": null, | |
| "forced_eos_token_id": null, | |
| "hidden_act": "gelu", | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "LABEL_0", | |
| "1": "LABEL_1" | |
| }, | |
| "image_size": 384, | |
| "initializer_factor": 1.0, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "is_decoder": false, | |
| "is_encoder_decoder": false, | |
| "label2id": { | |
| "LABEL_0": 0, | |
| "LABEL_1": 1 | |
| }, | |
| "layer_norm_eps": 1e-05, | |
| "length_penalty": 1.0, | |
| "max_length": 20, | |
| "min_length": 0, | |
| "model_type": "blip_vision_model", | |
| "no_repeat_ngram_size": 0, | |
| "num_attention_heads": 12, | |
| "num_beam_groups": 1, | |
| "num_beams": 1, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "num_return_sequences": 1, | |
| "output_attentions": false, | |
| "output_hidden_states": false, | |
| "output_scores": false, | |
| "pad_token_id": null, | |
| "patch_size": 16, | |
| "prefix": null, | |
| "problem_type": null, | |
| "projection_dim": 512, | |
| "pruned_heads": {}, | |
| "remove_invalid_values": false, | |
| "repetition_penalty": 1.0, | |
| "return_dict": true, | |
| "return_dict_in_generate": false, | |
| "sep_token_id": null, | |
| "suppress_tokens": null, | |
| "task_specific_params": null, | |
| "temperature": 1.0, | |
| "tf_legacy_loss": false, | |
| "tie_encoder_decoder": false, | |
| "tie_word_embeddings": true, | |
| "tokenizer_class": null, | |
| "top_k": 50, | |
| "top_p": 1.0, | |
| "torch_dtype": null, | |
| "torchscript": false, | |
| "transformers_version": "4.26.0.dev0", | |
| "typical_p": 1.0, | |
| "use_bfloat16": false | |
| }, | |
| "vocab_size": 30522 | |
| } | |