| { | |
| "_name_or_path": "google/owlvit-base-patch32", | |
| "architectures": [ | |
| "OwlViTForObjectDetection" | |
| ], | |
| "id2label": { | |
| "0": "6", | |
| "1": "7", | |
| "10": "21", | |
| "11": "22", | |
| "12": "23", | |
| "13": "24", | |
| "14": "25", | |
| "15": "26", | |
| "16": "27", | |
| "17": "28", | |
| "18": "29", | |
| "19": "30", | |
| "2": "8", | |
| "20": "31", | |
| "21": "32", | |
| "22": "33", | |
| "23": "34", | |
| "24": "35", | |
| "25": "36", | |
| "26": "37", | |
| "27": "38", | |
| "28": "40", | |
| "29": "41", | |
| "3": "14", | |
| "30": "42", | |
| "31": "43", | |
| "32": "44", | |
| "33": "45", | |
| "34": "46", | |
| "35": "47", | |
| "36": "48", | |
| "4": "15", | |
| "5": "16", | |
| "6": "17", | |
| "7": "18", | |
| "8": "19", | |
| "9": "20" | |
| }, | |
| "initializer_factor": 1.0, | |
| "label2id": { | |
| "6": 0, | |
| "7": 1, | |
| "8": 2, | |
| "14": 3, | |
| "15": 4, | |
| "16": 5, | |
| "17": 6, | |
| "18": 7, | |
| "19": 8, | |
| "20": 9, | |
| "21": 10, | |
| "22": 11, | |
| "23": 12, | |
| "24": 13, | |
| "25": 14, | |
| "26": 15, | |
| "27": 16, | |
| "28": 17, | |
| "29": 18, | |
| "30": 19, | |
| "31": 20, | |
| "32": 21, | |
| "33": 22, | |
| "34": 23, | |
| "35": 24, | |
| "36": 25, | |
| "37": 26, | |
| "38": 27, | |
| "40": 28, | |
| "41": 29, | |
| "42": 30, | |
| "43": 31, | |
| "44": 32, | |
| "45": 33, | |
| "46": 34, | |
| "47": 35, | |
| "48": 36 | |
| }, | |
| "logit_scale_init_value": 2.6592, | |
| "model_type": "owlvit", | |
| "projection_dim": 512, | |
| "text_config": { | |
| "bos_token_id": 0, | |
| "dropout": 0.0, | |
| "eos_token_id": 2, | |
| "max_length": 16, | |
| "model_type": "owlvit_text_model", | |
| "pad_token_id": 1 | |
| }, | |
| "text_config_dict": null, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.38.2", | |
| "vision_config": { | |
| "dropout": 0.0, | |
| "model_type": "owlvit_vision_model" | |
| }, | |
| "vision_config_dict": null | |
| } | |