{
  "architectures": [
    "ImageCaptioningModel"
  ],
  "emb_size": 300,
  "model_type": "image_captioning_transformer",
  "num_heads": 6,
  "num_layers": 3,
  "torch_dtype": "float32",
  "transformers_version": "4.52.3",
  "vocab_size": 3478
}