{ "architectures": [ "ImageCaptioningModel" ], "max_seq_len": 20, "torch_dtype": "float32", "transformers_version": "4.52.3", "vocab_size": 3478 }