{"num_classes": 46, "image_encoder": "resnet50", "text_encoder": "bert-base-uncased", "fusion_layer_size": 512}