{
  "model_type": "clipclap",
  "vision_config": {
    "hidden_size": 768,
    "image_size": 224,
    "patch_size": 32,
    "projection_dim": 512
  },
  "text_config": {
    "hidden_size": 512,
    "max_position_embeddings": 77,
    "projection_dim": 512
  },
  "audio_config": {
    "hidden_size": 1024,
    "sample_rate": 48000,
    "max_length_s": 10,
    "projection_dim": 512
  },
  "projection_dim": 512
}