{
  "output_dim": 384,
  "text_encoder_name": "sentence-transformers/all-MiniLM-L6-v2",
  "image_encoder_name": "google/siglip-base-patch16-512",
  "audio_encoder_name": "openai/whisper-tiny",
  "fusion_type": "transformer",
  "num_fusion_layers": 2,
  "enable_layer_outputs": true
}