{
  "clip_model": "openai/clip-vit-base-patch32",
  "clap_model": "laion/larger_clap_music_and_speech",
  "embed_dim": 512,
  "training_dataset": "OpenSound/AudioCaps",
  "training_method": "clap_audio_to_clip_text",
  "num_samples": 10000,
  "epochs": 30,
  "batch_size": 256,
  "lr": 0.0001
}