{
  "model_config": {
    "embed_dim": 1024,
    "num_heads": 4,
    "img_hidden_size": 768,
    "txt_hidden_size": 1024,
    "audio_hidden_size": 768
  },
  "projection_heads": {
    "hidden_dim": 2048,
    "dropout": 0.1
  },
  "pretrained_models": {
    "vit": "google/vit-base-patch16-224-in21k",
    "text_encoder": "microsoft/deberta-v3-large",
    "wav2vec": "facebook/wav2vec2-base-960h"
  },
  "freezing_config": {
    "vit_unfrozen_layers": 2,
    "text_unfrozen_layers": 4,
    "audio_unfrozen_layers": 2
  },
  "processing_config": {
    "max_text_length": 32,
    "image_size": 224,
    "patch_size": 32,
    "video_frames": 4
  }
}