clipclap / clip_config.json
CarpenterAnt91's picture
Update CLIPCLAP model with trained audio projection
1dc9521 verified
raw
history blame contribute delete
411 Bytes
{
"model_type": "clipclap",
"vision_config": {
"hidden_size": 768,
"image_size": 224,
"patch_size": 32,
"projection_dim": 512
},
"text_config": {
"hidden_size": 512,
"max_position_embeddings": 77,
"projection_dim": 512
},
"audio_config": {
"hidden_size": 1024,
"sample_rate": 48000,
"max_length_s": 10,
"projection_dim": 512
},
"projection_dim": 512
}