Update CLIPCLAP model: contrastive loss training on AudioCaps audio embeddings
Browse files
- audio_model.onnx +2 -2
- audio_projection.onnx +2 -2
- audio_projection.onnx.data +1 -1
- projection_training_metadata.json +5 -3
audio_model.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92855569ded7179bd5f401eb929fc28176cb0f4ac39d69f459627ede857026f8
|
| 3 |
+
size 3320456
|
audio_projection.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e0350792bf3ec28dfb78eb3760d2cb359151b982357d5e051dc5aaa5e00a879
|
| 3 |
+
size 12705
|
audio_projection.onnx.data
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4259840
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c041d3c0841fc7690e644179b1f787277341112a1a93235492b0c654cc61ecae
|
| 3 |
size 4259840
|
projection_training_metadata.json
CHANGED
|
@@ -2,8 +2,10 @@
|
|
| 2 |
"clip_model": "openai/clip-vit-base-patch32",
|
| 3 |
"clap_model": "laion/larger_clap_music_and_speech",
|
| 4 |
"embed_dim": 512,
|
| 5 |
-
"
|
| 6 |
-
"
|
|
|
|
|
|
|
| 7 |
"batch_size": 256,
|
| 8 |
-
"lr": 0.
|
| 9 |
}
|
|
|
|
| 2 |
"clip_model": "openai/clip-vit-base-patch32",
|
| 3 |
"clap_model": "laion/larger_clap_music_and_speech",
|
| 4 |
"embed_dim": 512,
|
| 5 |
+
"training_dataset": "OpenSound/AudioCaps",
|
| 6 |
+
"training_method": "clap_audio_to_clip_text",
|
| 7 |
+
"num_samples": 10000,
|
| 8 |
+
"epochs": 30,
|
| 9 |
"batch_size": 256,
|
| 10 |
+
"lr": 0.0001
|
| 11 |
}
|