CarpenterAnt91 committed on
Commit
a319dc7
·
verified ·
1 Parent(s): 6b55348

Update CLIPCLAP model: contrastive loss training on AudioCaps audio embeddings

Browse files
audio_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35a70ad57524582540f560b1381444e4d7680d71f52fbd6240fd27f12752bb8a
3
- size 3284384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92855569ded7179bd5f401eb929fc28176cb0f4ac39d69f459627ede857026f8
3
+ size 3320456
audio_projection.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe8aefc7c9e8b6809584f9748f759acfeb5254d4fb3d070658b75817effbb9f9
3
- size 1521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0350792bf3ec28dfb78eb3760d2cb359151b982357d5e051dc5aaa5e00a879
3
+ size 12705
audio_projection.onnx.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e968f01d6a4e88089e72d5c5a173f4cdcfa830ec2f6ef1e63822810e90c0fc5f
3
  size 4259840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c041d3c0841fc7690e644179b1f787277341112a1a93235492b0c654cc61ecae
3
  size 4259840
projection_training_metadata.json CHANGED
@@ -2,8 +2,10 @@
2
  "clip_model": "openai/clip-vit-base-patch32",
3
  "clap_model": "laion/larger_clap_music_and_speech",
4
  "embed_dim": 512,
5
- "num_captions": 1000,
6
- "epochs": 20,
 
 
7
  "batch_size": 256,
8
- "lr": 0.001
9
  }
 
2
  "clip_model": "openai/clip-vit-base-patch32",
3
  "clap_model": "laion/larger_clap_music_and_speech",
4
  "embed_dim": 512,
5
+ "training_dataset": "OpenSound/AudioCaps",
6
+ "training_method": "clap_audio_to_clip_text",
7
+ "num_samples": 10000,
8
+ "epochs": 30,
9
  "batch_size": 256,
10
+ "lr": 0.0001
11
  }