CarpenterAnt91 committed on Feb 5

Commit

1dc9521

verified ·

1 Parent(s): 5767c39

Update CLIPCLAP model with trained audio projection

Files changed (18) hide show

.gitattributes CHANGED Viewed

@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+audio_model.onnx.data filter=lfs diff=lfs merge=lfs -text
+audio_projection.onnx.data filter=lfs diff=lfs merge=lfs -text
+text_model.onnx.data filter=lfs diff=lfs merge=lfs -text
+text_projection.onnx.data filter=lfs diff=lfs merge=lfs -text
+visual_model.onnx.data filter=lfs diff=lfs merge=lfs -text
+visual_projection.onnx.data filter=lfs diff=lfs merge=lfs -text

audio_model.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:faa9d5379ba1acb8ad6e21a448ab36ea0f662400bf6409e37c88495b48b25b4d
+size 3320426

audio_model.onnx.data ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:318f55059a9504380f3a6e8d1765a50bf1c6c726ee0ba849d014ec950fdac87a
+size 277348352

audio_projection.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:956a052354210cdc8014adcbae668e9a4b637c1a7ad1f53d6e056065dca3be1d
+size 340

audio_projection.onnx.data ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb2cddffad79e72642b59b1f4c86739575534f03a87ad881bb5e09dfc84586b4
+size 1048576

clip_config.json ADDED Viewed

+{
+  "model_type": "clipclap",
+  "vision_config": {
+    "hidden_size": 768,
+    "image_size": 224,
+    "patch_size": 32,
+    "projection_dim": 512
+  },
+  "text_config": {
+    "hidden_size": 512,
+    "max_position_embeddings": 77,
+    "projection_dim": 512
+  },
+  "audio_config": {
+    "hidden_size": 1024,
+    "sample_rate": 48000,
+    "max_length_s": 10,
+    "projection_dim": 512
+  },
+  "projection_dim": 512
+}

processor_config.json ADDED Viewed

+{
+  "image_processor": {
+    "crop_size": {
+      "height": 224,
+      "width": 224
+    },
+    "do_center_crop": true,
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [
+      0.48145466,
+      0.4578275,
+      0.40821073
+    ],
+    "image_processor_type": "CLIPImageProcessor",
+    "image_std": [
+      0.26862954,
+      0.26130258,
+      0.27577711
+    ],
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {
+      "shortest_edge": 224
+    }
+  },
+  "processor_class": "CLIPProcessor"
+}

projection_training_metadata.json ADDED Viewed

+{
+  "clip_model": "openai/clip-vit-base-patch32",
+  "clap_model": "laion/larger_clap_music_and_speech",
+  "embed_dim": 512,
+  "num_captions": 1000,
+  "epochs": 20,
+  "batch_size": 256,
+  "lr": 0.001
+}

text_model.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5501d22847954f1a203268bd3c23b0a576d98ebb6e1d573970a03bec1169956
+size 1238678

text_model.onnx.data ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1da1df9bcae44e6b6450fa88ca6a21af8c19735fbba2f31834ae805a41d3a125
+size 252706816

text_projection.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7dc464d70e63787590745b0ec820d361df09fc41f4bf3245856a7dd44b3c5a9
+size 339

text_projection.onnx.data ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:478d41294224ac5b677b45e1d5ff60b5c05cddb1c09227e8b5166ce1cb30fd51
+size 1048576

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|startoftext|>",
+  "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 77,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>"
+}

visual_model.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9947ea5e7826a3868800792149bebe37791b46bb56d9fcd283b48998f809cd2a
+size 1137817

visual_model.onnx.data ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:04ada8fe1125ce75c4629e0eaf047ec7ea0e5f43e2a518e52ba5343cea8c41c4
+size 349831168

visual_projection.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:42d814607233e9ac34b0df9dfba9d5da9e71d30c53e6d36cf7bd8024f8bd799e
+size 341

visual_projection.onnx.data ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:13295d3b6f85989c124f39c5c1581c9f6a1c0d393ea151402f757987d4327419
+size 1572864