esarkans committed on Sep 4, 2025

Commit

9a6f064

verified ·

1 Parent(s): 3cadadd

Upload folder using huggingface_hub

Browse files

Files changed (27) hide show

.gitattributes +2 -0
README.md +60 -0
fig_accuracy_latency.png +3 -0
mobileclip_blt_image.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_blt_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_blt_image.mlpackage/Manifest.json +18 -0
mobileclip_blt_text.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_blt_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_blt_text.mlpackage/Manifest.json +18 -0
mobileclip_s0_image.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_s0_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_s0_image.mlpackage/Manifest.json +18 -0
mobileclip_s0_text.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_s0_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_s0_text.mlpackage/Manifest.json +18 -0
mobileclip_s1_image.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_s1_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_s1_image.mlpackage/Manifest.json +18 -0
mobileclip_s1_text.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_s1_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_s1_text.mlpackage/Manifest.json +18 -0
mobileclip_s2_image.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_s2_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_s2_image.mlpackage/Manifest.json +18 -0
mobileclip_s2_text.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mobileclip_s2_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mobileclip_s2_text.mlpackage/Manifest.json +18 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+mce_example.gif filter=lfs diff=lfs merge=lfs -text
+fig_accuracy_latency.png filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,60 @@

+---
+library_name: coreml
+license: other
+license_name: apple-ascl
+license_link: https://github.com/apple/ml-mobileclip/blob/main/LICENSE_weights_data
+datasets:
+- apple/DataCompDR-1B
+---
+# MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training
+MobileCLIP was introduced in [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training
+](https://arxiv.org/pdf/2311.17049.pdf) (CVPR 2024), by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.
+This repository contains the text and image encoders of all variants of MobileCLIP exported to Core ML. These Core ML models can be plugged into the demo app provided in the official [MobileCLIP repo](https://github.com/apple/ml-mobileclip).
+<img src="mce_example.gif" width="240" height="540" />
+### Highlights
+* Our smallest variant `MobileCLIP-S0` obtains zero-shot performance similar to [OpenAI](https://arxiv.org/abs/2103.00020)'s ViT-B/16 model while being 4.8x faster and 2.8x smaller.
+* `MobileCLIP-S2` obtains better average zero-shot performance than [SigLIP](https://arxiv.org/abs/2303.15343)'s ViT-B/16 model while being 2.3x faster and 2.1x smaller, and trained with 3x fewer seen samples.
+* `MobileCLIP-B` (LT) attains zero-shot ImageNet performance of **77.2%**, which is significantly better than recent works like [DFN](https://arxiv.org/abs/2309.17425) and [SigLIP](https://arxiv.org/abs/2303.15343) with similar architectures or even [OpenAI's ViT-L/14@336](https://arxiv.org/abs/2103.00020).
+## Checkpoints
+| Model                                                     | # Seen <BR>Samples (B) | # Params (M) <BR> (img + txt) | Latency (ms) <BR> (img + txt) | IN-1k Zero-Shot <BR> Top-1 Acc. (%) | Avg. Perf. (%) <BR> on 38 datasets |
+|:----------------------------------------------------------|:----------------------:|:-----------------------------:|:-----------------------------:|:-----------------------------------:|:----------------------------------:|
+| [MobileCLIP-S0](https://hf.co/pcuenq/MobileCLIP-S0)       |           13           |          11.4 + 42.4          |           1.5 + 1.6           |                67.8                 |                58.1                |
+| [MobileCLIP-S1](https://hf.co/pcuenq/MobileCLIP-S1)       |           13           |          21.5 + 63.4          |           2.5 + 3.3           |                72.6                 |                61.3                |
+| [MobileCLIP-S2](https://hf.co/pcuenq/MobileCLIP-S2)       |           13           |          35.7 + 63.4          |           3.6 + 3.3           |                74.4                 |                63.7                |
+| [MobileCLIP-B](https://hf.co/pcuenq/MobileCLIP-B)         |           13           |          86.3 + 63.4          |          10.4 + 3.3           |                76.8                 |                65.2                |
+| [MobileCLIP-B (LT)](https://hf.co/pcuenq/MobileCLIP-B-LT) |           36           |          86.3 + 63.4          |          10.4 + 3.3           |                77.2                 |                65.8                |
+## Download
+Install `huggingface-cli`
+```bash
+brew install huggingface-cli
+```
+```bash
+huggingface-cli download --local-dir models apple/coreml-mobileclip
+```
+## Citation
+**[MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/pdf/2311.17049.pdf). (CVPR 2024)**
+*Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.*
+```bibtex
+@InProceedings{mobileclip2024,
+  author = {Pavan Kumar Anasosalu Vasu and Hadi Pouransari and Fartash Faghri and Raviteja Vemulapalli and Oncel Tuzel},
+  title = {MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training},
+  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2024},
+}
+```

fig_accuracy_latency.png ADDED Viewed

Git LFS Details

SHA256: 3518a573b474cb0dee8b08ac87925251cf446eb3744a2beb1c807c5ecb5ef840
Pointer size: 131 Bytes
Size of remote file: 437 kB

mobileclip_blt_image.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3acaec5c9eca2f27b7dc6d3bffb19cbb94d34e97cdd8aec70987e4ae7de09fae
+size 136798

mobileclip_blt_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c12ec418eadf5d536f11e2e575b26c0d0bbc1270a7080d97f218a0a11595c289
+size 172707392

mobileclip_blt_image.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "9D0B0D8C-A92C-4048-972A-F216AE35ADCD": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "F030A6CD-0EE5-41C8-BEEB-87EB2DC00BAB": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "F030A6CD-0EE5-41C8-BEEB-87EB2DC00BAB"
+}

mobileclip_blt_text.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a96162d32dcf3ba80c313d6126152a1f70e102a64547e4c8d1f69ce7f9b2384
+size 130291

mobileclip_blt_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd0a34acafc0fd5fd142afd2a0c27aacd16edcce8aa7410870f82541a3fec000
+size 126878848

mobileclip_blt_text.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "33EC9AB6-4248-4192-957C-43971D774DC0": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "F9218945-054B-4065-A433-A98BE465BE19": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "F9218945-054B-4065-A433-A98BE465BE19"
+}

mobileclip_s0_image.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c1afa132c41c6535817cc67894bd7484bc2cbd084ed5e2f12b24f611af17591
+size 153260

mobileclip_s0_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87d8f63997bbd2f38ba7defeaaa2c571928bdece56aa9629542198b3ce906ed6
+size 22717696

mobileclip_s0_image.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "CD146415-3A4F-4A31-922F-453338F52D17": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "F472A9FF-F93F-4BD2-BE9A-100E411F6DE2": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "F472A9FF-F93F-4BD2-BE9A-100E411F6DE2"
+}

mobileclip_s0_text.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81eba836ff4dbc8ae021d70006288b533ba7eed3c2973d245b0d5ea047305bfd
+size 57953

mobileclip_s0_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34723e51445b2630106e94e1fdbebed80e7676b404fb839f4eb9bec97bdcad68
+size 84871616

mobileclip_s0_text.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "5FE5957F-3F75-4966-8C7E-1B7913FD0B76": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "9073107D-294B-4CCE-A1A0-7D9A18658D3C": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "5FE5957F-3F75-4966-8C7E-1B7913FD0B76"
+}

mobileclip_s1_image.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b1cc781d6d0af08d95d338b083ae6fb97315cc5810037ceb34bc4b19ea41219
+size 277025

mobileclip_s1_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d817354a9b98b17f289d1f3e398c1f21d1f7e659ae04d18aa7f94e5a3283da2
+size 42921984

mobileclip_s1_image.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "0BB3E4FD-3A5B-4392-8864-DF82EE5A68F5": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "3B678D65-914C-4986-B205-8D14FBF5B8B2": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "0BB3E4FD-3A5B-4392-8864-DF82EE5A68F5"
+}

mobileclip_s1_text.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8651b6d030bae419a9548b41c8fae11f96b59cfa21b6e532a4c4434522b4b80
+size 128127

mobileclip_s1_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d54c20d8c105221d16532ded16765532b015db8d730b00d605252bc03cdd8ff
+size 126866880

mobileclip_s1_text.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "A3B5166A-526B-43E6-8EF1-708E632A1BAF": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "B071EF55-9CA6-4A87-B563-305828969206": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "B071EF55-9CA6-4A87-B563-305828969206"
+}

mobileclip_s2_image.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2aeb3359f6cde65e9f9248ec2a742e9939bd4bbf48c2f55fcd255b4504d96a1b
+size 299056

mobileclip_s2_image.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6cbc7fb06b6072c1cae9c4496d67e0e6217adbf726dfeb82e44d4efe87c34c00
+size 71397632

mobileclip_s2_image.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "0FC07BEB-5990-4420-986E-A2C6804FBB52": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "58408CDA-22F8-492C-A7B9-E1172DBD1283": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "58408CDA-22F8-492C-A7B9-E1172DBD1283"
+}

mobileclip_s2_text.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8651b6d030bae419a9548b41c8fae11f96b59cfa21b6e532a4c4434522b4b80
+size 128127

mobileclip_s2_text.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e8d5454f104b6cbb58d98bf11e038ff1f1943599efea111260a832f094cd0ce
+size 126866880

mobileclip_s2_text.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "5DAD6BBB-8BD5-4AA7-BF03-1B66E983B9DE": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "6EA38C9B-AEDD-4AD0-B0FA-AC98337A4C29": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "6EA38C9B-AEDD-4AD0-B0FA-AC98337A4C29"
+}