diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..78a8296223b1b41ea85c7e9e5e1eedb716660728 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +DeepLab.[[:space:]]A[[:space:]]Deep[[:space:]]Dive[[:space:]]into[[:space:]]Advanced[[:space:]]Visual[[:space:]]Processing.pdf filter=lfs diff=lfs merge=lfs -text +DeepLab.[[:space:]]Semantic[[:space:]]Image[[:space:]]Segmentation[[:space:]]with[[:space:]]Deep[[:space:]]Convolutional[[:space:]]Nets,[[:space:]]Atrous[[:space:]]Convolution,[[:space:]]and[[:space:]]Fully[[:space:]]Connected[[:space:]]CRFs.pdf filter=lfs diff=lfs merge=lfs -text +models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +models/deeplab_v3/checkpoints/train/model.ckpt.meta filter=lfs diff=lfs merge=lfs -text diff --git a/DeepLab. A Deep Dive into Advanced Visual Processing.pdf b/DeepLab. A Deep Dive into Advanced Visual Processing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..69299bb81f31e2a9a3d4f4a2baaed80180a5bb0d --- /dev/null +++ b/DeepLab. A Deep Dive into Advanced Visual Processing.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94cca8e020fe74d1a046e2328469f2cb25a5ef36e2610337382cda664e4ad1c +size 293640 diff --git a/DeepLab. Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.pdf b/DeepLab. Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4bd215982eff1462b63037067d64ebf9ff58000b --- /dev/null +++ b/DeepLab. 
Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80b304869e8dbd0ce9505b4f874ca4b18dca354967348cd104aff62a6dcd25d +size 6101934 diff --git a/code/Deeplab_Tensorflow.zip b/code/Deeplab_Tensorflow.zip new file mode 100644 index 0000000000000000000000000000000000000000..34184ebd17cbf8648686c68051c9efcd5f647fb2 --- /dev/null +++ b/code/Deeplab_Tensorflow.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3827ddebd58f923638182c0f4b82a9313a8820adeec030c1ae47428722e4888a +size 5200061 diff --git a/code/deeplab-pytorch.zip b/code/deeplab-pytorch.zip new file mode 100644 index 0000000000000000000000000000000000000000..de209b808d3dd2bd34bd88a8012e9ee7e41666e9 --- /dev/null +++ b/code/deeplab-pytorch.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed9aad9947755c49bb4d974a1ba91041f7e92b01167c7f29d0361fe8d3bbe70 +size 139271448 diff --git a/code/deeplab2.zip b/code/deeplab2.zip new file mode 100644 index 0000000000000000000000000000000000000000..1fafe21661d1c5fd7a4bec5f74eff3361c90a699 --- /dev/null +++ b/code/deeplab2.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32b82b601b3ce06961f6eca24647477cecc546689d5c9109c83e126db3f6571 +size 17433472 diff --git a/code/deeplab_v3.zip b/code/deeplab_v3.zip new file mode 100644 index 0000000000000000000000000000000000000000..b9f8bba2fb2fbb3cfefb517c25c61ef164ecef39 --- /dev/null +++ b/code/deeplab_v3.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b2385a33d66b29bb5169186d5a2d9419604a6abce588f9aa8c67be696facbf +size 987849 diff --git a/code/deeplabv3-plus-pytorch.zip b/code/deeplabv3-plus-pytorch.zip new file mode 100644 index 0000000000000000000000000000000000000000..a3251d1b5d35cdd0245e62050357b5391e1aed57 --- /dev/null +++ b/code/deeplabv3-plus-pytorch.zip @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:eb84a431024cd16187b88d127f29dcb409911c602e7ea17ef4e273147a7167a8 +size 66513814 diff --git a/code/deeplabv3.zip b/code/deeplabv3.zip new file mode 100644 index 0000000000000000000000000000000000000000..ddab7ead8ef1e48f74fde630293fe72e15a3e54a --- /dev/null +++ b/code/deeplabv3.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a4c05b38b8284fd7a9dd4257c7b6c9d40e398024feab02192ed34472c79ee0 +size 541821384 diff --git a/code/deeplabv3plus-pytorch.zip b/code/deeplabv3plus-pytorch.zip new file mode 100644 index 0000000000000000000000000000000000000000..8511471dceb837b69c6dd2f9a6becf2e3651a3bd --- /dev/null +++ b/code/deeplabv3plus-pytorch.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8626774f15d83cf780f7dc78cbe38d0bbd0f21ed7943bd22fa57ebdcff7ce88e +size 437976 diff --git a/code/keras-deeplab-v3-plus.zip b/code/keras-deeplab-v3-plus.zip new file mode 100644 index 0000000000000000000000000000000000000000..56b8efcd6d8b6771c7b0ec714edee223521e2538 --- /dev/null +++ b/code/keras-deeplab-v3-plus.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bf8d61aa1155aad56a23d5e4006bbeeb8e458d4fddc9409318ab0515d13b2a +size 8187786 diff --git a/code/pytorch-deeplab-xception.zip b/code/pytorch-deeplab-xception.zip new file mode 100644 index 0000000000000000000000000000000000000000..2a27ddcc8c0e42c2ce6a2b6e5937c9a2eab8f6dd --- /dev/null +++ b/code/pytorch-deeplab-xception.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ac9b5e36df312eccabce385efb0a1123271a5493d570c995fb0068c995445e +size 1569855 diff --git a/code/semantic-segmentation-codebase.zip b/code/semantic-segmentation-codebase.zip new file mode 100644 index 0000000000000000000000000000000000000000..9e64a41959f947ba047d2a1dafe59be715b91b90 --- /dev/null +++ b/code/semantic-segmentation-codebase.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f2630452a10b7adeb3efc19788a017add2de3fe244cebf867e639e3d0e18c4b7 +size 180146 diff --git a/code/tensorflow-deeplab-resnet.zip b/code/tensorflow-deeplab-resnet.zip new file mode 100644 index 0000000000000000000000000000000000000000..f6471673ad1a90d08a9954076c8e0cdd86fa2bd7 --- /dev/null +++ b/code/tensorflow-deeplab-resnet.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6140b810eb4211204f30dae17019a296f25fa10cac852eae8baf7af27a2047b +size 2996246 diff --git a/models/deeplab_v3/.gitattributes b/models/deeplab_v3/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplab_v3/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text 
+*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplab_v3/checkpoints/train/checkpoint b/models/deeplab_v3/checkpoints/train/checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..febd7d546081c498d644978b31e8c836a8931736 --- /dev/null +++ b/models/deeplab_v3/checkpoints/train/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "model.ckpt" +all_model_checkpoint_paths: "model.ckpt" diff --git a/models/deeplab_v3/checkpoints/train/data.json b/models/deeplab_v3/checkpoints/train/data.json new file mode 100644 index 0000000000000000000000000000000000000000..2704eac40eef2822dcc2a00c3d0e9b8bcce79ae6 --- /dev/null +++ b/models/deeplab_v3/checkpoints/train/data.json @@ -0,0 +1,19 @@ +{ + "accumulated_validation_miou": 0, + "batch_norm_decay": 0.997, + "batch_norm_epsilon": 1e-05, + "batch_size": 16, + "current_best_val_loss": "0.294389428197", + "gpu_id": 1, + "l2_regularizer": 0.0001, + "multi_grid": [ + 1, + 2, + 4 + ], + "number_of_classes": 21, + "output_stride": 16, + "starting_learning_rate": 1e-05, + "resnet_model": "resnet_v2_50", + "crop_size": 512 +} diff --git a/models/deeplab_v3/checkpoints/train/events.out.tfevents.1516966190.DIGITS-1 b/models/deeplab_v3/checkpoints/train/events.out.tfevents.1516966190.DIGITS-1 new file mode 100644 index 0000000000000000000000000000000000000000..c4be7315a984b86b4dc55565cfec2378038bbaf2 --- /dev/null +++ b/models/deeplab_v3/checkpoints/train/events.out.tfevents.1516966190.DIGITS-1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3733c938c519b433537ce40c3be607b6a67978f855f6dceb5fcb684a6a7b55b0 +size 55620454 diff --git a/models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 
b/models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..591ef5d625099836dfe13319a28ac3d5fe4cdd44 --- /dev/null +++ b/models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77391ecec1ab80a707e2a06da9742cff0f6e4c321f9f6965a49079f96f91e44 +size 469051740 diff --git a/models/deeplab_v3/checkpoints/train/model.ckpt.index b/models/deeplab_v3/checkpoints/train/model.ckpt.index new file mode 100644 index 0000000000000000000000000000000000000000..9944d83ec13aeabbf063d74ada618dbb091baacf Binary files /dev/null and b/models/deeplab_v3/checkpoints/train/model.ckpt.index differ diff --git a/models/deeplab_v3/checkpoints/train/model.ckpt.meta b/models/deeplab_v3/checkpoints/train/model.ckpt.meta new file mode 100644 index 0000000000000000000000000000000000000000..61be6cb73707dbbb0cf232e45da99f9362c1e9bb --- /dev/null +++ b/models/deeplab_v3/checkpoints/train/model.ckpt.meta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd13d4322781ff1034bc7fbf98ad80e52d68eec8e8d3707a252402b54760122 +size 5094997 diff --git a/models/deeplab_v3/source.txt b/models/deeplab_v3/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..ebf4e1c77c3da9048534b1f622d0ae55ca4e3932 --- /dev/null +++ b/models/deeplab_v3/source.txt @@ -0,0 +1,2 @@ +https://github.com/sthalles/deeplab_v3 +https://www.dropbox.com/scl/fo/bf33snucsueb0pu4bwu74/AMkUv-quobAUFB_nz6LSQTg \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-small (apple)/.gitattributes b/models/deeplabv3-mobilevit-small (apple)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..3c5ee6ad9bc473c5fc527530a1a92604c1c0d13e --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/.gitattributes @@ -0,0 +1,27 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs 
-text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3-mobilevit-small (apple)/LICENSE b/models/deeplabv3-mobilevit-small (apple)/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..0dcf7d42d3bef6bed12e42d9246324e710cebb56 --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/LICENSE @@ -0,0 +1,88 @@ +Disclaimer: IMPORTANT: This Apple Machine Learning Research Model is +specifically developed and released by Apple Inc. ("Apple") for the sole purpose +of scientific research of artificial intelligence and machine-learning +technology. “Apple Machine Learning Research Model” means the model, including +but not limited to algorithms, formulas, trained model weights, parameters, +configurations, checkpoints, and any related materials (including +documentation). 
+ +This Apple Machine Learning Research Model is provided to You by +Apple in consideration of your agreement to the following terms, and your use, +modification, creation of Model Derivatives, and or redistribution of the Apple +Machine Learning Research Model constitutes acceptance of this Agreement. If You +do not agree with these terms, please do not use, modify, create Model +Derivatives of, or distribute this Apple Machine Learning Research Model or +Model Derivatives. + +* License Scope: In consideration of your agreement to abide by the following + terms, and subject to these terms, Apple hereby grants you a personal, + non-exclusive, worldwide, non-transferable, royalty-free, revocable, and + limited license, to use, copy, modify, distribute, and create Model + Derivatives (defined below) of the Apple Machine Learning Research Model + exclusively for Research Purposes. You agree that any Model Derivatives You + may create or that may be created for You will be limited to Research Purposes + as well. “Research Purposes” means non-commercial scientific research and + academic development activities, such as experimentation, analysis, testing + conducted by You with the sole intent to advance scientific knowledge and + research. “Research Purposes” does not include any commercial exploitation, + product development or use in any commercial product or service. + +* Distribution of Apple Machine Learning Research Model and Model Derivatives: + If you choose to redistribute Apple Machine Learning Research Model or its + Model Derivatives, you must provide a copy of this Agreement to such third + party, and ensure that the following attribution notice be provided: “Apple + Machine Learning Research Model is licensed under the Apple Machine Learning + Research Model License Agreement.” Additionally, all Model Derivatives must + clearly be identified as such, including disclosure of modifications and + changes made to the Apple Machine Learning Research Model. 
The name, + trademarks, service marks or logos of Apple may not be used to endorse or + promote Model Derivatives or the relationship between You and Apple. “Model + Derivatives” means any models or any other artifacts created by modifications, + improvements, adaptations, alterations to the architecture, algorithm or + training processes of the Apple Machine Learning Research Model, or by any + retraining, fine-tuning of the Apple Machine Learning Research Model. + +* No Other License: Except as expressly stated in this notice, no other rights + or licenses, express or implied, are granted by Apple herein, including but + not limited to any patent, trademark, and similar intellectual property rights + worldwide that may be infringed by the Apple Machine Learning Research Model, + the Model Derivatives or by other works in which the Apple Machine Learning + Research Model may be incorporated. + +* Compliance with Laws: Your use of Apple Machine Learning Research Model must + be in compliance with all applicable laws and regulations. + +* Term and Termination: The term of this Agreement will begin upon your + acceptance of this Agreement or use of the Apple Machine Learning Research + Model and will continue until terminated in accordance with the following + terms. Apple may terminate this Agreement at any time if You are in breach of + any term or condition of this Agreement. Upon termination of this Agreement, + You must cease to use all Apple Machine Learning Research Models and Model + Derivatives and permanently delete any copy thereof. Sections 3, 6 and 7 will + survive termination. + +* Disclaimer and Limitation of Liability: This Apple Machine Learning Research + Model and any outputs generated by the Apple Machine Learning Research Model + are provided on an “AS IS” basis. 
APPLE MAKES NO WARRANTIES, EXPRESS OR + IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, + REGARDING THE APPLE MACHINE LEARNING RESEARCH MODEL OR OUTPUTS GENERATED BY + THE APPLE MACHINE LEARNING RESEARCH MODEL. You are solely responsible for + determining the appropriateness of using or redistributing the Apple Machine + Learning Research Model and any outputs of the Apple Machine Learning Research + Model and assume any risks associated with Your use of the Apple Machine + Learning Research Model and any output and results. IN NO EVENT SHALL APPLE BE + LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING + IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF + THE APPLE MACHINE LEARNING RESEARCH MODEL AND ANY OUTPUTS OF THE APPLE MACHINE + LEARNING RESEARCH MODEL, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, + TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS + BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +* Governing Law: This Agreement will be governed by and construed under the laws + of the State of California without regard to its choice of law principles. The + Convention on Contracts for the International Sale of Goods shall not apply to + the Agreement except that the arbitration clause and any arbitration hereunder + shall be governed by the Federal Arbitration Act, Chapters 1 and 2.  + +Copyright (C) 2025 Apple Inc. All Rights Reserved. 
diff --git a/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel b/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..062faa275be9c79bff2cc16fc643620416b81b76 --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f34ba0fd085efa3e1ea9c2217bd201c5b28b4a458748553f7a4ccfed1274b56 +size 147826 diff --git a/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin b/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca177b53a4e9e17bb01f085ffc419be88789231d --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50a89dd6be1e3ba7e4df23be4f2d79a081d443c1e498536377d30b8e5fb3a29 +size 25418432 diff --git a/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json b/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..d821f74dcd64fa97b3ce5cbc83ce35ca5eabd2f2 --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json @@ -0,0 +1,18 @@ +{ + "fileFormatVersion": "1.0.0", + "itemInfoEntries": { + "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF": { + "author": "com.apple.CoreML", + "description": "CoreML Model Specification", + "name": "model.mlmodel", + "path": "com.apple.CoreML/model.mlmodel" + }, + "FBABE180-594F-4894-9881-F3B3D807D27D": { + "author": "com.apple.CoreML", + 
"description": "CoreML Model Weights", + "name": "weights", + "path": "com.apple.CoreML/weights" + } + }, + "rootModelIdentifier": "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF" +} diff --git a/models/deeplabv3-mobilevit-small (apple)/README.md b/models/deeplabv3-mobilevit-small (apple)/README.md new file mode 100644 index 0000000000000000000000000000000000000000..638c2ee8910354e63e9903abd064462ce2a7c58e --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/README.md @@ -0,0 +1,86 @@ +--- +license: apple-amlr +tags: +- vision +- image-segmentation +datasets: +- pascal-voc +widget: +- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-2.jpg + example_title: Cat +--- + +# MobileViT + DeepLabV3 (small-sized model) + +MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE). + +Disclaimer: The team releasing MobileViT did not write a model card for this model so this model card has been written by the Hugging Face team. + +## Model description + +MobileViT is a light-weight, low latency convolutional neural network that combines MobileNetV2-style layers with a new block that replaces local processing in convolutions with global processing using transformers. As with ViT (Vision Transformer), the image data is converted into flattened patches before it is processed by the transformer layers. Afterwards, the patches are "unflattened" back into feature maps. This allows the MobileViT-block to be placed anywhere inside a CNN. MobileViT does not require any positional embeddings. 
+ +The model in this repo adds a [DeepLabV3](https://arxiv.org/abs/1706.05587) head to the MobileViT backbone for semantic segmentation. + +## Intended uses & limitations + +You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?search=mobilevit) to look for fine-tuned versions on a task that interests you. + +### How to use + +Here is how to use this model: + +```python +from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation +from PIL import Image +import requests + +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +image = Image.open(requests.get(url, stream=True).raw) + +feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small") +model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small") + +inputs = feature_extractor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits +predicted_mask = logits.argmax(1).squeeze(0) +``` + +Currently, both the feature extractor and model support PyTorch. + +## Training data + +The MobileViT + DeepLabV3 model was pretrained on [ImageNet-1k](https://huggingface.co/datasets/imagenet-1k), a dataset consisting of 1 million images and 1,000 classes, and then fine-tuned on the [PASCAL VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/) dataset. + +## Training procedure + +### Preprocessing + +At inference time, images are center-cropped at 512x512. Pixels are normalized to the range [0, 1]. Images are expected to be in BGR pixel order, not RGB. + +### Pretraining + +The MobileViT networks are trained from scratch for 300 epochs on ImageNet-1k on 8 NVIDIA GPUs with an effective batch size of 1024 and learning rate warmup for 3k steps, followed by cosine annealing. Also used were label smoothing cross-entropy loss and L2 weight decay. Training resolution varies from 160x160 to 320x320, using multi-scale sampling. 
+ +To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC dataset using 4 NVIDIA A100 GPUs. + +## Evaluation results + +| Model | PASCAL VOC mIOU | # params | URL | +|------------------|-----------------|-----------|-----------------------------------------------------------| +| MobileViT-XXS | 73.6 | 1.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small | +| MobileViT-XS | 77.1 | 2.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-x-small | +| **MobileViT-S** | **79.1** | **6.4 M** | https://huggingface.co/apple/deeplabv3-mobilevit-small | + +### BibTeX entry and citation info + +```bibtex +@inproceedings{vision-transformer, +title = {MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer}, +author = {Sachin Mehta and Mohammad Rastegari}, +year = {2022}, +URL = {https://arxiv.org/abs/2110.02178} +} +``` diff --git a/models/deeplabv3-mobilevit-small (apple)/config.json b/models/deeplabv3-mobilevit-small (apple)/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9db703c8fed21b22a9d512ce38fe11697887f194 --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/config.json @@ -0,0 +1,91 @@ +{ + "architectures": [ + "MobileViTForSemanticSegmentation" + ], + "aspp_dropout_prob": 0.1, + "aspp_out_channels": 256, + "atrous_rates": [ + 6, + 12, + 18 + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "conv_kernel_size": 3, + "expand_ratio": 4.0, + "hidden_act": "silu", + "hidden_dropout_prob": 0.1, + "hidden_sizes": [ + 144, + 192, + 240 + ], + "id2label": { + "0": "background", + "1": "aeroplane", + "2": "bicycle", + "3": "bird", + "4": "boat", + "5": "bottle", + "6": "bus", + "7": "car", + "8": "cat", + "9": "chair", + "10": "cow", + "11": "diningtable", + "12": "dog", + "13": "horse", + "14": "motorbike", + "15": "person", + "16": "pottedplant", + "17": "sheep", + "18": "sofa", + "19": "train", + "20": "tvmonitor" + }, + "image_size": 512, + 
"initializer_range": 0.02, + "label2id": { + "aeroplane": 1, + "background": 0, + "bicycle": 2, + "bird": 3, + "boat": 4, + "bottle": 5, + "bus": 6, + "car": 7, + "cat": 8, + "chair": 9, + "cow": 10, + "diningtable": 11, + "dog": 12, + "horse": 13, + "motorbike": 14, + "person": 15, + "pottedplant": 16, + "sheep": 17, + "sofa": 18, + "train": 19, + "tvmonitor": 20 + }, + "layer_norm_eps": 1e-05, + "mlp_ratio": 2.0, + "model_type": "mobilevit", + "neck_hidden_sizes": [ + 16, + 32, + 64, + 96, + 128, + 160, + 640 + ], + "num_attention_heads": 4, + "num_channels": 3, + "output_stride": 16, + "patch_size": 2, + "qkv_bias": true, + "semantic_loss_ignore_index": 255, + "torch_dtype": "float32", + "transformers_version": "4.20.0.dev0" +} diff --git a/models/deeplabv3-mobilevit-small (apple)/preprocessor_config.json b/models/deeplabv3-mobilevit-small (apple)/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41ceed3cd2aee6c59b57d9260111c09e2e8c6ef --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "crop_size": 512, + "do_center_crop": true, + "do_flip_channels": true, + "do_resize": true, + "feature_extractor_type": "MobileViTFeatureExtractor", + "resample": 2, + "size": 544 +} diff --git a/models/deeplabv3-mobilevit-small (apple)/pytorch_model.bin b/models/deeplabv3-mobilevit-small (apple)/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0ba3b13198769c54133d3f5cd22790e92e21888 --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e68a534df237d8b89aa9209c815976b4b34f49a4e8107f630fd799697e98291 +size 25615631 diff --git a/models/deeplabv3-mobilevit-small (apple)/source.txt b/models/deeplabv3-mobilevit-small (apple)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c5773a3564f105d5ea259b7c134d09eb989c9b4 --- 
/dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/source.txt @@ -0,0 +1 @@ +https://huggingface.co/apple/deeplabv3-mobilevit-small \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-small (apple)/tf_model.h5 b/models/deeplabv3-mobilevit-small (apple)/tf_model.h5 new file mode 100644 index 0000000000000000000000000000000000000000..3e70a3a61145e67fb760b9ec70907d72ae24cc69 --- /dev/null +++ b/models/deeplabv3-mobilevit-small (apple)/tf_model.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e14ab532bd4b573c60e4f4c6639de6176db4c35c803cc7c0ba05fdb16e5b3de +size 25943848 diff --git a/models/deeplabv3-mobilevit-small/.gitattributes b/models/deeplabv3-mobilevit-small/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplabv3-mobilevit-small/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors 
filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3-mobilevit-small/README.md b/models/deeplabv3-mobilevit-small/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e7421425b851248570df07e6df291b097428752 --- /dev/null +++ b/models/deeplabv3-mobilevit-small/README.md @@ -0,0 +1,9 @@ +--- +base_model: apple/deeplabv3-mobilevit-small +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/apple/deeplabv3-mobilevit-small with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/deeplabv3-mobilevit-small/config.json b/models/deeplabv3-mobilevit-small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6b25ecbbab1722c20984eef70907aff90ba37e40 --- /dev/null +++ b/models/deeplabv3-mobilevit-small/config.json @@ -0,0 +1,91 @@ +{ + "_name_or_path": "apple/deeplabv3-mobilevit-small", + "architectures": [ + "MobileViTForSemanticSegmentation" + ], + "aspp_dropout_prob": 0.1, + "aspp_out_channels": 256, + "atrous_rates": [ + 6, + 12, + 18 + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "conv_kernel_size": 3, + "expand_ratio": 4.0, + "hidden_act": "silu", + "hidden_dropout_prob": 0.1, + "hidden_sizes": [ + 144, + 192, + 240 + ], + "id2label": { + "0": "background", + "1": "aeroplane", + "2": "bicycle", + "3": "bird", + "4": "boat", + "5": "bottle", + "6": "bus", + "7": "car", + "8": "cat", + "9": "chair", + "10": "cow", + "11": "diningtable", + "12": "dog", + "13": "horse", + "14": "motorbike", + "15": "person", + "16": "pottedplant", + "17": "sheep", + "18": "sofa", + "19": "train", + "20": "tvmonitor" + }, + "image_size": 512, + "initializer_range": 0.02, + "label2id": { + "aeroplane": 1, + "background": 0, + "bicycle": 2, + "bird": 3, + "boat": 4, + "bottle": 5, + "bus": 6, + "car": 7, + "cat": 8, + "chair": 9, + "cow": 10, + "diningtable": 11, + "dog": 12, + "horse": 13, + "motorbike": 14, + "person": 15, + "pottedplant": 16, + "sheep": 17, + "sofa": 18, + "train": 19, + "tvmonitor": 20 + }, + "layer_norm_eps": 1e-05, + "mlp_ratio": 2.0, + "model_type": "mobilevit", + "neck_hidden_sizes": [ + 16, + 32, + 64, + 96, + 128, + 160, + 640 + ], + "num_attention_heads": 4, + "num_channels": 3, + "output_stride": 16, + "patch_size": 2, + "qkv_bias": true, + "semantic_loss_ignore_index": 255, + "transformers_version": "4.30.2" +} diff --git a/models/deeplabv3-mobilevit-small/onnx/model.onnx b/models/deeplabv3-mobilevit-small/onnx/model.onnx new 
file mode 100644 index 0000000000000000000000000000000000000000..0184d5821dc71ef4e8429a2074ba63f9b3c5d489 --- /dev/null +++ b/models/deeplabv3-mobilevit-small/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ebc9436f8387fac8595e99566c3b4eae2802bd614924468d7a3d0948e4dbd7 +size 25725066 diff --git a/models/deeplabv3-mobilevit-small/onnx/model_fp16.onnx b/models/deeplabv3-mobilevit-small/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9448019c20464a1d7579a4dceae6cf7c35511369 --- /dev/null +++ b/models/deeplabv3-mobilevit-small/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef568c975bfcaee201535f4b27855a370cd88891000f5d5573b686d69caa49b +size 13127014 diff --git a/models/deeplabv3-mobilevit-small/onnx/model_quantized.onnx b/models/deeplabv3-mobilevit-small/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f9f3f4663c07e88502a15de5e6825d4387ae882b --- /dev/null +++ b/models/deeplabv3-mobilevit-small/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb155ceb71ffad0b6785f8101cfa177e4dba18b906511044348c4dd094117598 +size 7095228 diff --git a/models/deeplabv3-mobilevit-small/preprocessor_config.json b/models/deeplabv3-mobilevit-small/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9681fc7bed5716a2079910c45cf1806115d0d0aa --- /dev/null +++ b/models/deeplabv3-mobilevit-small/preprocessor_config.json @@ -0,0 +1,18 @@ +{ + "crop_size": { + "height": 512, + "width": 512 + }, + "do_center_crop": true, + "do_flip_channel_order": true, + "do_flip_channels": true, + "do_rescale": true, + "do_resize": true, + "feature_extractor_type": "MobileViTFeatureExtractor", + "image_processor_type": "MobileViTFeatureExtractor", + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "shortest_edge": 544 + } +} diff 
--git a/models/deeplabv3-mobilevit-small/quant_config.json b/models/deeplabv3-mobilevit-small/quant_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f84b57d626c9b7dcbc5998c076c6e2ea517128e9 --- /dev/null +++ b/models/deeplabv3-mobilevit-small/quant_config.json @@ -0,0 +1,34 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Add", + "Gather", + "Softmax", + "GlobalAveragePool", + "Transpose", + "Relu", + "Concat", + "ReduceMean", + "Resize", + "Cast", + "Shape", + "Div", + "Constant", + "Slice", + "Pow", + "Sqrt", + "Reshape", + "Unsqueeze", + "MatMul", + "Conv", + "Mul", + "Sub", + "Sigmoid" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-small/source.txt b/models/deeplabv3-mobilevit-small/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6a8d38aec1653cbfadea5766ead4f3754983a24 --- /dev/null +++ b/models/deeplabv3-mobilevit-small/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/deeplabv3-mobilevit-small \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-x-small (apple)/.gitattributes b/models/deeplabv3-mobilevit-x-small (apple)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..3c5ee6ad9bc473c5fc527530a1a92604c1c0d13e --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/.gitattributes @@ -0,0 +1,27 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text 
+*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3-mobilevit-x-small (apple)/LICENSE b/models/deeplabv3-mobilevit-x-small (apple)/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..0dcf7d42d3bef6bed12e42d9246324e710cebb56 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/LICENSE @@ -0,0 +1,88 @@ +Disclaimer: IMPORTANT: This Apple Machine Learning Research Model is +specifically developed and released by Apple Inc. ("Apple") for the sole purpose +of scientific research of artificial intelligence and machine-learning +technology. “Apple Machine Learning Research Model” means the model, including +but not limited to algorithms, formulas, trained model weights, parameters, +configurations, checkpoints, and any related materials (including +documentation). + +This Apple Machine Learning Research Model is provided to You by +Apple in consideration of your agreement to the following terms, and your use, +modification, creation of Model Derivatives, and or redistribution of the Apple +Machine Learning Research Model constitutes acceptance of this Agreement. If You +do not agree with these terms, please do not use, modify, create Model +Derivatives of, or distribute this Apple Machine Learning Research Model or +Model Derivatives. 
+ +* License Scope: In consideration of your agreement to abide by the following + terms, and subject to these terms, Apple hereby grants you a personal, + non-exclusive, worldwide, non-transferable, royalty-free, revocable, and + limited license, to use, copy, modify, distribute, and create Model + Derivatives (defined below) of the Apple Machine Learning Research Model + exclusively for Research Purposes. You agree that any Model Derivatives You + may create or that may be created for You will be limited to Research Purposes + as well. “Research Purposes” means non-commercial scientific research and + academic development activities, such as experimentation, analysis, testing + conducted by You with the sole intent to advance scientific knowledge and + research. “Research Purposes” does not include any commercial exploitation, + product development or use in any commercial product or service. + +* Distribution of Apple Machine Learning Research Model and Model Derivatives: + If you choose to redistribute Apple Machine Learning Research Model or its + Model Derivatives, you must provide a copy of this Agreement to such third + party, and ensure that the following attribution notice be provided: “Apple + Machine Learning Research Model is licensed under the Apple Machine Learning + Research Model License Agreement.” Additionally, all Model Derivatives must + clearly be identified as such, including disclosure of modifications and + changes made to the Apple Machine Learning Research Model. The name, + trademarks, service marks or logos of Apple may not be used to endorse or + promote Model Derivatives or the relationship between You and Apple. “Model + Derivatives” means any models or any other artifacts created by modifications, + improvements, adaptations, alterations to the architecture, algorithm or + training processes of the Apple Machine Learning Research Model, or by any + retraining, fine-tuning of the Apple Machine Learning Research Model. 
+ +* No Other License: Except as expressly stated in this notice, no other rights + or licenses, express or implied, are granted by Apple herein, including but + not limited to any patent, trademark, and similar intellectual property rights + worldwide that may be infringed by the Apple Machine Learning Research Model, + the Model Derivatives or by other works in which the Apple Machine Learning + Research Model may be incorporated. + +* Compliance with Laws: Your use of Apple Machine Learning Research Model must + be in compliance with all applicable laws and regulations. + +* Term and Termination: The term of this Agreement will begin upon your + acceptance of this Agreement or use of the Apple Machine Learning Research + Model and will continue until terminated in accordance with the following + terms. Apple may terminate this Agreement at any time if You are in breach of + any term or condition of this Agreement. Upon termination of this Agreement, + You must cease to use all Apple Machine Learning Research Models and Model + Derivatives and permanently delete any copy thereof. Sections 3, 6 and 7 will + survive termination. + +* Disclaimer and Limitation of Liability: This Apple Machine Learning Research + Model and any outputs generated by the Apple Machine Learning Research Model + are provided on an “AS IS” basis. APPLE MAKES NO WARRANTIES, EXPRESS OR + IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, + REGARDING THE APPLE MACHINE LEARNING RESEARCH MODEL OR OUTPUTS GENERATED BY + THE APPLE MACHINE LEARNING RESEARCH MODEL. You are solely responsible for + determining the appropriateness of using or redistributing the Apple Machine + Learning Research Model and any outputs of the Apple Machine Learning Research + Model and assume any risks associated with Your use of the Apple Machine + Learning Research Model and any output and results. 
IN NO EVENT SHALL APPLE BE + LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING + IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF + THE APPLE MACHINE LEARNING RESEARCH MODEL AND ANY OUTPUTS OF THE APPLE MACHINE + LEARNING RESEARCH MODEL, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, + TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS + BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +* Governing Law: This Agreement will be governed by and construed under the laws + of the State of California without regard to its choice of law principles. The + Convention on Contracts for the International Sale of Goods shall not apply to + the Agreement except that the arbitration clause and any arbitration hereunder + shall be governed by the Federal Arbitration Act, Chapters 1 and 2.  + +Copyright (C) 2025 Apple Inc. All Rights Reserved. diff --git a/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel b/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..aa5284088d7819842db09495a0ee5a2a90a5cf1d --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956d0bcc1ee6a542a38da38b41c336c67133d1a0d042cc25e2d5c614b8204a2e +size 147391 diff --git a/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin b/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..8921db2913b7ab6679a892661148cf2cad441b5b --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small 
(apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a953a528a0d96f99cb90985edb269343c3388448f085cbdb674b73a8e801bf5 +size 11770752 diff --git a/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json b/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..0160a5cd6ea635fbdd4f4f77403d7e6f53ad40d6 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json @@ -0,0 +1,18 @@ +{ + "fileFormatVersion": "1.0.0", + "itemInfoEntries": { + "68772B66-C952-4603-A56D-D7A693B54D42": { + "author": "com.apple.CoreML", + "description": "CoreML Model Weights", + "name": "weights", + "path": "com.apple.CoreML/weights" + }, + "A74A3117-90A0-44A8-A884-981A9F31DC56": { + "author": "com.apple.CoreML", + "description": "CoreML Model Specification", + "name": "model.mlmodel", + "path": "com.apple.CoreML/model.mlmodel" + } + }, + "rootModelIdentifier": "A74A3117-90A0-44A8-A884-981A9F31DC56" +} diff --git a/models/deeplabv3-mobilevit-x-small (apple)/README.md b/models/deeplabv3-mobilevit-x-small (apple)/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c000e0b6ef65d496294616c2abef733c9eb9fb2 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/README.md @@ -0,0 +1,86 @@ +--- +license: other +tags: +- vision +- image-segmentation +datasets: +- pascal-voc +widget: +- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-2.jpg + example_title: Cat +--- + +# MobileViT + DeepLabV3 (extra small-sized model) + +MobileViT model pre-trained on PASCAL VOC at resolution 512x512. 
It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE). + +Disclaimer: The team releasing MobileViT did not write a model card for this model so this model card has been written by the Hugging Face team. + +## Model description + +MobileViT is a light-weight, low latency convolutional neural network that combines MobileNetV2-style layers with a new block that replaces local processing in convolutions with global processing using transformers. As with ViT (Vision Transformer), the image data is converted into flattened patches before it is processed by the transformer layers. Afterwards, the patches are "unflattened" back into feature maps. This allows the MobileViT-block to be placed anywhere inside a CNN. MobileViT does not require any positional embeddings. + +The model in this repo adds a [DeepLabV3](https://arxiv.org/abs/1706.05587) head to the MobileViT backbone for semantic segmentation. + +## Intended uses & limitations + +You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?search=mobilevit) to look for fine-tuned versions on a task that interests you. 
+ +### How to use + +Here is how to use this model: + +```python +from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation +from PIL import Image +import requests + +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +image = Image.open(requests.get(url, stream=True).raw) + +feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-x-small") +model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-x-small") + +inputs = feature_extractor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits +predicted_mask = logits.argmax(1).squeeze(0) +``` + +Currently, both the feature extractor and model support PyTorch. + +## Training data + +The MobileViT + DeepLabV3 model was pretrained on [ImageNet-1k](https://huggingface.co/datasets/imagenet-1k), a dataset consisting of 1 million images and 1,000 classes, and then fine-tuned on the [PASCAL VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/) dataset. + +## Training procedure + +### Preprocessing + +At inference time, images are center-cropped at 512x512. Pixels are normalized to the range [0, 1]. Images are expected to be in BGR pixel order, not RGB. + +### Pretraining + +The MobileViT networks are trained from scratch for 300 epochs on ImageNet-1k on 8 NVIDIA GPUs with an effective batch size of 1024 and learning rate warmup for 3k steps, followed by cosine annealing. Also used were label smoothing cross-entropy loss and L2 weight decay. Training resolution varies from 160x160 to 320x320, using multi-scale sampling. + +To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC dataset using 4 NVIDIA A100 GPUs. 
+ +## Evaluation results + +| Model | PASCAL VOC mIOU | # params | URL | +|------------------|-----------------|-----------|-----------------------------------------------------------| +| MobileViT-XXS | 73.6 | 1.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small | +| **MobileViT-XS** | **77.1** | **2.9 M** | https://huggingface.co/apple/deeplabv3-mobilevit-x-small | +| MobileViT-S | 79.1 | 6.4 M | https://huggingface.co/apple/deeplabv3-mobilevit-small | + +### BibTeX entry and citation info + +```bibtex +@inproceedings{vision-transformer, +title = {MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer}, +author = {Sachin Mehta and Mohammad Rastegari}, +year = {2022}, +URL = {https://arxiv.org/abs/2110.02178} +} +``` diff --git a/models/deeplabv3-mobilevit-x-small (apple)/config.json b/models/deeplabv3-mobilevit-x-small (apple)/config.json new file mode 100644 index 0000000000000000000000000000000000000000..216b557f79f3cedfc83188bc02b9b96761614d52 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/config.json @@ -0,0 +1,91 @@ +{ + "architectures": [ + "MobileViTForSemanticSegmentation" + ], + "aspp_dropout_prob": 0.1, + "aspp_out_channels": 256, + "atrous_rates": [ + 6, + 12, + 18 + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "conv_kernel_size": 3, + "expand_ratio": 4.0, + "hidden_act": "silu", + "hidden_dropout_prob": 0.1, + "hidden_sizes": [ + 96, + 120, + 144 + ], + "id2label": { + "0": "background", + "1": "aeroplane", + "2": "bicycle", + "3": "bird", + "4": "boat", + "5": "bottle", + "6": "bus", + "7": "car", + "8": "cat", + "9": "chair", + "10": "cow", + "11": "diningtable", + "12": "dog", + "13": "horse", + "14": "motorbike", + "15": "person", + "16": "pottedplant", + "17": "sheep", + "18": "sofa", + "19": "train", + "20": "tvmonitor" + }, + "image_size": 512, + "initializer_range": 0.02, + "label2id": { + "aeroplane": 1, + "background": 0, + "bicycle": 2, + "bird": 3, + 
"boat": 4, + "bottle": 5, + "bus": 6, + "car": 7, + "cat": 8, + "chair": 9, + "cow": 10, + "diningtable": 11, + "dog": 12, + "horse": 13, + "motorbike": 14, + "person": 15, + "pottedplant": 16, + "sheep": 17, + "sofa": 18, + "train": 19, + "tvmonitor": 20 + }, + "layer_norm_eps": 1e-05, + "mlp_ratio": 2.0, + "model_type": "mobilevit", + "neck_hidden_sizes": [ + 16, + 32, + 48, + 64, + 80, + 96, + 384 + ], + "num_attention_heads": 4, + "num_channels": 3, + "output_stride": 16, + "patch_size": 2, + "qkv_bias": true, + "semantic_loss_ignore_index": 255, + "torch_dtype": "float32", + "transformers_version": "4.20.0.dev0" +} diff --git a/models/deeplabv3-mobilevit-x-small (apple)/preprocessor_config.json b/models/deeplabv3-mobilevit-x-small (apple)/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41ceed3cd2aee6c59b57d9260111c09e2e8c6ef --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "crop_size": 512, + "do_center_crop": true, + "do_flip_channels": true, + "do_resize": true, + "feature_extractor_type": "MobileViTFeatureExtractor", + "resample": 2, + "size": 544 +} diff --git a/models/deeplabv3-mobilevit-x-small (apple)/pytorch_model.bin b/models/deeplabv3-mobilevit-x-small (apple)/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d35c0f828adbef0b72b22cd78c8fde91708199ed --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9450e85de944b10ba3461be3469c4b24bc9140edcc024075af6ab28b46471985 +size 11945615 diff --git a/models/deeplabv3-mobilevit-x-small (apple)/source.txt b/models/deeplabv3-mobilevit-x-small (apple)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a7ae61c423908cb70807b4f041e57369fe34a65 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/apple/deeplabv3-mobilevit-x-small \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-x-small (apple)/tf_model.h5 b/models/deeplabv3-mobilevit-x-small (apple)/tf_model.h5 new file mode 100644 index 0000000000000000000000000000000000000000..43e799949745597fa6954a1b01914c97fb8d1f18 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small (apple)/tf_model.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff25647596314b8b537a46374c1b90ee7b89611e2822a90aa1b3df0d3071dbf +size 12276104 diff --git a/models/deeplabv3-mobilevit-x-small/.gitattributes b/models/deeplabv3-mobilevit-x-small/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs 
diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3-mobilevit-x-small/README.md b/models/deeplabv3-mobilevit-x-small/README.md new file mode 100644 index 0000000000000000000000000000000000000000..865acf0187841d5fab227e959499bd348c95ff04 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/README.md @@ -0,0 +1,9 @@ +--- +base_model: apple/deeplabv3-mobilevit-x-small +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/apple/deeplabv3-mobilevit-x-small with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/deeplabv3-mobilevit-x-small/config.json b/models/deeplabv3-mobilevit-x-small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..519c7de5facc41be586cc1c215a6211165ad19b0 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/config.json @@ -0,0 +1,91 @@ +{ + "_name_or_path": "apple/deeplabv3-mobilevit-x-small", + "architectures": [ + "MobileViTForSemanticSegmentation" + ], + "aspp_dropout_prob": 0.1, + "aspp_out_channels": 256, + "atrous_rates": [ + 6, + 12, + 18 + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "conv_kernel_size": 3, + "expand_ratio": 4.0, + "hidden_act": "silu", + "hidden_dropout_prob": 0.1, + "hidden_sizes": [ + 96, + 120, + 144 + ], + "id2label": { + "0": "background", + "1": "aeroplane", + "2": "bicycle", + "3": "bird", + "4": "boat", + "5": "bottle", + "6": "bus", + "7": "car", + "8": "cat", + "9": "chair", + "10": "cow", + "11": "diningtable", + "12": "dog", + "13": "horse", + "14": "motorbike", + "15": "person", + "16": "pottedplant", + "17": "sheep", + "18": "sofa", + "19": "train", + "20": "tvmonitor" + }, + "image_size": 512, + "initializer_range": 0.02, + "label2id": { + "aeroplane": 1, + "background": 0, + "bicycle": 2, + "bird": 3, + "boat": 4, + "bottle": 5, + "bus": 6, + "car": 7, + "cat": 8, + "chair": 9, + "cow": 10, + "diningtable": 11, + "dog": 12, + "horse": 13, + "motorbike": 14, + "person": 15, + "pottedplant": 16, + "sheep": 17, + "sofa": 18, + "train": 19, + "tvmonitor": 20 + }, + "layer_norm_eps": 1e-05, + "mlp_ratio": 2.0, + "model_type": "mobilevit", + "neck_hidden_sizes": [ + 16, + 32, + 48, + 64, + 80, + 96, + 384 + ], + "num_attention_heads": 4, + "num_channels": 3, + "output_stride": 16, + "patch_size": 2, + "qkv_bias": true, + "semantic_loss_ignore_index": 255, + "transformers_version": "4.30.2" +} diff --git a/models/deeplabv3-mobilevit-x-small/onnx/model.onnx 
b/models/deeplabv3-mobilevit-x-small/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b91afa91d8532f39a875d2dd507715bb77ec42e0 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342d7d449ee97ae7d73d5608297ae51641459a9fbac08daae3647ebb3debdb1e +size 12076020 diff --git a/models/deeplabv3-mobilevit-x-small/onnx/model_fp16.onnx b/models/deeplabv3-mobilevit-x-small/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..27a0d8a61c587043536001fbdc42efee744ce64e --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c30bcd9c4e4c8d4e4a099421a4497a0fea97dad2509cf7c1c36a9440ac25c2 +size 6302405 diff --git a/models/deeplabv3-mobilevit-x-small/onnx/model_quantized.onnx b/models/deeplabv3-mobilevit-x-small/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ff2b677cbbc1db83abfe36341278c15250e8292d --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02878228b780deef28640a3cbebd8f5e883be7434ac80649e888d07d73ced88e +size 3630761 diff --git a/models/deeplabv3-mobilevit-x-small/preprocessor_config.json b/models/deeplabv3-mobilevit-x-small/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9681fc7bed5716a2079910c45cf1806115d0d0aa --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/preprocessor_config.json @@ -0,0 +1,18 @@ +{ + "crop_size": { + "height": 512, + "width": 512 + }, + "do_center_crop": true, + "do_flip_channel_order": true, + "do_flip_channels": true, + "do_rescale": true, + "do_resize": true, + "feature_extractor_type": "MobileViTFeatureExtractor", + "image_processor_type": "MobileViTFeatureExtractor", + "resample": 2, + 
"rescale_factor": 0.00392156862745098, + "size": { + "shortest_edge": 544 + } +} diff --git a/models/deeplabv3-mobilevit-x-small/quant_config.json b/models/deeplabv3-mobilevit-x-small/quant_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aac0ac1a0aa135bf879853033765484c83950042 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/quant_config.json @@ -0,0 +1,34 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "GlobalAveragePool", + "Gather", + "Transpose", + "Relu", + "Sub", + "Unsqueeze", + "Concat", + "MatMul", + "Shape", + "Reshape", + "Add", + "ReduceMean", + "Resize", + "Pow", + "Div", + "Constant", + "Slice", + "Softmax", + "Conv", + "Cast", + "Mul", + "Sqrt", + "Sigmoid" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-x-small/source.txt b/models/deeplabv3-mobilevit-x-small/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..4681163ebef16527c947766310a68ef9559b2f07 --- /dev/null +++ b/models/deeplabv3-mobilevit-x-small/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/deeplabv3-mobilevit-x-small \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/.gitattributes b/models/deeplabv3-mobilevit-xx-small (apple)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..3c5ee6ad9bc473c5fc527530a1a92604c1c0d13e --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/.gitattributes @@ -0,0 +1,27 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs 
merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/LICENSE b/models/deeplabv3-mobilevit-xx-small (apple)/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..0dcf7d42d3bef6bed12e42d9246324e710cebb56 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/LICENSE @@ -0,0 +1,88 @@ +Disclaimer: IMPORTANT: This Apple Machine Learning Research Model is +specifically developed and released by Apple Inc. ("Apple") for the sole purpose +of scientific research of artificial intelligence and machine-learning +technology. “Apple Machine Learning Research Model” means the model, including +but not limited to algorithms, formulas, trained model weights, parameters, +configurations, checkpoints, and any related materials (including +documentation). + +This Apple Machine Learning Research Model is provided to You by +Apple in consideration of your agreement to the following terms, and your use, +modification, creation of Model Derivatives, and or redistribution of the Apple +Machine Learning Research Model constitutes acceptance of this Agreement. 
If You +do not agree with these terms, please do not use, modify, create Model +Derivatives of, or distribute this Apple Machine Learning Research Model or +Model Derivatives. + +* License Scope: In consideration of your agreement to abide by the following + terms, and subject to these terms, Apple hereby grants you a personal, + non-exclusive, worldwide, non-transferable, royalty-free, revocable, and + limited license, to use, copy, modify, distribute, and create Model + Derivatives (defined below) of the Apple Machine Learning Research Model + exclusively for Research Purposes. You agree that any Model Derivatives You + may create or that may be created for You will be limited to Research Purposes + as well. “Research Purposes” means non-commercial scientific research and + academic development activities, such as experimentation, analysis, testing + conducted by You with the sole intent to advance scientific knowledge and + research. “Research Purposes” does not include any commercial exploitation, + product development or use in any commercial product or service. + +* Distribution of Apple Machine Learning Research Model and Model Derivatives: + If you choose to redistribute Apple Machine Learning Research Model or its + Model Derivatives, you must provide a copy of this Agreement to such third + party, and ensure that the following attribution notice be provided: “Apple + Machine Learning Research Model is licensed under the Apple Machine Learning + Research Model License Agreement.” Additionally, all Model Derivatives must + clearly be identified as such, including disclosure of modifications and + changes made to the Apple Machine Learning Research Model. The name, + trademarks, service marks or logos of Apple may not be used to endorse or + promote Model Derivatives or the relationship between You and Apple. 
“Model + Derivatives” means any models or any other artifacts created by modifications, + improvements, adaptations, alterations to the architecture, algorithm or + training processes of the Apple Machine Learning Research Model, or by any + retraining, fine-tuning of the Apple Machine Learning Research Model. + +* No Other License: Except as expressly stated in this notice, no other rights + or licenses, express or implied, are granted by Apple herein, including but + not limited to any patent, trademark, and similar intellectual property rights + worldwide that may be infringed by the Apple Machine Learning Research Model, + the Model Derivatives or by other works in which the Apple Machine Learning + Research Model may be incorporated. + +* Compliance with Laws: Your use of Apple Machine Learning Research Model must + be in compliance with all applicable laws and regulations. + +* Term and Termination: The term of this Agreement will begin upon your + acceptance of this Agreement or use of the Apple Machine Learning Research + Model and will continue until terminated in accordance with the following + terms. Apple may terminate this Agreement at any time if You are in breach of + any term or condition of this Agreement. Upon termination of this Agreement, + You must cease to use all Apple Machine Learning Research Models and Model + Derivatives and permanently delete any copy thereof. Sections 3, 6 and 7 will + survive termination. + +* Disclaimer and Limitation of Liability: This Apple Machine Learning Research + Model and any outputs generated by the Apple Machine Learning Research Model + are provided on an “AS IS” basis. APPLE MAKES NO WARRANTIES, EXPRESS OR + IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, + REGARDING THE APPLE MACHINE LEARNING RESEARCH MODEL OR OUTPUTS GENERATED BY + THE APPLE MACHINE LEARNING RESEARCH MODEL. 
You are solely responsible for + determining the appropriateness of using or redistributing the Apple Machine + Learning Research Model and any outputs of the Apple Machine Learning Research + Model and assume any risks associated with Your use of the Apple Machine + Learning Research Model and any output and results. IN NO EVENT SHALL APPLE BE + LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING + IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF + THE APPLE MACHINE LEARNING RESEARCH MODEL AND ANY OUTPUTS OF THE APPLE MACHINE + LEARNING RESEARCH MODEL, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, + TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS + BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +* Governing Law: This Agreement will be governed by and construed under the laws + of the State of California without regard to its choice of law principles. The + Convention on Contracts for the International Sale of Goods shall not apply to + the Agreement except that the arbitration clause and any arbitration hereunder + shall be governed by the Federal Arbitration Act, Chapters 1 and 2.  + +Copyright (C) 2025 Apple Inc. All Rights Reserved. 
diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel b/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel new file mode 100644 index 0000000000000000000000000000000000000000..33a4678c150913076ce1bf576eba2a4ebe6360d0 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1d38ee1d54ddd3e021c92e93b489409b537913d67a26ed3178532492368420 +size 147108 diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin b/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..655f8327961b15319a16fb14b2d8b60bb4d416a5 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836d1d6b55ee4dabf7cc8fde30c61f3a2e2a4fe770bab04dd4ac4d738dd74d48 +size 7421184 diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json b/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..280801f1327d5a91f3b1d45ffeb79f30854870e7 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json @@ -0,0 +1,18 @@ +{ + "fileFormatVersion": "1.0.0", + "itemInfoEntries": { + "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE": { + "author": "com.apple.CoreML", + "description": "CoreML Model Specification", + "name": "model.mlmodel", + "path": "com.apple.CoreML/model.mlmodel" + }, + "871A3834-A719-4108-9576-7E776094437D": { + 
"author": "com.apple.CoreML", + "description": "CoreML Model Weights", + "name": "weights", + "path": "com.apple.CoreML/weights" + } + }, + "rootModelIdentifier": "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE" +} diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/README.md b/models/deeplabv3-mobilevit-xx-small (apple)/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e3648cfd46e17b094f051cf6152ec44097a6bfa5 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/README.md @@ -0,0 +1,86 @@ +--- +license: other +tags: +- vision +- image-segmentation +datasets: +- pascal-voc +widget: +- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-2.jpg + example_title: Cat +--- + +# MobileViT + DeepLabV3 (extra extra small-sized model) + +MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE). + +Disclaimer: The team releasing MobileViT did not write a model card for this model so this model card has been written by the Hugging Face team. + +## Model description + +MobileViT is a light-weight, low latency convolutional neural network that combines MobileNetV2-style layers with a new block that replaces local processing in convolutions with global processing using transformers. As with ViT (Vision Transformer), the image data is converted into flattened patches before it is processed by the transformer layers. Afterwards, the patches are "unflattened" back into feature maps. This allows the MobileViT-block to be placed anywhere inside a CNN. MobileViT does not require any positional embeddings. 
+ +The model in this repo adds a [DeepLabV3](https://arxiv.org/abs/1706.05587) head to the MobileViT backbone for semantic segmentation. + +## Intended uses & limitations + +You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?search=mobilevit) to look for fine-tuned versions on a task that interests you. + +### How to use + +Here is how to use this model: + +```python +from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation +from PIL import Image +import requests + +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +image = Image.open(requests.get(url, stream=True).raw) + +feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-xx-small") +model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-xx-small") + +inputs = feature_extractor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits +predicted_mask = logits.argmax(1).squeeze(0) +``` + +Currently, both the feature extractor and model support PyTorch. + +## Training data + +The MobileViT + DeepLabV3 model was pretrained on [ImageNet-1k](https://huggingface.co/datasets/imagenet-1k), a dataset consisting of 1 million images and 1,000 classes, and then fine-tuned on the [PASCAL VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/) dataset. + +## Training procedure + +### Preprocessing + +At inference time, images are center-cropped at 512x512. Pixels are normalized to the range [0, 1]. Images are expected to be in BGR pixel order, not RGB. + +### Pretraining + +The MobileViT networks are trained from scratch for 300 epochs on ImageNet-1k on 8 NVIDIA GPUs with an effective batch size of 1024 and learning rate warmup for 3k steps, followed by cosine annealing. Also used were label smoothing cross-entropy loss and L2 weight decay. Training resolution varies from 160x160 to 320x320, using multi-scale sampling. 
+ +To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC dataset using 4 NVIDIA A100 GPUs. + +## Evaluation results + +| Model | PASCAL VOC mIOU | # params | URL | +|-------------------|-----------------|-----------|-----------------------------------------------------------| +| **MobileViT-XXS** | **73.6** | **1.9 M** | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small | +| MobileViT-XS | 77.1 | 2.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-x-small | +| MobileViT-S | 79.1 | 6.4 M | https://huggingface.co/apple/deeplabv3-mobilevit-small | + +### BibTeX entry and citation info + +```bibtex +@inproceedings{vision-transformer, +title = {MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer}, +author = {Sachin Mehta and Mohammad Rastegari}, +year = {2022}, +URL = {https://arxiv.org/abs/2110.02178} +} +``` diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/config.json b/models/deeplabv3-mobilevit-xx-small (apple)/config.json new file mode 100644 index 0000000000000000000000000000000000000000..38bc48716b0d9bbd53e0de2d0c8c57de962eaad7 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/config.json @@ -0,0 +1,91 @@ +{ + "architectures": [ + "MobileViTForSemanticSegmentation" + ], + "aspp_dropout_prob": 0.1, + "aspp_out_channels": 256, + "atrous_rates": [ + 6, + 12, + 18 + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "conv_kernel_size": 3, + "expand_ratio": 2.0, + "hidden_act": "silu", + "hidden_dropout_prob": 0.05, + "hidden_sizes": [ + 64, + 80, + 96 + ], + "id2label": { + "0": "background", + "1": "aeroplane", + "2": "bicycle", + "3": "bird", + "4": "boat", + "5": "bottle", + "6": "bus", + "7": "car", + "8": "cat", + "9": "chair", + "10": "cow", + "11": "diningtable", + "12": "dog", + "13": "horse", + "14": "motorbike", + "15": "person", + "16": "pottedplant", + "17": "sheep", + "18": "sofa", + "19": "train", + "20": "tvmonitor" + }, + "image_size": 512, + 
"initializer_range": 0.02, + "label2id": { + "aeroplane": 1, + "background": 0, + "bicycle": 2, + "bird": 3, + "boat": 4, + "bottle": 5, + "bus": 6, + "car": 7, + "cat": 8, + "chair": 9, + "cow": 10, + "diningtable": 11, + "dog": 12, + "horse": 13, + "motorbike": 14, + "person": 15, + "pottedplant": 16, + "sheep": 17, + "sofa": 18, + "train": 19, + "tvmonitor": 20 + }, + "layer_norm_eps": 1e-05, + "mlp_ratio": 2.0, + "model_type": "mobilevit", + "neck_hidden_sizes": [ + 16, + 16, + 24, + 48, + 64, + 80, + 320 + ], + "num_attention_heads": 4, + "num_channels": 3, + "output_stride": 16, + "patch_size": 2, + "qkv_bias": true, + "semantic_loss_ignore_index": 255, + "torch_dtype": "float32", + "transformers_version": "4.20.0.dev0" +} diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/preprocessor_config.json b/models/deeplabv3-mobilevit-xx-small (apple)/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41ceed3cd2aee6c59b57d9260111c09e2e8c6ef --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "crop_size": 512, + "do_center_crop": true, + "do_flip_channels": true, + "do_resize": true, + "feature_extractor_type": "MobileViTFeatureExtractor", + "resample": 2, + "size": 544 +} diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/pytorch_model.bin b/models/deeplabv3-mobilevit-xx-small (apple)/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac79d813f6f6ffac7a2c072d69739373a503d481 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f91dba8e66cf725cc0cd987b9bf47b0e95788bf4050032d55de23217d5ffa60 +size 7572751 diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/source.txt b/models/deeplabv3-mobilevit-xx-small (apple)/source.txt new file mode 100644 index 
0000000000000000000000000000000000000000..96a5adaf704787478078b9656e9cccb91715fdb5 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/source.txt @@ -0,0 +1 @@ +https://huggingface.co/apple/deeplabv3-mobilevit-xx-small \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-xx-small (apple)/tf_model.h5 b/models/deeplabv3-mobilevit-xx-small (apple)/tf_model.h5 new file mode 100644 index 0000000000000000000000000000000000000000..a3d68f1befaf867e7703a0a4c3ed631ce72317b1 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small (apple)/tf_model.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feec578f1413c5f2a9c759d488bc95548b9741a370ed408b2b0f8f5a921394ee +size 7898720 diff --git a/models/deeplabv3-mobilevit-xx-small/.gitattributes b/models/deeplabv3-mobilevit-xx-small/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs 
-text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3-mobilevit-xx-small/README.md b/models/deeplabv3-mobilevit-xx-small/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f85ee00fd4502716d6c236499fa62e654d25b666 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/README.md @@ -0,0 +1,9 @@ +--- +base_model: apple/deeplabv3-mobilevit-xx-small +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/apple/deeplabv3-mobilevit-xx-small with ONNX weights to be compatible with Transformers.js. + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/deeplabv3-mobilevit-xx-small/config.json b/models/deeplabv3-mobilevit-xx-small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7ce2691b6065fd0ccdd2ca75bcef71e45fc862f --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/config.json @@ -0,0 +1,91 @@ +{ + "_name_or_path": "apple/deeplabv3-mobilevit-xx-small", + "architectures": [ + "MobileViTForSemanticSegmentation" + ], + "aspp_dropout_prob": 0.1, + "aspp_out_channels": 256, + "atrous_rates": [ + 6, + 12, + 18 + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "conv_kernel_size": 3, + "expand_ratio": 2.0, + "hidden_act": "silu", + "hidden_dropout_prob": 0.05, + "hidden_sizes": [ + 64, + 80, + 96 + ], + "id2label": { + "0": "background", + "1": "aeroplane", + "2": "bicycle", + "3": "bird", + "4": "boat", + "5": "bottle", + "6": "bus", + "7": "car", + "8": "cat", + "9": "chair", + "10": "cow", + "11": "diningtable", + "12": "dog", + "13": "horse", + "14": "motorbike", + "15": "person", + "16": "pottedplant", + "17": "sheep", + "18": "sofa", + "19": "train", + "20": "tvmonitor" + }, + "image_size": 512, + "initializer_range": 0.02, + "label2id": { + "aeroplane": 1, + "background": 0, + "bicycle": 2, + "bird": 3, + "boat": 4, + "bottle": 5, + "bus": 6, + "car": 7, + "cat": 8, + "chair": 9, + "cow": 10, + "diningtable": 11, + "dog": 12, + "horse": 13, + "motorbike": 14, + "person": 15, + "pottedplant": 16, + "sheep": 17, + "sofa": 18, + "train": 19, + "tvmonitor": 20 + }, + "layer_norm_eps": 1e-05, + "mlp_ratio": 2.0, + "model_type": "mobilevit", + "neck_hidden_sizes": [ + 16, + 16, + 24, + 48, + 64, + 80, + 320 + ], + "num_attention_heads": 4, + "num_channels": 3, + "output_stride": 16, + "patch_size": 2, + "qkv_bias": true, + "semantic_loss_ignore_index": 255, + "transformers_version": "4.30.2" +} diff --git a/models/deeplabv3-mobilevit-xx-small/onnx/model.onnx 
b/models/deeplabv3-mobilevit-xx-small/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4e3fe2ccd5d62b6938845f0a028bec296b2fc44d --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526bc2e2ba04db5e638ee3eaabf96959a8955296a02f690b1e584a58db958caa +size 7727644 diff --git a/models/deeplabv3-mobilevit-xx-small/onnx/model_fp16.onnx b/models/deeplabv3-mobilevit-xx-small/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2b66e47b58fa4fa864e9b384174b38d0dd5b6e19 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbccc1c508fc574d2e1abbffceaa7865178c2d027434b876e50ce018b549954e +size 4128262 diff --git a/models/deeplabv3-mobilevit-xx-small/onnx/model_quantized.onnx b/models/deeplabv3-mobilevit-xx-small/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ee4f52c2091856422975b180897ad34804545b00 --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc714a550dce9a934c81067f61decfcef39a4df526658cd988d6f2eb3b12ea3 +size 2511467 diff --git a/models/deeplabv3-mobilevit-xx-small/preprocessor_config.json b/models/deeplabv3-mobilevit-xx-small/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9681fc7bed5716a2079910c45cf1806115d0d0aa --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/preprocessor_config.json @@ -0,0 +1,18 @@ +{ + "crop_size": { + "height": 512, + "width": 512 + }, + "do_center_crop": true, + "do_flip_channel_order": true, + "do_flip_channels": true, + "do_rescale": true, + "do_resize": true, + "feature_extractor_type": "MobileViTFeatureExtractor", + "image_processor_type": "MobileViTFeatureExtractor", + "resample": 2, + 
"rescale_factor": 0.00392156862745098, + "size": { + "shortest_edge": 544 + } +} diff --git a/models/deeplabv3-mobilevit-xx-small/quant_config.json b/models/deeplabv3-mobilevit-xx-small/quant_config.json new file mode 100644 index 0000000000000000000000000000000000000000..75aee7f5d5e4b68a62457e007a38a270b51a81ca --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/quant_config.json @@ -0,0 +1,34 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "MatMul", + "Unsqueeze", + "Cast", + "Relu", + "Resize", + "Add", + "Slice", + "Constant", + "Softmax", + "Mul", + "Shape", + "Conv", + "Reshape", + "Sub", + "Transpose", + "Div", + "Gather", + "ReduceMean", + "Sqrt", + "GlobalAveragePool", + "Concat", + "Pow", + "Sigmoid" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/deeplabv3-mobilevit-xx-small/source.txt b/models/deeplabv3-mobilevit-xx-small/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..28eb093be74d1b8e07399783ea7fc9aab404041e --- /dev/null +++ b/models/deeplabv3-mobilevit-xx-small/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/deeplabv3-mobilevit-xx-small \ No newline at end of file diff --git a/models/deeplabv3p-resnet50-human/.gitattributes b/models/deeplabv3p-resnet50-human/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplabv3p-resnet50-human/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel 
filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3p-resnet50-human/README.md b/models/deeplabv3p-resnet50-human/README.md new file mode 100644 index 0000000000000000000000000000000000000000..aade4df2bed62cff415a230a0fdbbf32e7d06c03 --- /dev/null +++ b/models/deeplabv3p-resnet50-human/README.md @@ -0,0 +1,101 @@ +--- +license: cc0-1.0 +tags: +- art +- computer vision +- Image segmentation +--- + +# DeepLabV3+ ResNet50 for human body parts segmentation + +This is a very simple ONNX model that can segment human body parts. + +## Why this model + +This model is a ONNX transposition of [keras-io/deeplabv3p-resnet50](https://huggingface.co/keras-io/deeplabv3p-resnet50) +where the provided model can segment human body parts. All the others models that I found was trained on +city segmentation. + +The original model is built for old version of Keras and cannot be used with recent version of TensorFlow. 
+I translated the model to ONNX format. + +## Usage + +Get the `deeplabv3p-resnet50-human.onnx` file and use it with ONNXRuntime package. + +The result of `model.run` is a `(1, 1, 512, 512, 20)` tensor: + +- 1: number of output (you can squeeze it) +- 1: batch size (you can squeeze it) +- 512, 512: the size of the image (fixed) +- 20: number of classes, so you can take the `argmax`` of the tensor to get the class of each pixel + +```python +import onnxruntime +import numpy as np +from PIL import Image + +model = onnxruntime.InferenceSession("deeplabv3p-resnet50-human.onnx") + +img = Image.open(sys.argv[1] if len(sys.argv) > 1 else "image.jpg") +img = img.resize((512, 512)) +img = np.array(img).astype(np.float32) / 127.5 - 1 + +# infer +input_name = model.get_inputs()[0].name +output_name = model.get_outputs()[0].name +result = model.run([output_name], {input_name: img}) + +# squeeze, argmax... +result = np.array(result[0]) +# argmax the classes, remove the batch size +result = result.argmax(axis=3).squeeze(0) + +# get the masks +for i in range(20): + detected = result == i # get the detected pixels for the class i + # detected is a 512, 512 boolean array + mask = np.zeros_like(img) + mask[detected] = 255 + Image.fromarray(mask).show() # or save, or return the mask... +``` + +## Classes index + +This is the list of classes that the model can detect (some classes are not specifically identified, see below): + +- 0: "background", +- 1: "unknown", +- 2: "hair", +- 3: "unknown", +- 4: "glasses", +- 5: "top-clothes", +- 6: "unknown", +- 7: "unknown", +- 8: "unknown", +- 9: "bottom-clothes", +- 10: "torso-skin", +- 11: "unknown", +- 12: "unknown", +- 13: "face", +- 14: "left-arm", +- 15: "right-arm", +- 16: "left-leg", +- 17: "right-leg", +- 18: "left-foot", +- 19: "right-foot", + +## Known limitation + +- The model could fail on portrait images, because the model was trained on "full body" images. +- There are some classes that I don't know what they are. 
I can't find the list of classes (help !). +- The model is not perfect, and can fail on some images. I'm not the author of the model, so I can't fix it. + +## License + +The [original model card](https://huggingface.co/keras-io/deeplabv3p-resnet50/blob/main/README.md) proposes the "CC0-1.0" +license. I don't know if it's the right license for the model, but I keep it. + +> Anyway, thanks to the authors of the model for sharing it and to leave it open to use. + +This means that you may use the model, share, modify, and distribute it without any restriction. \ No newline at end of file diff --git a/models/deeplabv3p-resnet50-human/deeplabv3p-resnet50-human.onnx b/models/deeplabv3p-resnet50-human/deeplabv3p-resnet50-human.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8376a597a104a6e80efc061e53b61ecc68247b2b --- /dev/null +++ b/models/deeplabv3p-resnet50-human/deeplabv3p-resnet50-human.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e823a82da10ba24c29adfb544130684568c46bfac865e215bbace3b4035a71 +size 47210581 diff --git a/models/deeplabv3p-resnet50-human/source.txt b/models/deeplabv3p-resnet50-human/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..a50b921bfeb1a8750ea3cabefddcd940c03ce802 --- /dev/null +++ b/models/deeplabv3p-resnet50-human/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Metal3d/deeplabv3p-resnet50-human \ No newline at end of file diff --git a/models/deeplabv3plus-mobilenetv2/.gitattributes b/models/deeplabv3plus-mobilenetv2/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplabv3plus-mobilenetv2/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs 
diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3plus-mobilenetv2/README.md b/models/deeplabv3plus-mobilenetv2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..46957602e9da426e951b7155912efc79cc282cbc --- /dev/null +++ b/models/deeplabv3plus-mobilenetv2/README.md @@ -0,0 +1,40 @@ +--- +license: apache-2.0 +pipeline_tag: image-segmentation +base_model: +- google/deeplabv3_mobilenet_v2_1.0_513 +--- + +# Introduction + +This repository stores the model for Deeplabv3plus-mobilenetv2, compatible with Kalray's neural network API.
+Please see https://github.com/kalray/kann-models-zoo for details and proper usage.
+ +# Contents + +- ONNX: deeplab-mb2_bilinear.onnx, deeplab-mb2_asym_floor.onnx (converted from tensorflow model) +- Tensorflow: deeplab_mobilenetv2_voc2012_513x513.pb + +# Reference links + +- github: https://github.com/tensorflow/models/blob/archive/research/deeplab/g3doc/model_zoo.md +- weights: http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz + +Lecture reference +```BibTex +@inproceedings{deeplabv3plus2018, + title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, + author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, + booktitle={ECCV}, + year={2018} +} + +@inproceedings{mobilenetv22018, + title={MobileNetV2: Inverted Residuals and Linear Bottlenecks}, + author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen}, + booktitle={CVPR}, + year={2018} +} +``` + +Author: qmuller@kalrayinc.com diff --git a/models/deeplabv3plus-mobilenetv2/deeplab-mb2_asym_floor.onnx b/models/deeplabv3plus-mobilenetv2/deeplab-mb2_asym_floor.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6f87357308cd461e31e09226b818278a0274f8b6 --- /dev/null +++ b/models/deeplabv3plus-mobilenetv2/deeplab-mb2_asym_floor.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7680a4d4fbf9f32821e1a00a7e130f0eb6a45dd67e49387a9ef9b6de43772601 +size 8438083 diff --git a/models/deeplabv3plus-mobilenetv2/deeplab-mb2_bilinear.onnx b/models/deeplabv3plus-mobilenetv2/deeplab-mb2_bilinear.onnx new file mode 100644 index 0000000000000000000000000000000000000000..93f3425d7eb9f3ca193e6b9450dda983faaca651 --- /dev/null +++ b/models/deeplabv3plus-mobilenetv2/deeplab-mb2_bilinear.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e793c4e28c2f1768c08901b9169342317de382e7773cd9fd622aaf63b5fecb27 +size 8438125 diff --git 
a/models/deeplabv3plus-mobilenetv2/deeplab_mobilenetv2_voc2012_513x513.pb b/models/deeplabv3plus-mobilenetv2/deeplab_mobilenetv2_voc2012_513x513.pb new file mode 100644 index 0000000000000000000000000000000000000000..b5674d24810a45dfb0e4086a2f225a0d22842fb5 --- /dev/null +++ b/models/deeplabv3plus-mobilenetv2/deeplab_mobilenetv2_voc2012_513x513.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11d8f37a9993f67f54e85187a0acafe28f6408f8592e3a3caab245bb88c4fd1 +size 8778958 diff --git a/models/deeplabv3plus-mobilenetv2/source.txt b/models/deeplabv3plus-mobilenetv2/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..81b97e31262f09f808c1315750c99e95b814962a --- /dev/null +++ b/models/deeplabv3plus-mobilenetv2/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Kalray/deeplabv3plus-mobilenetv2 \ No newline at end of file diff --git a/models/deeplabv3plus-resnet50/.gitattributes b/models/deeplabv3plus-resnet50/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/deeplabv3plus-resnet50/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs 
merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/deeplabv3plus-resnet50/README.md b/models/deeplabv3plus-resnet50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..138ea70007559c76df90ce97961c5004f02bdda3 --- /dev/null +++ b/models/deeplabv3plus-resnet50/README.md @@ -0,0 +1,37 @@ +--- +license: apache-2.0 +pipeline_tag: image-segmentation +--- + +# Introduction + +This repository stores the model for Deeplabv3plus-resnet50, compatible with Kalray's neural network API.
+Please see https://github.com/kalray/kann-models-zoo for details and proper usage.
+ +# Contents + +- ONNX: deeplabv3plus-resnet50-s.onnx +- Tensorflow: deeplabv3plus_resnet50.pb
+ +# Reference links + +- github: https://github.com/tensorflow/models/blob/archive/research/deeplab/g3doc/model_zoo.md +- weights: http://download.tensorflow.org/models/resnet_v1_50_2018_05_04.tar.gz + +Lecture reference +```BibTex +@inproceedings{deeplabv3plus2018, + title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, + author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, + booktitle={ECCV}, + year={2018} +} +``` + +Author: qmuller@kalrayinc.com + diff --git a/models/deeplabv3plus-resnet50/deeplabv3plus-resnet50-optimized.onnx b/models/deeplabv3plus-resnet50/deeplabv3plus-resnet50-optimized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1e932d1dd55b9f5d4b930a74e4b4fb26a0247501 --- /dev/null +++ b/models/deeplabv3plus-resnet50/deeplabv3plus-resnet50-optimized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cefeca415a3434c62286513da47ad6ba0032c3f65f98d96ebbb4aaaf772c5a +size 48164329 diff --git a/models/deeplabv3plus-resnet50/deeplabv3plus-resnet50-s.onnx b/models/deeplabv3plus-resnet50/deeplabv3plus-resnet50-s.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a5419f03e4eb7dd1b568d2633c1d90dd9334c137 --- /dev/null +++ b/models/deeplabv3plus-resnet50/deeplabv3plus-resnet50-s.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9575ecbfda3a508303e7e506478b5cb9df3a368c2e906cded4448ed302759e2 +size 158485256 diff --git a/models/deeplabv3plus-resnet50/deeplabv3plus_resnet50.pb b/models/deeplabv3plus-resnet50/deeplabv3plus_resnet50.pb new file mode 100644 index 0000000000000000000000000000000000000000..b2852d0ed47cbbedf8a24d859f88b2600c93d057 --- /dev/null +++ b/models/deeplabv3plus-resnet50/deeplabv3plus_resnet50.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58dbbf043575e54e7ce24d1617991f05d3de49b0321da8f92fbddb5b3590036 +size 48179178 diff --git 
a/models/deeplabv3plus-resnet50/source.txt b/models/deeplabv3plus-resnet50/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..90c7200c80d6e28f995bdaabe1804e790895b23d --- /dev/null +++ b/models/deeplabv3plus-resnet50/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Kalray/deeplabv3plus-resnet50 \ No newline at end of file