diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..5d1dcee7d12b45d9279a3c3d0f74d9217ff096b1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +Panoptic[[:space:]]SegFormer.[[:space:]]Delving[[:space:]]Deeper[[:space:]]into[[:space:]]Panoptic[[:space:]]Segmentation[[:space:]]with[[:space:]]Transformers.pdf filter=lfs diff=lfs merge=lfs -text +SegFormer.[[:space:]]Simple[[:space:]]and[[:space:]]Efficient[[:space:]]Design[[:space:]]for[[:space:]]Semantic[[:space:]]Segmentation[[:space:]]with[[:space:]]Transformers.pdf filter=lfs diff=lfs merge=lfs -text +SegFormer3D.[[:space:]]An[[:space:]]Efficient[[:space:]]Transformer[[:space:]]for[[:space:]]3D[[:space:]]Medical[[:space:]]Image[[:space:]]Segmentation.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/Panoptic SegFormer. Delving Deeper into Panoptic Segmentation with Transformers.pdf b/Panoptic SegFormer. Delving Deeper into Panoptic Segmentation with Transformers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3dfec0c2e44e5271fabd5810ca16f094dd2b6ca2 --- /dev/null +++ b/Panoptic SegFormer. Delving Deeper into Panoptic Segmentation with Transformers.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b2d4d88adf766fa7d289bc2bfcbfa1f6eedbe3396ecc7e969e6273341ac7d7 +size 7090451 diff --git a/SegFormer. Simple and Efficient Design for Semantic Segmentation with Transformers.pdf b/SegFormer. Simple and Efficient Design for Semantic Segmentation with Transformers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fb40c702e3af134c12ffbfba5830a7fbe7930945 --- /dev/null +++ b/SegFormer. 
Simple and Efficient Design for Semantic Segmentation with Transformers.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8867c9600277fe39e046ad1ef9050f0c7087a24736284b85710265821c1027 +size 5780677 diff --git a/SegFormer3D. An Efficient Transformer for 3D Medical Image Segmentation.pdf b/SegFormer3D. An Efficient Transformer for 3D Medical Image Segmentation.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f6b276e4c527963dccb74c8c862bf4036eba2fbf --- /dev/null +++ b/SegFormer3D. An Efficient Transformer for 3D Medical Image Segmentation.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1503364e7a2291cfa8dd94a8d3a50149da476c39b20c95ea98a21f3f2bc53b48 +size 4107042 diff --git a/code/Comfyui_segformer_b2_clothes.zip b/code/Comfyui_segformer_b2_clothes.zip new file mode 100644 index 0000000000000000000000000000000000000000..ed21650610289470621392a3c4df0dcaab7dad00 --- /dev/null +++ b/code/Comfyui_segformer_b2_clothes.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb699654aa551351852b375be85838b962c1d78f7e5168316eaf5409773ecff8 +size 44829 diff --git a/code/Panoptic-SegFormer.zip b/code/Panoptic-SegFormer.zip new file mode 100644 index 0000000000000000000000000000000000000000..5d5893dc0559a3bea36f80a9180473a085a0ee31 --- /dev/null +++ b/code/Panoptic-SegFormer.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d226c424fd4f35072925b640a32924e278c79e88602eadb95fa8a95c8ab526 +size 998883 diff --git a/code/SegFormer (FrancescoSaverioZuppichini).zip b/code/SegFormer (FrancescoSaverioZuppichini).zip new file mode 100644 index 0000000000000000000000000000000000000000..4012bda86a9b39f3fe980ddd7c4ccf7755b2f4aa --- /dev/null +++ b/code/SegFormer (FrancescoSaverioZuppichini).zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93984eefd51b5d26e7eff815f69f0c3961c6a2a8fc795be33397015d211f36dc +size 1707368 diff --git 
a/code/SegFormer-tf.zip b/code/SegFormer-tf.zip new file mode 100644 index 0000000000000000000000000000000000000000..8a0c9662ab594f5528a5cc8cfdb1024eefa2580d --- /dev/null +++ b/code/SegFormer-tf.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:703b70036fc543a7a193cf08b44860f7a0496ca6d199621b9cfc6fe0b6a2140a +size 13001087 diff --git a/code/SegFormer.zip b/code/SegFormer.zip new file mode 100644 index 0000000000000000000000000000000000000000..30754a28443d1601c1098e847b8a4f21c18fcab8 --- /dev/null +++ b/code/SegFormer.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38e8b39a58aa26875f18d637faf55295494a3c447091a22d5b578366e69f96e +size 6618182 diff --git a/code/SegFormer3D.zip b/code/SegFormer3D.zip new file mode 100644 index 0000000000000000000000000000000000000000..7fd785bfe5b37bd53fcb66af536234e18c54c77e --- /dev/null +++ b/code/SegFormer3D.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe267d47ddb21d169affb5bbb9da6df85fdfe5285e66dc86afeb54e196f4ad1 +size 14008086 diff --git a/code/SegFormer_Segmentation.zip b/code/SegFormer_Segmentation.zip new file mode 100644 index 0000000000000000000000000000000000000000..7b2e0a98553144a735a4dc3ffe40c6ae52bc22ea --- /dev/null +++ b/code/SegFormer_Segmentation.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0602d37d82fe2a2a18f22b071303bc4ff06f46c89e5cc84fd1800c69a7922eae +size 2741258 diff --git a/code/segformer (anibali).zip b/code/segformer (anibali).zip new file mode 100644 index 0000000000000000000000000000000000000000..01245bfeaa12d44bd82353246d73648f80506740 --- /dev/null +++ b/code/segformer (anibali).zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67c0625a1c53b7b828847a5d8ebed4efcce76379e0d890835bc1d4790ff1baa +size 702665 diff --git a/code/segformer-pytorch (bubbliiiing).zip b/code/segformer-pytorch (bubbliiiing).zip new file mode 100644 index 
0000000000000000000000000000000000000000..6f734690cc0900edf9973843fc7f6e7788dd688a --- /dev/null +++ b/code/segformer-pytorch (bubbliiiing).zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0059a5359498608e26dd56baebaa1e4911c76c521284386235b09ec62719968d +size 1356854 diff --git a/code/segformer-pytorch (rulixiang).zip b/code/segformer-pytorch (rulixiang).zip new file mode 100644 index 0000000000000000000000000000000000000000..84d45964d990c1e410df80f970af46f50ac2307f --- /dev/null +++ b/code/segformer-pytorch (rulixiang).zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79de92f3c280ad9263c5b256e3bbb037d3bc0f3a30bebc35e228d60dd65b286 +size 232551 diff --git a/code/segformer-tf-transformers.zip b/code/segformer-tf-transformers.zip new file mode 100644 index 0000000000000000000000000000000000000000..670eeb185b5c5d22d46fe546b820a3c947080f0c --- /dev/null +++ b/code/segformer-tf-transformers.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32259b182b33a44275df012327b9fdfe1e9195b1f21b0f60c5ab40334f035112 +size 11700677 diff --git a/code/segformer_b2_clothes.zip b/code/segformer_b2_clothes.zip new file mode 100644 index 0000000000000000000000000000000000000000..44a0264df065721e48bd3c00c5fd912c9d5949cd --- /dev/null +++ b/code/segformer_b2_clothes.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ae1ef2303c6ae1a5c245dd5b7797db32c5d0591c97262d40480154ba919509 +size 36590 diff --git a/code/segmentation_pytorch.zip b/code/segmentation_pytorch.zip new file mode 100644 index 0000000000000000000000000000000000000000..0979b4dc512ea4588a0199bf338d88c9dd84265c --- /dev/null +++ b/code/segmentation_pytorch.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937e851c89eefe4b47a4fb122c3854510ac03aabea10ee765e7966ae2d3b6dba +size 281622224 diff --git a/models/Panoptic-SegFormer/panoptic_segformer_pvtv2b5_2x.pth 
b/models/Panoptic-SegFormer/panoptic_segformer_pvtv2b5_2x.pth new file mode 100644 index 0000000000000000000000000000000000000000..923be349365b346b4836dd3f37796d955c554751 --- /dev/null +++ b/models/Panoptic-SegFormer/panoptic_segformer_pvtv2b5_2x.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6dcdfda5f825711dc01c10b18ba56a74f06b578b7e5afd79b54ed70f8a8ddc7 +size 420553488 diff --git a/models/Panoptic-SegFormer/panoptic_segformer_r101_2x.pth b/models/Panoptic-SegFormer/panoptic_segformer_r101_2x.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbec32bcd30492f73f7b11011d9b03e7548776ae --- /dev/null +++ b/models/Panoptic-SegFormer/panoptic_segformer_r101_2x.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b25edf173bbf387a8672f7c685e1e6bd26d19228b1cce7edef0ff9f4093296 +size 281902532 diff --git a/models/Panoptic-SegFormer/panoptic_segformer_r50_1x.pth b/models/Panoptic-SegFormer/panoptic_segformer_r50_1x.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a9723d668beb4f8a1bd92b10afac6697f76880c --- /dev/null +++ b/models/Panoptic-SegFormer/panoptic_segformer_r50_1x.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6de668b1f921292cdd5241101f9cb980084e2321b34e0873d9f37bcd6d68ee +size 205624386 diff --git a/models/Panoptic-SegFormer/panoptic_segformer_r50_2x.pth b/models/Panoptic-SegFormer/panoptic_segformer_r50_2x.pth new file mode 100644 index 0000000000000000000000000000000000000000..840da44fa7e5b2cd569f00daac154bfe68072221 --- /dev/null +++ b/models/Panoptic-SegFormer/panoptic_segformer_r50_2x.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8cbf8210dd67aa955b219ef851deddf87fde7f1d23641e567fb7396bee8765 +size 205624386 diff --git a/models/Panoptic-SegFormer/panoptic_segformer_swinl_2x.pth b/models/Panoptic-SegFormer/panoptic_segformer_swinl_2x.pth new file mode 100644 index 
0000000000000000000000000000000000000000..d74f9b7c48b020e44e21bf46288751b89df3c34d --- /dev/null +++ b/models/Panoptic-SegFormer/panoptic_segformer_swinl_2x.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb84715ae9d4d9057f8706f4deb852c56b693e85966ce973ca639a68c2bd011 +size 886211471 diff --git a/models/Panoptic-SegFormer/source.txt b/models/Panoptic-SegFormer/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..6cb23e410d22e27567cb077b67e2264b01d89812 --- /dev/null +++ b/models/Panoptic-SegFormer/source.txt @@ -0,0 +1 @@ +https://github.com/zhiqi-li/Panoptic-SegFormer \ No newline at end of file diff --git a/models/SegFormer3D/best_segformer3d_brats_performance.pth b/models/SegFormer3D/best_segformer3d_brats_performance.pth new file mode 100644 index 0000000000000000000000000000000000000000..7c55058d01d26c8c51f323801324c9cf2ba1577f --- /dev/null +++ b/models/SegFormer3D/best_segformer3d_brats_performance.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:483662ce1f1dbbaf0b5451b716d4fd3d56ada215218c37790d576e2b3c10b063 +size 18220102 diff --git a/models/SegFormer3D/nnformer_visualization.zip b/models/SegFormer3D/nnformer_visualization.zip new file mode 100644 index 0000000000000000000000000000000000000000..a2411865fb5b004bac9d2fd6b26b82a80f9a3978 --- /dev/null +++ b/models/SegFormer3D/nnformer_visualization.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f8d0d238f6f2a06d65f3f16ab1d6943043bc213dc0c45c4c3fc73b93a41983 +size 6865889 diff --git a/models/SegFormer3D/segformer3d_visualization.zip b/models/SegFormer3D/segformer3d_visualization.zip new file mode 100644 index 0000000000000000000000000000000000000000..b2f073d4d541131aa6e4a7bb916cc37142d52025 --- /dev/null +++ b/models/SegFormer3D/segformer3d_visualization.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b361167934b6ab4ef9d1c23ec9a431ac53345e1c3e4e4f5010e8ba5205ab45db +size 
462729539 diff --git a/models/SegFormer3D/source.txt b/models/SegFormer3D/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..c9c57d51975695de9ada65387c0a4475d353ab41 --- /dev/null +++ b/models/SegFormer3D/source.txt @@ -0,0 +1,4 @@ +https://github.com/OSUPCVLab/SegFormer3D +https://drive.google.com/file/d/1MfcyyS6yEEC2-wQ5SHgC3v9sUVo285-I/view +https://drive.google.com/file/d/1QXAcZbOAdMDOkQXAAXHGLl6Y52j8-Aok/view +https://drive.google.com/file/d/1Lb4rIkwIpuJS3tomBiKl7FBtNF2dv_6M/view \ No newline at end of file diff --git a/models/segformer (Kalray)/.gitattributes b/models/segformer (Kalray)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer (Kalray)/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* 
filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer (Kalray)/README.md b/models/segformer (Kalray)/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d3571cc81704ecf4af144edeacc82b9ad142d32c --- /dev/null +++ b/models/segformer (Kalray)/README.md @@ -0,0 +1,3 @@ +--- +license: apache-2.0 +--- \ No newline at end of file diff --git a/models/segformer (Kalray)/segformer-b0-ade-512-512.onnx b/models/segformer (Kalray)/segformer-b0-ade-512-512.onnx new file mode 100644 index 0000000000000000000000000000000000000000..462c9de84f1a936cead8261175d95713463c773a --- /dev/null +++ b/models/segformer (Kalray)/segformer-b0-ade-512-512.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6222d9a62653ddfc1fb47c3c695621bb87177aeaeff2f7c650f14bcb0bdd5a8c +size 14888038 diff --git a/models/segformer (Kalray)/segformer-b2-ade-512-512.onnx b/models/segformer (Kalray)/segformer-b2-ade-512-512.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fb64fb8d4fe03bd4a5ad0d7f97682b4935ba68b4 --- /dev/null +++ b/models/segformer (Kalray)/segformer-b2-ade-512-512.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0a1074cbbcf9d1533f8ebbe17ba310d0a3bb09dee09732569653893e5164aee +size 99951229 diff --git a/models/segformer (Kalray)/source.txt b/models/segformer (Kalray)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ff35b6f3c438b073edc53743cc37bbe1f6c8d88 --- /dev/null +++ b/models/segformer (Kalray)/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Kalray/segformer \ No newline at end of file diff --git 
a/models/segformer (anibali)/pretrained/segformer-0.0.0.zip b/models/segformer (anibali)/pretrained/segformer-0.0.0.zip new file mode 100644 index 0000000000000000000000000000000000000000..11080659011277a94d059cb51ef8c27fc3e7ad0f --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer-0.0.0.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d98cddbd36dcb22948b1000b1b46b2412d572bbe6f164bc817c6440b3a2bf01 +size 487 diff --git a/models/segformer (anibali)/pretrained/segformer_b0_1024x1024_city_160k-3e581249.pth b/models/segformer (anibali)/pretrained/segformer_b0_1024x1024_city_160k-3e581249.pth new file mode 100644 index 0000000000000000000000000000000000000000..781cc85e4d2055405f8a86913c6cc091054680cb --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b0_1024x1024_city_160k-3e581249.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e58124941fbac217b9ebc692a1af6c25bf3169b1890029bb9bd4659e4e0a979 +size 14936193 diff --git a/models/segformer (anibali)/pretrained/segformer_b0_512x512_ade_160k-d0c08cfd.pth b/models/segformer (anibali)/pretrained/segformer_b0_512x512_ade_160k-d0c08cfd.pth new file mode 100644 index 0000000000000000000000000000000000000000..1aeda6888db4344e1488637931cd41f264da647c --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b0_512x512_ade_160k-d0c08cfd.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c08cfd289a0fda85f7247330149c1f8b74793b0cde798de9df05c6d0b4340e +size 15070849 diff --git a/models/segformer (anibali)/pretrained/segformer_b0_backbone_imagenet-eb42d485.pth b/models/segformer (anibali)/pretrained/segformer_b0_backbone_imagenet-eb42d485.pth new file mode 100644 index 0000000000000000000000000000000000000000..d00c5e4e4da54692144fe1cf051818aa767915c3 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b0_backbone_imagenet-eb42d485.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:eb42d4850606120baca988591e3370b5131eb1f117adae22c40abbd20e8d2c90 +size 13328993 diff --git a/models/segformer (anibali)/pretrained/segformer_b1_1024x1024_city_160k-e415b121.pth b/models/segformer (anibali)/pretrained/segformer_b1_1024x1024_city_160k-e415b121.pth new file mode 100644 index 0000000000000000000000000000000000000000..4704ae4eb5ae1d028b570bc73b6efc8f9d9b5367 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b1_1024x1024_city_160k-e415b121.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e415b121f9a97bfb3bb5c082bc2ca34cd41b65166d08cbcf89137afd801ea679 +size 54788737 diff --git a/models/segformer (anibali)/pretrained/segformer_b1_512x512_ade_160k-1cd52578.pth b/models/segformer (anibali)/pretrained/segformer_b1_512x512_ade_160k-1cd52578.pth new file mode 100644 index 0000000000000000000000000000000000000000..da79d7f0129a15d833c521af162ab798b4d8ebb1 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b1_512x512_ade_160k-1cd52578.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd525781069dd8b879081bf494e6a35f78f30fbe6cb054bac7c0fcce870dfdf +size 54923393 diff --git a/models/segformer (anibali)/pretrained/segformer_b1_backbone_imagenet-357971ac.pth b/models/segformer (anibali)/pretrained/segformer_b1_backbone_imagenet-357971ac.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a8d0859734f78a6b2af768c324dacb7559e6945 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b1_backbone_imagenet-357971ac.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357971ac36d9c1b39d2f052218fda762f2f20ebb6980fe55847efcb1deb413c9 +size 52657249 diff --git a/models/segformer (anibali)/pretrained/segformer_b2_1024x1024_city_160k-9793f658.pth b/models/segformer (anibali)/pretrained/segformer_b2_1024x1024_city_160k-9793f658.pth new file mode 100644 index 
0000000000000000000000000000000000000000..906e114290b5c72aa21d8dbd981466c9ec9347e4 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b2_1024x1024_city_160k-9793f658.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9793f658a662feacdf786d990581f14c3181ae09d7495ca2df6d8b65d3ac7a8d +size 109557321 diff --git a/models/segformer (anibali)/pretrained/segformer_b2_512x512_ade_160k-fa162a4f.pth b/models/segformer (anibali)/pretrained/segformer_b2_512x512_ade_160k-fa162a4f.pth new file mode 100644 index 0000000000000000000000000000000000000000..ddf67406d3d82851730dd4585906a2dbac37c2ea --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b2_512x512_ade_160k-fa162a4f.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa162a4f08ac66ec008c95c46e463823af4842577f278dd2fdcf94789510d949 +size 109960265 diff --git a/models/segformer (anibali)/pretrained/segformer_b2_backbone_imagenet-3c162bb8.pth b/models/segformer (anibali)/pretrained/segformer_b2_backbone_imagenet-3c162bb8.pth new file mode 100644 index 0000000000000000000000000000000000000000..7523c0284be68fbe80ab8b05e23fc6406da80f27 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b2_backbone_imagenet-3c162bb8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c162bb8559ae234260e31c15a59eca7525ac252592b6fcd179694197c34048f +size 96883433 diff --git a/models/segformer (anibali)/pretrained/segformer_b3_1024x1024_city_160k-732b9fde.pth b/models/segformer (anibali)/pretrained/segformer_b3_1024x1024_city_160k-732b9fde.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ce30495c228e74b655863a06a28072b53016178 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b3_1024x1024_city_160k-732b9fde.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732b9fde5e68a49c088377b4b0d24c52efa43282bba0183d1e14ca7a43d3057a +size 189135145 diff --git 
a/models/segformer (anibali)/pretrained/segformer_b3_512x512_ade_160k-5abb3eb3.pth b/models/segformer (anibali)/pretrained/segformer_b3_512x512_ade_160k-5abb3eb3.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d37b187353c3dad2d4b965842b7d69240ba98f5 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b3_512x512_ade_160k-5abb3eb3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abb3eb34bcca13b0f7ee57065c203e4c37b8bf64c57df6355f0754eb78a27c7 +size 189538089 diff --git a/models/segformer (anibali)/pretrained/segformer_b3_backbone_imagenet-0d113e32.pth b/models/segformer (anibali)/pretrained/segformer_b3_backbone_imagenet-0d113e32.pth new file mode 100644 index 0000000000000000000000000000000000000000..832923a7796283480804fcb0dc35eabdbf53b33a --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b3_backbone_imagenet-0d113e32.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d113e32a2012ecc4d6e46f8cb716a51a26ab1223ccf14d7949c79d72450c525 +size 176459081 diff --git a/models/segformer (anibali)/pretrained/segformer_b4_1024x1024_city_160k-1836d907.pth b/models/segformer (anibali)/pretrained/segformer_b4_1024x1024_city_160k-1836d907.pth new file mode 100644 index 0000000000000000000000000000000000000000..52f0f6683445c4a5ccf5e92c9297b4056a2f48bb --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b4_1024x1024_city_160k-1836d907.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1836d907b44c70d634662a6545cd745e8cff63d9d608fe053b04f9334f9188ea +size 256297889 diff --git a/models/segformer (anibali)/pretrained/segformer_b4_512x512_ade_160k-bb0fa50c.pth b/models/segformer (anibali)/pretrained/segformer_b4_512x512_ade_160k-bb0fa50c.pth new file mode 100644 index 0000000000000000000000000000000000000000..2908788950e048dbd6d070da621f7097678ef23b --- /dev/null +++ b/models/segformer 
(anibali)/pretrained/segformer_b4_512x512_ade_160k-bb0fa50c.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb0fa50cbf4e57b9c23bc10ac41738a47de34a0cafd7dd3d27071d21a6c15251 +size 256700833 diff --git a/models/segformer (anibali)/pretrained/segformer_b4_backbone_imagenet-b757a54d.pth b/models/segformer (anibali)/pretrained/segformer_b4_backbone_imagenet-b757a54d.pth new file mode 100644 index 0000000000000000000000000000000000000000..1bbabf966f8128e8e160336005942906077926c2 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b4_backbone_imagenet-b757a54d.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b757a54dc19b3aba660087d3f6ad9c3e3f78fd19610ef8db1605e17a638496be +size 243619457 diff --git a/models/segformer (anibali)/pretrained/segformer_b5_1024x1024_city_160k-2ca4dff8.pth b/models/segformer (anibali)/pretrained/segformer_b5_1024x1024_city_160k-2ca4dff8.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e85bdedbeb3061b7494556dbcf547b2ebebb3f7 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b5_1024x1024_city_160k-2ca4dff8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca4dff88eb0c4870f80f9f8c6d866796a5acaf246d748523972b2fda1403b45 +size 338767725 diff --git a/models/segformer (anibali)/pretrained/segformer_b5_640x640_ade_160k-106a5e57.pth b/models/segformer (anibali)/pretrained/segformer_b5_640x640_ade_160k-106a5e57.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f2cd0ae356baab5be2018e4ddc366f929eb30e2 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b5_640x640_ade_160k-106a5e57.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106a5e571c655e3223a598bd8db829815c8b55733cd58b9414f350d29f8a87fc +size 339170669 diff --git a/models/segformer (anibali)/pretrained/segformer_b5_backbone_imagenet-d552b33d.pth b/models/segformer 
(anibali)/pretrained/segformer_b5_backbone_imagenet-d552b33d.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf0ee167a47d65a9b6d68b36c4125613bc7dfbb5 --- /dev/null +++ b/models/segformer (anibali)/pretrained/segformer_b5_backbone_imagenet-d552b33d.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d552b33d913654b905d7a7ebd1bfc8ef4bee7b23b95eea601725c18f8cd76c76 +size 326087293 diff --git a/models/segformer (anibali)/source.txt b/models/segformer (anibali)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..079520939e971a260b81e2bb99a91b06835445e3 --- /dev/null +++ b/models/segformer (anibali)/source.txt @@ -0,0 +1 @@ +https://github.com/anibali/segformer/releases \ No newline at end of file diff --git a/models/segformer-b0-finetuned-ade-512-512 (optimum)/.gitattributes b/models/segformer-b0-finetuned-ade-512-512 (optimum)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..07f0db3339ad9053dc95b284c4ae14e014efff89 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (optimum)/.gitattributes @@ -0,0 +1,16 @@ +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tar.gz filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b0-finetuned-ade-512-512 (optimum)/README.md b/models/segformer-b0-finetuned-ade-512-512 (optimum)/README.md new file mode 
100644 index 0000000000000000000000000000000000000000..9a33adb74eb77c2266c7042d961036f2d3c9dd6d --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (optimum)/README.md @@ -0,0 +1,92 @@ +--- +license: other +tags: +- vision +- image-segmentation +datasets: +- scene_parse_150 +widget: +- src: https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg + example_title: House +- src: https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000002.jpg + example_title: Castle +--- + +# SegFormer (b0-sized) model fine-tuned on ADE20k + +SegFormer model fine-tuned on ADE20k at resolution 512x512. It was introduced in the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) by Xie et al. and first released in [this repository](https://github.com/NVlabs/SegFormer). + +Disclaimer: The team releasing SegFormer did not write a model card for this model so this model card has been written by the Hugging Face team. + +## Model description + +SegFormer consists of a hierarchical Transformer encoder and a lightweight all-MLP decode head to achieve great results on semantic segmentation benchmarks such as ADE20K and Cityscapes. The hierarchical Transformer is first pre-trained on ImageNet-1k, after which a decode head is added and fine-tuned altogether on a downstream dataset. + +## Intended uses & limitations + +You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?other=segformer) to look for fine-tuned versions on a task that interests you. 
+ +### How to use + +Here is how to use this model to perform semantic segmentation on an image from the COCO 2017 dataset: + +```python +from transformers import SegformerImageProcessor +from PIL import Image +import requests + +from optimum.onnxruntime import ORTModelForSemanticSegmentation + +image_processor = SegformerImageProcessor.from_pretrained("optimum/segformer-b0-finetuned-ade-512-512") +model = ORTModelForSemanticSegmentation.from_pretrained("optimum/segformer-b0-finetuned-ade-512-512") + +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +image = Image.open(requests.get(url, stream=True).raw) + +inputs = image_processor(images=image, return_tensors="pt").to(device) +outputs = model(**inputs) +logits = outputs.logits # shape (batch_size, num_labels, height/4, width/4) +``` +If you use pipeline: + +```python +from transformers import SegformerImageProcessor, pipeline +from optimum.onnxruntime import ORTModelForSemanticSegmentation + +image_processor = SegformerImageProcessor.from_pretrained("optimum/segformer-b0-finetuned-ade-512-512") +model = ORTModelForSemanticSegmentation.from_pretrained("optimum/segformer-b0-finetuned-ade-512-512") + +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +pipe = pipeline("image-segmentation", model=model, feature_extractor=image_processor) +pred = pipe(url) +``` + +For more code examples, we refer to the [Optimum documentation](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/models). + +### License + +The license for this model can be found [here](https://github.com/NVlabs/SegFormer/blob/master/LICENSE). + +### BibTeX entry and citation info + +```bibtex +@article{DBLP:journals/corr/abs-2105-15203, + author = {Enze Xie and + Wenhai Wang and + Zhiding Yu and + Anima Anandkumar and + Jose M. 
Alvarez and + Ping Luo}, + title = {SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers}, + journal = {CoRR}, + volume = {abs/2105.15203}, + year = {2021}, + url = {https://arxiv.org/abs/2105.15203}, + eprinttype = {arXiv}, + eprint = {2105.15203}, + timestamp = {Wed, 02 Jun 2021 11:46:42 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2105-15203.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` diff --git a/models/segformer-b0-finetuned-ade-512-512 (optimum)/config.json b/models/segformer-b0-finetuned-ade-512-512 (optimum)/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f4f67828171331a9dc9ab29958723ec95b123488 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (optimum)/config.json @@ -0,0 +1,374 @@ +{ + "_name_or_path": "nvidia/segformer-b0-finetuned-ade-512-512", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": 
"base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + "65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + 
"144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen 
door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + "trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "torch_dtype": "float32", + "transformers_version": "4.25.1" +} diff --git a/models/segformer-b0-finetuned-ade-512-512 (optimum)/model.onnx b/models/segformer-b0-finetuned-ade-512-512 (optimum)/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ba61ac42358083d7dd1fb6ad01b2e1d5ada5eeb4 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (optimum)/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a89102115fe3c16230502437b894844ba50cde6f7c800f9884e87c360bcbfc9 +size 15142812 diff --git a/models/segformer-b0-finetuned-ade-512-512 (optimum)/preprocessor_config.json b/models/segformer-b0-finetuned-ade-512-512 (optimum)/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2888c6bca1dadab676eb38e02c29ed77623eed83 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 
(optimum)/preprocessor_config.json @@ -0,0 +1,24 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "feature_extractor_type": "SegformerFeatureExtractor", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b0-finetuned-ade-512-512 (optimum)/source.txt b/models/segformer-b0-finetuned-ade-512-512 (optimum)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..9cd98cdc434b59a0dabe431eed5a3f1d29bda872 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (optimum)/source.txt @@ -0,0 +1 @@ +https://huggingface.co/optimum/segformer-b0-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b0-finetuned-ade-512-512 (visheratin)/.gitattributes b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..c7d9f3332a950355d5a77d85000f05e6f45435ea --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/.gitattributes @@ -0,0 +1,34 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot 
filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b0-finetuned-ade-512-512 (visheratin)/b0.onnx b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/b0.onnx new file mode 100644 index 0000000000000000000000000000000000000000..30b87273ea45d9b1f1e5128b675bdcefad16f3eb --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/b0.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c14ebfb37bf3e41c707524b9f1e77d95207d0f32c1b5b18a3329fd3953a4292 +size 3990504 diff --git a/models/segformer-b0-finetuned-ade-512-512 (visheratin)/b0.onnx.gz b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/b0.onnx.gz new file mode 100644 index 0000000000000000000000000000000000000000..74f7ab86d4f34c0b09691ac0b9ccfa0e66cda181 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/b0.onnx.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220b240b5f8728f35d70bbb9a4829c88f3cca91e0f9fe26299aaa77dcf499b3c +size 2771135 diff --git a/models/segformer-b0-finetuned-ade-512-512 (visheratin)/config.json b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/config.json new file mode 100644 index 
0000000000000000000000000000000000000000..ba9e66725a3c53aeff1e9fede9ed14e6800f76d5 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/config.json @@ -0,0 +1,372 @@ +{ + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + "65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": 
"bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, 
+ "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + "trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + 
"layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "torch_dtype": "float32", + "transformers_version": "4.12.0.dev0" +} diff --git a/models/segformer-b0-finetuned-ade-512-512 (visheratin)/preprocessor_config.json b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..731939640bb201ce03c59a5b2d6bc1ee4f4162b3 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/preprocessor_config.json @@ -0,0 +1,18 @@ +{ + "do_normalize": true, + "do_resize": true, + "feature_extractor_type": "SegformerFeatureExtractor", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "reduce_labels": true, + "resample": 2, + "size": 512 +} diff --git a/models/segformer-b0-finetuned-ade-512-512 (visheratin)/source.txt b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..afc7cf75458de41a52fa80ac7e16c98bdcf9e8ef --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512 (visheratin)/source.txt @@ -0,0 +1 @@ +https://huggingface.co/visheratin/segformer-b0-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b0-finetuned-ade-512-512/.gitattributes b/models/segformer-b0-finetuned-ade-512-512/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text 
+*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b0-finetuned-ade-512-512/README.md b/models/segformer-b0-finetuned-ade-512-512/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f27dd20e00a15a959ee40b71e9f6dba4371b134 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b0-finetuned-ade-512-512 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512 with ONNX weights to be compatible with Transformers.js. 
+ +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b0-finetuned-ade-512-512`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b0-finetuned-ade-512-512'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/house.jpg'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'wall', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'building', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b0-finetuned-ade-512-512/config.json b/models/segformer-b0-finetuned-ade-512-512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5607fd032135d72df01bd50d1ba8101b2fe28831 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/config.json @@ -0,0 +1,373 @@ +{ + "_name_or_path": "nvidia/segformer-b0-finetuned-ade-512-512", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + 
"65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 
127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + 
"trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b0-finetuned-ade-512-512/onnx/model.onnx b/models/segformer-b0-finetuned-ade-512-512/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..88934f0aee698d0f0ab1c04e77955b0a46d755a5 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5c18a4be395f16646438d54c42377ddc202edfa33d5eced0c9506de75c44c2 +size 15335446 diff --git a/models/segformer-b0-finetuned-ade-512-512/onnx/model_fp16.onnx b/models/segformer-b0-finetuned-ade-512-512/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5ef530a660d5c9dc5696162a751a6a8e9d31866c --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ee378a26bb782f960161ab2d99885db168334ec2ab3429ac36afdcc0f2b038 +size 7939696 diff --git a/models/segformer-b0-finetuned-ade-512-512/onnx/model_quantized.onnx b/models/segformer-b0-finetuned-ade-512-512/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1d7578ad961c9f5907897dd1add029096a42a7aa --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9a98d6daf3d926869ab8cc4c2ed7374a2bc23b889bb7ca3b0915d15e3c4756bb +size 4418863 diff --git a/models/segformer-b0-finetuned-ade-512-512/preprocessor_config.json b/models/segformer-b0-finetuned-ade-512-512/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6446f33b72ee83c38d5f0a31ce336a9979dae8a9 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b0-finetuned-ade-512-512/quantize_config.json b/models/segformer-b0-finetuned-ade-512-512/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53b7b72032375b97b7019f1a69de21e40735d22c --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Reshape", + "Sqrt", + "Unsqueeze", + "Pow", + "Mul", + "Slice", + "Gather", + "Sub", + "Relu", + "Resize", + "Softmax", + "ReduceMean", + "Add", + "MatMul", + "Erf", + "Div", + "Constant", + "Transpose", + "Conv", + "Shape", + "Cast", + "Concat" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b0-finetuned-ade-512-512/source.txt b/models/segformer-b0-finetuned-ade-512-512/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..7dee1dd7667ea6f5d56c107be656d5825f59d0b2 --- /dev/null +++ b/models/segformer-b0-finetuned-ade-512-512/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Xenova/segformer-b0-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/.gitattributes b/models/segformer-b0-finetuned-cityscapes-1024-1024/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs 
merge=lfs -text diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/README.md b/models/segformer-b0-finetuned-cityscapes-1024-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4a0e60ac98ea928153cdbd6b022535dcaebf7c8f --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b0-finetuned-cityscapes-1024-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b0-finetuned-cityscapes-1024-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b0-finetuned-cityscapes-1024-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b0-finetuned-cityscapes-1024-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. 
If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/config.json b/models/segformer-b0-finetuned-cityscapes-1024-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..28364c01bf3707302dc88495d8bae913d7cfd11b --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b0-finetuned-cityscapes-1024-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + 
"num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model.onnx b/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f720553412966ed99a16836a6ab03d238c277356 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142427caa45a47d57d5bd23b62c3bfc852fa7df0c6c2291c5ef34f581fa406bf +size 15200774 diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx b/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..10a007c8f5d4d45df17ed7da47cfc0fba8c975be --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3d21488be62e1f975b21c1e7620376899532c8691735bd45cec4cae37bca5b +size 7872356 diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx b/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a0d48e24f7da5c8b1034e36469341356238c70b7 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b77475730108e13ee34de744faf0690f2b1b79e79849b268798b18a6583a50 +size 4384797 diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/preprocessor_config.json b/models/segformer-b0-finetuned-cityscapes-1024-1024/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/quantize_config.json b/models/segformer-b0-finetuned-cityscapes-1024-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69683f7d1375dcee370b4c7057848a84e924bcee --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Relu", + "Concat", + "Div", + "Transpose", + "Gather", + "Mul", + "Constant", + "Add", + "Cast", + "Sub", + "Reshape", + "Resize", + "Unsqueeze", + "Softmax", + "Conv", + "Sqrt", + "ReduceMean", + "Slice", + "Shape", + "Erf", + "Pow", + "MatMul" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-1024-1024/source.txt b/models/segformer-b0-finetuned-cityscapes-1024-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..141c580248e19e580126a6b548ecfb66cb60fb59 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-1024-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b0-finetuned-cityscapes-1024-1024 \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/.gitattributes b/models/segformer-b0-finetuned-cityscapes-512-1024/.gitattributes new file mode 100644 index 
0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/README.md b/models/segformer-b0-finetuned-cityscapes-512-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..80b0d56e68b3e654bb960d2fa8625a0be11c413b --- /dev/null 
+++ b/models/segformer-b0-finetuned-cityscapes-512-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b0-finetuned-cityscapes-512-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b0-finetuned-cityscapes-512-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b0-finetuned-cityscapes-512-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b0-finetuned-cityscapes-512-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
\ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/config.json b/models/segformer-b0-finetuned-cityscapes-512-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1339a12f8ba7dc05c2d4a54a4ac574b7e1faf0c --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b0-finetuned-cityscapes-512-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} 
diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model.onnx b/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..551d8a89124efe4bfab8863811ec266577634b85 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd5ceb33f7c7a969bc69d3ad7ab06017de15c42085e7ed10be8ed8d23b10fec +size 15200774 diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model_fp16.onnx b/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b595792c89d5f4fb8516c86c860f059abf446734 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47fd87bdf13640eedd575701e3215687e180ba64a4e18f9d116635ec1a56f1ad +size 7872356 diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model_quantized.onnx b/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6ccee002ae71cbb6cc63b645dd866d306c9d6534 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a8a153251d347c0a29f44c64ddb5814fcd524fa5ab98b543ab88afea1de37e +size 4384797 diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/preprocessor_config.json b/models/segformer-b0-finetuned-cityscapes-512-1024/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + 
"do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/quantize_config.json b/models/segformer-b0-finetuned-cityscapes-512-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e7a1773c3e1852994e4eb4458c0808a872c51a3 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "ReduceMean", + "Div", + "Concat", + "Add", + "Unsqueeze", + "Conv", + "Sub", + "Pow", + "Cast", + "Constant", + "Mul", + "MatMul", + "Resize", + "Relu", + "Shape", + "Gather", + "Transpose", + "Sqrt", + "Softmax", + "Erf", + "Reshape", + "Slice" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-512-1024/source.txt b/models/segformer-b0-finetuned-cityscapes-512-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..2bab31a1727eea21cd897d46b8aa5831be90fa30 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-512-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b0-finetuned-cityscapes-512-1024 \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/.gitattributes b/models/segformer-b0-finetuned-cityscapes-640-1280/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs 
merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/README.md b/models/segformer-b0-finetuned-cityscapes-640-1280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ab3f07c734417c24d7487d37ec10b6fc7ec348dd --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b0-finetuned-cityscapes-640-1280 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b0-finetuned-cityscapes-640-1280 
with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b0-finetuned-cityscapes-640-1280`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b0-finetuned-cityscapes-640-1280'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/config.json b/models/segformer-b0-finetuned-cityscapes-640-1280/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e2e2c8c411a1c7116aa8a5fc9916c9b1ec3d6a2d --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b0-finetuned-cityscapes-640-1280", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} 
diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model.onnx b/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..790525bbb3ae4e44a04b83a349555fa7dbd07ae0 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc3ab8ed0c6d724ad58fa7ddfeea0f38c85f2e5c0aa544c88a83bc233f95292 +size 15200774 diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model_fp16.onnx b/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..35b75a0e87d77910609d490b81484939890e3d6e --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cce5903820b7394d36bd646a94f3e76f5e05680e8f2011a3ecf552f9a122015 +size 7872356 diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model_quantized.onnx b/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fda707cbe522527e1575ded92c09c4f81111d7cb --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff44b29fae09d351065d942d2d7fb6e3f4647f4a9cb49ed2a548d06141e11530 +size 4384797 diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/preprocessor_config.json b/models/segformer-b0-finetuned-cityscapes-640-1280/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + 
"do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/quantize_config.json b/models/segformer-b0-finetuned-cityscapes-640-1280/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..869c0726e20245e987b9263b6f074e7af4479dbd --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Constant", + "Gather", + "Div", + "Relu", + "Mul", + "Concat", + "Resize", + "Slice", + "Conv", + "Unsqueeze", + "Add", + "ReduceMean", + "Erf", + "Sqrt", + "Reshape", + "MatMul", + "Cast", + "Sub", + "Pow", + "Shape", + "Transpose", + "Softmax" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-640-1280/source.txt b/models/segformer-b0-finetuned-cityscapes-640-1280/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..04efbe6e7987abef4e9fa4dbc79ad8da369da207 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-640-1280/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b0-finetuned-cityscapes-640-1280 \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/.gitattributes b/models/segformer-b0-finetuned-cityscapes-768-768/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs 
-text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/README.md b/models/segformer-b0-finetuned-cityscapes-768-768/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5bc076315471b8982b2cab068e98e22b2dc7a7e8 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b0-finetuned-cityscapes-768-768 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b0-finetuned-cityscapes-768-768 with ONNX weights 
to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b0-finetuned-cityscapes-768-768`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b0-finetuned-cityscapes-768-768'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/config.json b/models/segformer-b0-finetuned-cityscapes-768-768/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0648bbf3c56f84a2789b2fced3a7d83d196a1799 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b0-finetuned-cityscapes-768-768", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff 
--git a/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model.onnx b/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..60da9deaa5068bd2b1c6d0f635d6900e7039f5e6 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4206c577eac06a24f3ef53cf98e175527c020174fccfa2a683eb56db3c41a5 +size 15200774 diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model_fp16.onnx b/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..70adbf3e22c644091167655dcbe80d6b2171aeb7 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42574a8801cdfd179a60d542b7356df6e4b35bf370a81257e18e204f21fbd6a8 +size 7872356 diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model_quantized.onnx b/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..56aa58cbab6591a9531847b1181d4938ba131bf7 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90dc4112c066066110c5aac955990427095a27a88c7dbfd4fcc7d1bbf147147 +size 4384797 diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/preprocessor_config.json b/models/segformer-b0-finetuned-cityscapes-768-768/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + 
"image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/quantize_config.json b/models/segformer-b0-finetuned-cityscapes-768-768/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..28e090d7741148261bd2bc66248b5502facd1106 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "MatMul", + "Relu", + "Constant", + "Sub", + "Concat", + "Add", + "Div", + "Cast", + "Gather", + "Resize", + "Conv", + "ReduceMean", + "Reshape", + "Shape", + "Softmax", + "Slice", + "Mul", + "Transpose", + "Pow", + "Erf", + "Sqrt", + "Unsqueeze" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b0-finetuned-cityscapes-768-768/source.txt b/models/segformer-b0-finetuned-cityscapes-768-768/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..373ab05fcf2f756eb09700ad154455a1f7b98c44 --- /dev/null +++ b/models/segformer-b0-finetuned-cityscapes-768-768/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b0-finetuned-cityscapes-768-768 \ No newline at end of file diff --git a/models/segformer-b1-finetuned-ade-512-512/.gitattributes b/models/segformer-b1-finetuned-ade-512-512/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text 
+*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b1-finetuned-ade-512-512/README.md b/models/segformer-b1-finetuned-ade-512-512/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f3cc56ce37424d9371527fe5c7b16ac470cfb8ba --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b1-finetuned-ade-512-512 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b1-finetuned-ade-512-512 with ONNX weights to be compatible with Transformers.js. 
+ +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b1-finetuned-ade-512-512`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b1-finetuned-ade-512-512'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/house.jpg'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'wall', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'building', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b1-finetuned-ade-512-512/config.json b/models/segformer-b1-finetuned-ade-512-512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6bb5233b1d14a00743b03577ca87b2f086161046 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/config.json @@ -0,0 +1,373 @@ +{ + "_name_or_path": "nvidia/segformer-b1-finetuned-ade-512-512", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + 
"65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 
127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + 
"trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b1-finetuned-ade-512-512/onnx/model.onnx b/models/segformer-b1-finetuned-ade-512-512/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..eff1538a3613a31ba80bb98076447fb5cf7c0911 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd97ac49e7420088cfd75fe028437af021342611ffffa3a33465720692900c6 +size 55187948 diff --git a/models/segformer-b1-finetuned-ade-512-512/onnx/model_fp16.onnx b/models/segformer-b1-finetuned-ade-512-512/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bde7e99d5b0ed42db02ce19d50adfaf48c168721 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3750ef42453b1af00721beccf9bce44d07470e2c072dab6f578b51344ae22f94 +size 27866054 diff --git a/models/segformer-b1-finetuned-ade-512-512/onnx/model_quantized.onnx b/models/segformer-b1-finetuned-ade-512-512/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d23292df5377cc8735116ce5c35135274f207189 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8807e663350807b1d7fa8c85494dc0dde0fb0c783900b29807c23f72c459d900 +size 14492685 diff --git a/models/segformer-b1-finetuned-ade-512-512/preprocessor_config.json b/models/segformer-b1-finetuned-ade-512-512/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6446f33b72ee83c38d5f0a31ce336a9979dae8a9 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b1-finetuned-ade-512-512/quantize_config.json b/models/segformer-b1-finetuned-ade-512-512/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ff142f810e292f5af59e95269a119f2b49be1527 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Div", + "Conv", + "Constant", + "Transpose", + "Cast", + "Relu", + "Reshape", + "Softmax", + "Sqrt", + "Resize", + "MatMul", + "Concat", + "Add", + "Pow", + "Mul", + "Gather", + "Sub", + "Unsqueeze", + "Shape", + "Erf", + "ReduceMean", + "Slice" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b1-finetuned-ade-512-512/source.txt b/models/segformer-b1-finetuned-ade-512-512/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..82d53d11ebb9a427fd0aaf779e7f0f1476997341 --- /dev/null +++ b/models/segformer-b1-finetuned-ade-512-512/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Xenova/segformer-b1-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/.gitattributes b/models/segformer-b1-finetuned-cityscapes-1024-1024/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs 
merge=lfs -text diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/README.md b/models/segformer-b1-finetuned-cityscapes-1024-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e8dd26f4de942a89f52141254302900e716d1614 --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b1-finetuned-cityscapes-1024-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b1-finetuned-cityscapes-1024-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b1-finetuned-cityscapes-1024-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b1-finetuned-cityscapes-1024-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. 
If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/config.json b/models/segformer-b1-finetuned-cityscapes-1024-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..47d314d1462710b324326a646e44bc162f211d21 --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b1-finetuned-cityscapes-1024-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + 
"num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model.onnx b/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0b3bd982fb0425b810891bc55fc3e81403d62dc9 --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b58cb3c54659c78f1b9da35475464ca842ac80810866af30954cc1b011585c +size 55053276 diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx b/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c18ea5a5f4bc74d4c89bad84ff5a3971ebdd164e --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51824a0b7aa924b4093104d0f87a3b3684013950baf6eba82b4a73b259dc5f74 +size 27798714 diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx b/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5a98605c7049ddbe325b4740d875fe4f4ea831cd --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e14a6a71d020f52b5365a537eab810b5b34b07cf9556d31770c17cbed6651cb +size 14458619 diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/preprocessor_config.json b/models/segformer-b1-finetuned-cityscapes-1024-1024/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..ceac080c548ebb7885e0c46db7b44f1e188ca18e --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 1024, + "width": 1024 + } +} diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/quantize_config.json b/models/segformer-b1-finetuned-cityscapes-1024-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..79cb01c2458243c16fac871bc3f7c406e30ecccf --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Div", + "Reshape", + "Pow", + "Erf", + "Softmax", + "Conv", + "Sqrt", + "Shape", + "Unsqueeze", + "Sub", + "Cast", + "Concat", + "Constant", + "Slice", + "Gather", + "Relu", + "Resize", + "Mul", + "ReduceMean", + "MatMul", + "Transpose", + "Add" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b1-finetuned-cityscapes-1024-1024/source.txt b/models/segformer-b1-finetuned-cityscapes-1024-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..c79fbffc06dedaebae8c27fd0a95dd738e658b48 --- /dev/null +++ b/models/segformer-b1-finetuned-cityscapes-1024-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b1-finetuned-cityscapes-1024-1024 \ No newline at end of file diff --git a/models/segformer-b2-finetuned-ade-512-512/.gitattributes b/models/segformer-b2-finetuned-ade-512-512/.gitattributes new file mode 100644 index 
0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b2-finetuned-ade-512-512/README.md b/models/segformer-b2-finetuned-ade-512-512/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a0191b1e7b41097b8a353a2d41fce684068420ef --- /dev/null +++ 
b/models/segformer-b2-finetuned-ade-512-512/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b2-finetuned-ade-512-512 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b2-finetuned-ade-512-512 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b2-finetuned-ade-512-512`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b2-finetuned-ade-512-512'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/house.jpg'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'wall', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'building', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b2-finetuned-ade-512-512/config.json b/models/segformer-b2-finetuned-ade-512-512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb56a5887527d380528af8285808c28d873266a --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/config.json @@ -0,0 +1,373 @@ +{ + "_name_or_path": "nvidia/segformer-b2-finetuned-ade-512-512", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 6, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + 
"65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 
127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + 
"trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b2-finetuned-ade-512-512/onnx/model.onnx b/models/segformer-b2-finetuned-ade-512-512/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..254476527f97921864ab290eed1dae2dee760319 --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819c15e6af8c4de3359c1de7ab0a17d0dde495df1d16f8908a7163f8038e0fa0 +size 110445327 diff --git a/models/segformer-b2-finetuned-ade-512-512/onnx/model_fp16.onnx b/models/segformer-b2-finetuned-ade-512-512/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c1ab39b5026ad24ca272e1b1098a8371c29ad15f --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79209c2663c66b35af907b9bfeab79570d396ab7c4d0c22a54814288538a8d1b +size 55721117 diff --git a/models/segformer-b2-finetuned-ade-512-512/onnx/model_quantized.onnx b/models/segformer-b2-finetuned-ade-512-512/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d8dd159f78f47601febe46dd352dac4c8e65dde1 --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ba81fb2f5cbaa68bced6a4de889f2d9ae7bf12dfac72a2145d5f54e932774218 +size 28939869 diff --git a/models/segformer-b2-finetuned-ade-512-512/preprocessor_config.json b/models/segformer-b2-finetuned-ade-512-512/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6446f33b72ee83c38d5f0a31ce336a9979dae8a9 --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b2-finetuned-ade-512-512/quantize_config.json b/models/segformer-b2-finetuned-ade-512-512/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf955ae40494ba69ca9c8495f692666e52982b72 --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Unsqueeze", + "Pow", + "Constant", + "Mul", + "Cast", + "Transpose", + "Conv", + "Sqrt", + "Slice", + "Div", + "MatMul", + "Resize", + "Add", + "Erf", + "Reshape", + "Relu", + "Shape", + "Gather", + "ReduceMean", + "Concat", + "Sub", + "Softmax" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b2-finetuned-ade-512-512/source.txt b/models/segformer-b2-finetuned-ade-512-512/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e0321344d1932643dda33f9e28055ecc52a370f --- /dev/null +++ b/models/segformer-b2-finetuned-ade-512-512/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Xenova/segformer-b2-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/.gitattributes b/models/segformer-b2-finetuned-cityscapes-1024-1024/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs 
merge=lfs -text diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/README.md b/models/segformer-b2-finetuned-cityscapes-1024-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f82be3f094ac25fb72367f1a639b6c83bce75ac --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b2-finetuned-cityscapes-1024-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b2-finetuned-cityscapes-1024-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b2-finetuned-cityscapes-1024-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b2-finetuned-cityscapes-1024-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. 
If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/config.json b/models/segformer-b2-finetuned-cityscapes-1024-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef4cff1556bf5687ce6a5811f1913833fa67aba3 --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b2-finetuned-cityscapes-1024-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 6, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + 
"num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model.onnx b/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2ba8f9802e54a3a584c01f119a5b186cf0cc116c --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b6aa93fb404a7b0538b0aa7881f3a28bb3d1b289e8d83c418d98e0881bc7d3 +size 110042367 diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx b/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9d91b71e9980c33ed1f67041ad8168d0cf4bc04a --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323d74b5e4150b93d2682059c40ce227a6effeabc9a7926776b6e8499c2b2761 +size 55519635 diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx b/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1718c9724335189964cf7fb90aabe38f93148d6b --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63bec875538ac55c13b818cdcf66f3cc0d913c61eaea49850240c191d85217de +size 28838731 diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/preprocessor_config.json b/models/segformer-b2-finetuned-cityscapes-1024-1024/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/quantize_config.json b/models/segformer-b2-finetuned-cityscapes-1024-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5cdc15f2e1dc883c3b62bd2ab4f3bcd5f4060095 --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Mul", + "Erf", + "Sub", + "Concat", + "Transpose", + "Pow", + "Relu", + "Cast", + "Sqrt", + "Add", + "Resize", + "Constant", + "Shape", + "Unsqueeze", + "Gather", + "MatMul", + "ReduceMean", + "Slice", + "Reshape", + "Conv", + "Softmax", + "Div" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b2-finetuned-cityscapes-1024-1024/source.txt b/models/segformer-b2-finetuned-cityscapes-1024-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..923eab53dd85b6e4a71868bf73305d0d14e7e16a --- /dev/null +++ b/models/segformer-b2-finetuned-cityscapes-1024-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b2-finetuned-cityscapes-1024-1024 \ No newline at end of file diff --git a/models/segformer-b3-finetuned-ade-512-512/.gitattributes b/models/segformer-b3-finetuned-ade-512-512/.gitattributes new file mode 100644 index 
0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b3-finetuned-ade-512-512/README.md b/models/segformer-b3-finetuned-ade-512-512/README.md new file mode 100644 index 0000000000000000000000000000000000000000..861da2c2d65c23304e763d6f9518fa5020048894 --- /dev/null +++ 
b/models/segformer-b3-finetuned-ade-512-512/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b3-finetuned-ade-512-512 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b3-finetuned-ade-512-512 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b3-finetuned-ade-512-512`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b3-finetuned-ade-512-512'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/house.jpg'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'wall', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'building', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b3-finetuned-ade-512-512/config.json b/models/segformer-b3-finetuned-ade-512-512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d52de513c87634ad1c2c79492e554b9389a4a51 --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/config.json @@ -0,0 +1,373 @@ +{ + "_name_or_path": "nvidia/segformer-b3-finetuned-ade-512-512", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 18, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + 
"65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 
127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + 
"trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b3-finetuned-ade-512-512/onnx/model.onnx b/models/segformer-b3-finetuned-ade-512-512/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bac08b1cecade3919d823f908d245ef370171b9f --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa5e8f62b7c1683de1edcea22c8c7a0d7f8e6768b5ae93f7c62e683f0b98708 +size 190376626 diff --git a/models/segformer-b3-finetuned-ade-512-512/onnx/model_fp16.onnx b/models/segformer-b3-finetuned-ade-512-512/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8185a9fa4aa140ec0938f2d052e1d5404cddedd4 --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9bd781793457e0b5f7f1c84445b0411111869b7e93520f8963d91b4ffc23595 +size 96039140 diff --git a/models/segformer-b3-finetuned-ade-512-512/onnx/model_quantized.onnx b/models/segformer-b3-finetuned-ade-512-512/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..60e73b5580b463712326a7a4f52fd27e9358cdc3 --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:2fcbb169fd6ebf373a18e3a5b52444a32bd66f25e3533e0b261ee114974890e9 +size 49945152 diff --git a/models/segformer-b3-finetuned-ade-512-512/preprocessor_config.json b/models/segformer-b3-finetuned-ade-512-512/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6446f33b72ee83c38d5f0a31ce336a9979dae8a9 --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b3-finetuned-ade-512-512/quantize_config.json b/models/segformer-b3-finetuned-ade-512-512/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7de8f7536363f6aa483c81fdf3c4bcd1e7290f --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Reshape", + "Resize", + "Div", + "Add", + "Relu", + "Shape", + "Softmax", + "Gather", + "ReduceMean", + "Erf", + "Conv", + "MatMul", + "Transpose", + "Slice", + "Mul", + "Concat", + "Pow", + "Unsqueeze", + "Sub", + "Cast", + "Sqrt", + "Constant" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b3-finetuned-ade-512-512/source.txt b/models/segformer-b3-finetuned-ade-512-512/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ed67a18f249fa44120ce22e1efd7b37242ff127 --- /dev/null +++ b/models/segformer-b3-finetuned-ade-512-512/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Xenova/segformer-b3-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/.gitattributes b/models/segformer-b3-finetuned-cityscapes-1024-1024/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs 
merge=lfs -text diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/README.md b/models/segformer-b3-finetuned-cityscapes-1024-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6a912178e875e83a364421dd0d62f95f1f8efad2 --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b3-finetuned-cityscapes-1024-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b3-finetuned-cityscapes-1024-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b3-finetuned-cityscapes-1024-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b3-finetuned-cityscapes-1024-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. 
If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/config.json b/models/segformer-b3-finetuned-cityscapes-1024-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a04f2cf0e8293c6c121b4fac98631af6f28de075 --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b3-finetuned-cityscapes-1024-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 18, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + 
"num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model.onnx b/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c98b45e02187dbc856448902fbb382e2b9cca59d --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9009cd0fa8d0755a82f5426725ab121290a8c31dc9db5bef2e3332f3afe320d5 +size 189973666 diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx b/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ff013f53e4cf3460bc53dd454c42c9b1eec3d1b0 --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6cf6c0b40250f3ff8fe35585abc2a3e8d1b51f300c9f8118384422e4fbdedab +size 95837658 diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx b/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..19e2b28e65a4fe5ea59855685bcc8d77b1b2a52b --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4817db4dc339ebdb8648e700499aae4b6d17249bee806f40b73703fd812f5c +size 49844014 diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/preprocessor_config.json b/models/segformer-b3-finetuned-cityscapes-1024-1024/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/quantize_config.json b/models/segformer-b3-finetuned-cityscapes-1024-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..58e696fce8d46a8b784fa819576019ed898d6a55 --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Sqrt", + "Shape", + "Mul", + "Gather", + "Slice", + "Conv", + "Concat", + "Unsqueeze", + "Softmax", + "Add", + "Cast", + "Reshape", + "Resize", + "Transpose", + "MatMul", + "Pow", + "Sub", + "Erf", + "Constant", + "Relu", + "Div", + "ReduceMean" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b3-finetuned-cityscapes-1024-1024/source.txt b/models/segformer-b3-finetuned-cityscapes-1024-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..05f071d6577fae147b338841bfc0db321b752fe0 --- /dev/null +++ b/models/segformer-b3-finetuned-cityscapes-1024-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b3-finetuned-cityscapes-1024-1024 \ No newline at end of file diff --git a/models/segformer-b4-finetuned-ade-512-512/.gitattributes b/models/segformer-b4-finetuned-ade-512-512/.gitattributes new file mode 100644 index 
0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b4-finetuned-ade-512-512/README.md b/models/segformer-b4-finetuned-ade-512-512/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9ceff55b1449c755ef4ea7e4707ad226022377da --- /dev/null +++ 
b/models/segformer-b4-finetuned-ade-512-512/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b4-finetuned-ade-512-512 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b4-finetuned-ade-512-512 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b4-finetuned-ade-512-512`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b4-finetuned-ade-512-512'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/house.jpg'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'wall', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'building', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b4-finetuned-ade-512-512/config.json b/models/segformer-b4-finetuned-ade-512-512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cc48119cfb5fb1d58dc56883669c973f9176230c --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/config.json @@ -0,0 +1,373 @@ +{ + "_name_or_path": "nvidia/segformer-b4-finetuned-ade-512-512", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 8, + 27, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + 
"65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 
127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + 
"trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b4-finetuned-ade-512-512/onnx/model.onnx b/models/segformer-b4-finetuned-ade-512-512/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..578b7daa0127f5316a8085c58c53dbcc6b53dfbf --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936ab0904982dc98f625b412c55089be64d70ef77ba8a4122bcde0298ad076f8 +size 257922641 diff --git a/models/segformer-b4-finetuned-ade-512-512/onnx/model_fp16.onnx b/models/segformer-b4-finetuned-ade-512-512/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..28ea0c58579fa2df660da6c5d79288c9017957af --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddde50c9d2ceb18e88a4a736f65b96fa18b0beeebdd0c4ad96a86fe9b3485a22 +size 130194016 diff --git a/models/segformer-b4-finetuned-ade-512-512/onnx/model_quantized.onnx b/models/segformer-b4-finetuned-ade-512-512/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a4b9ceaed7fb6ef7a31518831bf8c799cf59df9b --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ce20a5b038c120c1765db433725196d8619dbc6b431d27dc1c05ba3f0fb732a8 +size 67858911 diff --git a/models/segformer-b4-finetuned-ade-512-512/preprocessor_config.json b/models/segformer-b4-finetuned-ade-512-512/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6446f33b72ee83c38d5f0a31ce336a9979dae8a9 --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b4-finetuned-ade-512-512/quantize_config.json b/models/segformer-b4-finetuned-ade-512-512/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bddb99fdbd572ee4e7d267911a7d30bdaea82c04 --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "ReduceMean", + "Reshape", + "Div", + "Transpose", + "Softmax", + "Erf", + "Gather", + "Constant", + "Unsqueeze", + "Sqrt", + "Concat", + "Pow", + "Relu", + "Resize", + "Shape", + "Conv", + "Add", + "MatMul", + "Slice", + "Sub", + "Cast", + "Mul" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b4-finetuned-ade-512-512/source.txt b/models/segformer-b4-finetuned-ade-512-512/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..76d2d6ef6a4b34a6e54f62add23867023491c32d --- /dev/null +++ b/models/segformer-b4-finetuned-ade-512-512/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Xenova/segformer-b4-finetuned-ade-512-512 \ No newline at end of file diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/.gitattributes b/models/segformer-b4-finetuned-cityscapes-1024-1024/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs 
merge=lfs -text diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/README.md b/models/segformer-b4-finetuned-cityscapes-1024-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..db12a9539a668e6bc5e47301bec92c64cd004bde --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b4-finetuned-cityscapes-1024-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b4-finetuned-cityscapes-1024-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b4-finetuned-cityscapes-1024-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b4-finetuned-cityscapes-1024-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. 
If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/config.json b/models/segformer-b4-finetuned-cityscapes-1024-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf2f38a60a71b7feab783c854644750684b429b7 --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b4-finetuned-cityscapes-1024-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 8, + 27, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + 
"num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model.onnx b/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5aec891d080cfe64210ba6d7e0017ed3c178a1f0 --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b43ce362c7542fa81a2a3a8af507d82b0c05918e0d3ee30aa19e3bd2958b34c4 +size 257519681 diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx b/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..be01f092e8c62e6f044b56846e76f898d20ca33d --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872890f3ac43550aab2de33e4642ae9d73810ce9efd63e69661f0fb84b9bf809 +size 129992534 diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx b/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0551186378dfbc385bbed129153d8b80e159da3e --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf57e9423950585de2e7c69992df6b9abce3de1f2f4257c73451939b3582e8a1 +size 67757773 diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/preprocessor_config.json b/models/segformer-b4-finetuned-cityscapes-1024-1024/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/quantize_config.json b/models/segformer-b4-finetuned-cityscapes-1024-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d018ea9b8a5042c2e54a123d70383916edb2f45 --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Reshape", + "MatMul", + "Transpose", + "Mul", + "Add", + "Unsqueeze", + "Conv", + "Erf", + "Constant", + "Div", + "Gather", + "Resize", + "Slice", + "Softmax", + "Concat", + "Sub", + "Shape", + "Cast", + "Sqrt", + "ReduceMean", + "Relu", + "Pow" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b4-finetuned-cityscapes-1024-1024/source.txt b/models/segformer-b4-finetuned-cityscapes-1024-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..aefd269173b4d2eaaddfbe501f8a40ce3ba5a5b9 --- /dev/null +++ b/models/segformer-b4-finetuned-cityscapes-1024-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b4-finetuned-cityscapes-1024-1024 \ No newline at end of file diff --git a/models/segformer-b5-finetuned-ade-640-640/.gitattributes b/models/segformer-b5-finetuned-ade-640-640/.gitattributes new file mode 100644 index 
0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-b5-finetuned-ade-640-640/README.md b/models/segformer-b5-finetuned-ade-640-640/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9d49d06bdc53ae647597374e662bc7d688338efb --- /dev/null +++ 
b/models/segformer-b5-finetuned-ade-640-640/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b5-finetuned-ade-640-640 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b5-finetuned-ade-640-640 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b5-finetuned-ade-640-640`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b5-finetuned-ade-640-640'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/house.jpg'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'wall', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'building', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer-b5-finetuned-ade-640-640/config.json b/models/segformer-b5-finetuned-ade-640-640/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f2ffd67a8e9fbf946e664af1fd6888f9e5a7817d --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/config.json @@ -0,0 +1,373 @@ +{ + "_name_or_path": "nvidia/segformer-b5-finetuned-ade-640-640", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 6, + 40, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "wall", + "1": "building", + "2": "sky", + "3": "floor", + "4": "tree", + "5": "ceiling", + "6": "road", + "7": "bed ", + "8": "windowpane", + "9": "grass", + "10": "cabinet", + "11": "sidewalk", + "12": "person", + "13": "earth", + "14": "door", + "15": "table", + "16": "mountain", + "17": "plant", + "18": "curtain", + "19": "chair", + "20": "car", + "21": "water", + "22": "painting", + "23": "sofa", + "24": "shelf", + "25": "house", + "26": "sea", + "27": "mirror", + "28": "rug", + "29": "field", + "30": "armchair", + "31": "seat", + "32": "fence", + "33": "desk", + "34": "rock", + "35": "wardrobe", + "36": "lamp", + "37": "bathtub", + "38": "railing", + "39": "cushion", + "40": "base", + "41": "box", + "42": "column", + "43": "signboard", + "44": "chest of drawers", + "45": "counter", + "46": "sand", + "47": "sink", + "48": "skyscraper", + "49": "fireplace", + "50": "refrigerator", + "51": "grandstand", + "52": "path", + "53": "stairs", + "54": "runway", + "55": "case", + "56": "pool table", + "57": "pillow", + "58": "screen door", + "59": "stairway", + "60": "river", + "61": "bridge", + "62": "bookcase", + "63": "blind", + "64": "coffee table", + 
"65": "toilet", + "66": "flower", + "67": "book", + "68": "hill", + "69": "bench", + "70": "countertop", + "71": "stove", + "72": "palm", + "73": "kitchen island", + "74": "computer", + "75": "swivel chair", + "76": "boat", + "77": "bar", + "78": "arcade machine", + "79": "hovel", + "80": "bus", + "81": "towel", + "82": "light", + "83": "truck", + "84": "tower", + "85": "chandelier", + "86": "awning", + "87": "streetlight", + "88": "booth", + "89": "television receiver", + "90": "airplane", + "91": "dirt track", + "92": "apparel", + "93": "pole", + "94": "land", + "95": "bannister", + "96": "escalator", + "97": "ottoman", + "98": "bottle", + "99": "buffet", + "100": "poster", + "101": "stage", + "102": "van", + "103": "ship", + "104": "fountain", + "105": "conveyer belt", + "106": "canopy", + "107": "washer", + "108": "plaything", + "109": "swimming pool", + "110": "stool", + "111": "barrel", + "112": "basket", + "113": "waterfall", + "114": "tent", + "115": "bag", + "116": "minibike", + "117": "cradle", + "118": "oven", + "119": "ball", + "120": "food", + "121": "step", + "122": "tank", + "123": "trade name", + "124": "microwave", + "125": "pot", + "126": "animal", + "127": "bicycle", + "128": "lake", + "129": "dishwasher", + "130": "screen", + "131": "blanket", + "132": "sculpture", + "133": "hood", + "134": "sconce", + "135": "vase", + "136": "traffic light", + "137": "tray", + "138": "ashcan", + "139": "fan", + "140": "pier", + "141": "crt screen", + "142": "plate", + "143": "monitor", + "144": "bulletin board", + "145": "shower", + "146": "radiator", + "147": "glass", + "148": "clock", + "149": "flag" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "airplane": 90, + "animal": 126, + "apparel": 92, + "arcade machine": 78, + "armchair": 30, + "ashcan": 138, + "awning": 86, + "bag": 115, + "ball": 119, + "bannister": 95, + "bar": 77, + "barrel": 111, + "base": 40, + "basket": 112, + "bathtub": 37, + "bed ": 7, + "bench": 69, + "bicycle": 
127, + "blanket": 131, + "blind": 63, + "boat": 76, + "book": 67, + "bookcase": 62, + "booth": 88, + "bottle": 98, + "box": 41, + "bridge": 61, + "buffet": 99, + "building": 1, + "bulletin board": 144, + "bus": 80, + "cabinet": 10, + "canopy": 106, + "car": 20, + "case": 55, + "ceiling": 5, + "chair": 19, + "chandelier": 85, + "chest of drawers": 44, + "clock": 148, + "coffee table": 64, + "column": 42, + "computer": 74, + "conveyer belt": 105, + "counter": 45, + "countertop": 70, + "cradle": 117, + "crt screen": 141, + "curtain": 18, + "cushion": 39, + "desk": 33, + "dirt track": 91, + "dishwasher": 129, + "door": 14, + "earth": 13, + "escalator": 96, + "fan": 139, + "fence": 32, + "field": 29, + "fireplace": 49, + "flag": 149, + "floor": 3, + "flower": 66, + "food": 120, + "fountain": 104, + "glass": 147, + "grandstand": 51, + "grass": 9, + "hill": 68, + "hood": 133, + "house": 25, + "hovel": 79, + "kitchen island": 73, + "lake": 128, + "lamp": 36, + "land": 94, + "light": 82, + "microwave": 124, + "minibike": 116, + "mirror": 27, + "monitor": 143, + "mountain": 16, + "ottoman": 97, + "oven": 118, + "painting": 22, + "palm": 72, + "path": 52, + "person": 12, + "pier": 140, + "pillow": 57, + "plant": 17, + "plate": 142, + "plaything": 108, + "pole": 93, + "pool table": 56, + "poster": 100, + "pot": 125, + "radiator": 146, + "railing": 38, + "refrigerator": 50, + "river": 60, + "road": 6, + "rock": 34, + "rug": 28, + "runway": 54, + "sand": 46, + "sconce": 134, + "screen": 130, + "screen door": 58, + "sculpture": 132, + "sea": 26, + "seat": 31, + "shelf": 24, + "ship": 103, + "shower": 145, + "sidewalk": 11, + "signboard": 43, + "sink": 47, + "sky": 2, + "skyscraper": 48, + "sofa": 23, + "stage": 101, + "stairs": 53, + "stairway": 59, + "step": 121, + "stool": 110, + "stove": 71, + "streetlight": 87, + "swimming pool": 109, + "swivel chair": 75, + "table": 15, + "tank": 122, + "television receiver": 89, + "tent": 114, + "toilet": 65, + "towel": 81, + "tower": 84, + 
"trade name": 123, + "traffic light": 136, + "tray": 137, + "tree": 4, + "truck": 83, + "van": 102, + "vase": 135, + "wall": 0, + "wardrobe": 35, + "washer": 107, + "water": 21, + "waterfall": 113, + "windowpane": 8 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b5-finetuned-ade-640-640/onnx/model.onnx b/models/segformer-b5-finetuned-ade-640-640/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e2a6d250b38fe748f77dd3415faecfa60aee2a92 --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea8305f7c32e7d5ccdaa24d97161e4246e027cfef5835c90d0f63454d7fbfb41 +size 340719571 diff --git a/models/segformer-b5-finetuned-ade-640-640/onnx/model_fp16.onnx b/models/segformer-b5-finetuned-ade-640-640/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d77d787a953fcb03593b9dad8d75e13623cc643e --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19461294738e6d21b7bb64d66b4e1c3c77ec8f74c7e87c42acdb89b27bd19ebd +size 171918052 diff --git a/models/segformer-b5-finetuned-ade-640-640/onnx/model_quantized.onnx b/models/segformer-b5-finetuned-ade-640-640/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f06861be0b1c2c29f29cdad31ca951df0b4735d8 --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7b20b28f213e6d1128cb850c3fa273a061f0aa87a49224316791fdab49515a51 +size 89540816 diff --git a/models/segformer-b5-finetuned-ade-640-640/preprocessor_config.json b/models/segformer-b5-finetuned-ade-640-640/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b5b6860eebd419622319b35171257a2f562cc7bb --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 640, + "width": 640 + } +} diff --git a/models/segformer-b5-finetuned-ade-640-640/quantize_config.json b/models/segformer-b5-finetuned-ade-640-640/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1bdd645a14558c1e2834a215b938ec8800b7a7eb --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Constant", + "Gather", + "Unsqueeze", + "Sub", + "Transpose", + "Shape", + "Cast", + "Erf", + "Add", + "Relu", + "Concat", + "MatMul", + "Softmax", + "Resize", + "Pow", + "Div", + "Mul", + "Conv", + "Reshape", + "Slice", + "Sqrt", + "ReduceMean" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b5-finetuned-ade-640-640/source.txt b/models/segformer-b5-finetuned-ade-640-640/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..424c69e1d50c55a61e38cd896f913eabe95e5dce --- /dev/null +++ b/models/segformer-b5-finetuned-ade-640-640/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Xenova/segformer-b5-finetuned-ade-640-640 \ No newline at end of file diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/.gitattributes b/models/segformer-b5-finetuned-cityscapes-1024-1024/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs 
merge=lfs -text diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/README.md b/models/segformer-b5-finetuned-cityscapes-1024-1024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..792db41e2c9c598c1132abab59abd251906c4ef8 --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/README.md @@ -0,0 +1,52 @@ +--- +base_model: nvidia/segformer-b5-finetuned-cityscapes-1024-1024 +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/nvidia/segformer-b5-finetuned-cityscapes-1024-1024 with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Image segmentation with `Xenova/segformer-b5-finetuned-cityscapes-1024-1024`. + +```js +import { pipeline } from '@huggingface/transformers'; + +// Create an image segmentation pipeline +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer-b5-finetuned-cityscapes-1024-1024'); + +// Segment an image +const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cityscapes.png'; +const output = await segmenter(url); +console.log(output); +// [ +// { +// score: null, +// label: 'road', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'sidewalk', +// mask: RawImage { ... } +// }, +// ... +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. 
If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). \ No newline at end of file diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/config.json b/models/segformer-b5-finetuned-cityscapes-1024-1024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f106d1edc5a20bbfefe3b9fb75937d75393921f --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "nvidia/segformer-b5-finetuned-cityscapes-1024-1024", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 6, + 40, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "road", + "1": "sidewalk", + "2": "building", + "3": "wall", + "4": "fence", + "5": "pole", + "6": "traffic light", + "7": "traffic sign", + "8": "vegetation", + "9": "terrain", + "10": "sky", + "11": "person", + "12": "rider", + "13": "car", + "14": "truck", + "15": "bus", + "16": "train", + "17": "motorcycle", + "18": "bicycle" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "bicycle": 18, + "building": 2, + "bus": 15, + "car": 13, + "fence": 4, + "motorcycle": 17, + "person": 11, + "pole": 5, + "rider": 12, + "road": 0, + "sidewalk": 1, + "sky": 10, + "terrain": 9, + "traffic light": 6, + "traffic sign": 7, + "train": 16, + "truck": 14, + "vegetation": 8, + "wall": 3 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + 
"num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model.onnx b/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b98ab240712de516d9a79195f0b55ff37a102f5b --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c566248fc4c8665913b5b53f18d7a63dd34b678cc83eda518bfdf70dea96914c +size 340316611 diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx b/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1e96972d64316a87cd7d55da45821113e148b600 --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032aa335cc9983cfb996612bebf7725232b0378f225f2152f189163994fb07fc +size 171716570 diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx b/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..80e85cd0eee20ae6d3ead12d2d1c87e59b046398 --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ad3d9578fbce3303548ee40c2a120d848375e88e59dd20cdd4f7882a17d4e7 +size 89439678 diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/preprocessor_config.json b/models/segformer-b5-finetuned-cityscapes-1024-1024/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..ceac080c548ebb7885e0c46db7b44f1e188ca18e --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 1024, + "width": 1024 + } +} diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/quantize_config.json b/models/segformer-b5-finetuned-cityscapes-1024-1024/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5332a9dd69b8b5a3e3d43278ef2f676227a4887c --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "Unsqueeze", + "Sqrt", + "Relu", + "Pow", + "Reshape", + "Shape", + "Add", + "Slice", + "ReduceMean", + "Div", + "Transpose", + "Concat", + "Sub", + "Softmax", + "MatMul", + "Gather", + "Mul", + "Conv", + "Constant", + "Cast", + "Erf", + "Resize" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer-b5-finetuned-cityscapes-1024-1024/source.txt b/models/segformer-b5-finetuned-cityscapes-1024-1024/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..2dd05942543cba38ee6f1873b2fc269777e52f95 --- /dev/null +++ b/models/segformer-b5-finetuned-cityscapes-1024-1024/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer-b5-finetuned-cityscapes-1024-1024 \ No newline at end of file diff --git a/models/segformer-onnx/.gitattributes b/models/segformer-onnx/.gitattributes new file mode 100644 index 
0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer-onnx/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer-onnx/README.md b/models/segformer-onnx/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b8256e5eeb6599bcf0f124887d4abb371169cb07 --- /dev/null +++ b/models/segformer-onnx/README.md @@ -0,0 +1,31 @@ +--- +base_model: 
mattmdjaga/segformer_b2_clothes +tags: +- onnx +- semantic-segmentation +--- + +# ONNX Model converted from mattmdjaga/segformer_b2_clothes + +This is an ONNX version of the model mattmdjaga/segformer_b2_clothes, converted automatically. + +## Model Information +- Original Model: mattmdjaga/segformer_b2_clothes +- ONNX Opset Version: 12 +- Input Shape: Dynamic (batch_size, 3, height, width) + +## Usage + +```python +import onnxruntime as ort +import numpy as np + +# Load ONNX model +session = ort.InferenceSession("segformer_b2_clothes.onnx") + +# Prepare input +input_data = np.zeros((1, 3, 224, 224), dtype=np.float32) + +# Run inference +outputs = session.run(None, {"input": input_data}) +``` diff --git a/models/segformer-onnx/segformer-b3-fashion.onnx b/models/segformer-onnx/segformer-b3-fashion.onnx new file mode 100644 index 0000000000000000000000000000000000000000..219f6d01d7bf7b4f3bbddee078bc9f7ec0789ec5 --- /dev/null +++ b/models/segformer-onnx/segformer-b3-fashion.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:722a595aa3f78055f8f00773142d82e590cc4eb6b0f3d5fd6dff9fd4a8ee0683 +size 190059774 diff --git a/models/segformer-onnx/segformer_b2_clothes.onnx b/models/segformer-onnx/segformer_b2_clothes.onnx new file mode 100644 index 0000000000000000000000000000000000000000..98bbd7e336d4460799cb8a1f7176a3d003160891 --- /dev/null +++ b/models/segformer-onnx/segformer_b2_clothes.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fdd1137d4fc35bafc67efeab21549011c4b9958ea7a0c2d33e05839ba1ea09 +size 110039269 diff --git a/models/segformer-onnx/source.txt b/models/segformer-onnx/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..22cbfeb5724f74a2745adff70d1d27d5f8436f3d --- /dev/null +++ b/models/segformer-onnx/source.txt @@ -0,0 +1 @@ +https://huggingface.co/alexgenovese/segformer-onnx \ No newline at end of file diff --git a/models/segformer_b0_clothes/.gitattributes 
b/models/segformer_b0_clothes/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/models/segformer_b0_clothes/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer_b0_clothes/README.md b/models/segformer_b0_clothes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..11a1153fe378b8867ba53ead68f024e05b5150d2 --- 
/dev/null +++ b/models/segformer_b0_clothes/README.md @@ -0,0 +1,54 @@ +--- +base_model: mattmdjaga/segformer_b0_clothes +library_name: transformers.js +pipeline_tag: image-segmentation +--- + +https://huggingface.co/mattmdjaga/segformer_b0_clothes with ONNX weights to be compatible with Transformers.js. + +## Usage (Transformers.js) + +If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using: +```bash +npm i @huggingface/transformers +``` + +**Example:** Clothes segmentation with `Xenova/segformer_b0_clothes`. + +```js +import { pipeline } from '@huggingface/transformers'; + +const segmenter = await pipeline('image-segmentation', 'Xenova/segformer_b0_clothes'); + +const url = 'https://freerangestock.com/sample/139043/young-man-standing-and-leaning-on-car.jpg'; +const output = await segmenter(url); +console.log(output) +// [ +// { +// score: null, +// label: 'Background', +// mask: RawImage { ... } +// }, +// { +// score: null, +// label: 'Hair', +// mask: RawImage { ... } +// }, +// ... +// } +// ] +``` + +You can visualize the outputs with: +```js +for (const l of output) { + l.mask.save(`${l.label}.png`); +} +``` + + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/61b253b7ac5ecaae3d1efe0c/cyas1v8geg7dO7umYLt4E.png) + +--- + +Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`). 
\ No newline at end of file diff --git a/models/segformer_b0_clothes/config.json b/models/segformer_b0_clothes/config.json new file mode 100644 index 0000000000000000000000000000000000000000..914d9b3986158d87a1da7dbebd8f905dd7189467 --- /dev/null +++ b/models/segformer_b0_clothes/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "mattmdjaga/segformer_b0_clothes", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 256, + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 32, + 64, + 160, + 256 + ], + "id2label": { + "0": "Background", + "1": "Hat", + "2": "Hair", + "3": "Sunglasses", + "4": "Upper-clothes", + "5": "Skirt", + "6": "Pants", + "7": "Dress", + "8": "Belt", + "9": "Left-shoe", + "10": "Right-shoe", + "11": "Face", + "12": "Left-leg", + "13": "Right-leg", + "14": "Left-arm", + "15": "Right-arm", + "16": "Bag", + "17": "Scarf" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "Background": 0, + "Bag": 16, + "Belt": 8, + "Dress": 7, + "Face": 11, + "Hair": 2, + "Hat": 1, + "Left-arm": 14, + "Left-leg": 12, + "Left-shoe": 9, + "Pants": 6, + "Right-arm": 15, + "Right-leg": 13, + "Right-shoe": 10, + "Scarf": 17, + "Skirt": 5, + "Sunglasses": 3, + "Upper-clothes": 4 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/models/segformer_b0_clothes/onnx/model.onnx b/models/segformer_b0_clothes/onnx/model.onnx 
new file mode 100644 index 0000000000000000000000000000000000000000..07315bea6c4814d571a02f1360f39b594b89b103 --- /dev/null +++ b/models/segformer_b0_clothes/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e6f1545475bc549184b6710487555b4b73eb08354cc3a480fb4fab1473a9d3 +size 15199746 diff --git a/models/segformer_b0_clothes/onnx/model_fp16.onnx b/models/segformer_b0_clothes/onnx/model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..928bac834e28f1399c845eef31117dc4116e6867 --- /dev/null +++ b/models/segformer_b0_clothes/onnx/model_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0204914560685e2cdb94ac919c006d20353f59c68814bd0974801e759a9d822 +size 7871842 diff --git a/models/segformer_b0_clothes/onnx/model_quantized.onnx b/models/segformer_b0_clothes/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..03e386adca8bae5a292c515429a06037c0f967b8 --- /dev/null +++ b/models/segformer_b0_clothes/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9d3e4fcf3e0b677ee0cf9810ebccdc7f478f9c69a60d38267bad598799ef73 +size 4384537 diff --git a/models/segformer_b0_clothes/preprocessor_config.json b/models/segformer_b0_clothes/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/models/segformer_b0_clothes/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/models/segformer_b0_clothes/quantize_config.json 
b/models/segformer_b0_clothes/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..057b8dc76648ed24be858a38adc2e0eb4d42528c --- /dev/null +++ b/models/segformer_b0_clothes/quantize_config.json @@ -0,0 +1,33 @@ +{ + "per_channel": true, + "reduce_range": true, + "per_model_config": { + "model": { + "op_types": [ + "ReduceMean", + "Mul", + "Conv", + "MatMul", + "Softmax", + "Transpose", + "Unsqueeze", + "Constant", + "Div", + "Gather", + "Erf", + "Resize", + "Sqrt", + "Sub", + "Add", + "Concat", + "Cast", + "Pow", + "Slice", + "Shape", + "Reshape", + "Relu" + ], + "weight_type": "QUInt8" + } + } +} \ No newline at end of file diff --git a/models/segformer_b0_clothes/source.txt b/models/segformer_b0_clothes/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..df1ffa3ec2cd857f3d912498d8d096238161174d --- /dev/null +++ b/models/segformer_b0_clothes/source.txt @@ -0,0 +1 @@ +https://huggingface.co/Xenova/segformer_b0_clothes \ No newline at end of file diff --git a/models/segformer_b2_clothes (mattmdjaga)/.gitattributes b/models/segformer_b2_clothes (mattmdjaga)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..c7d9f3332a950355d5a77d85000f05e6f45435ea --- /dev/null +++ b/models/segformer_b2_clothes (mattmdjaga)/.gitattributes @@ -0,0 +1,34 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs 
merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/models/segformer_b2_clothes (mattmdjaga)/.gitignore b/models/segformer_b2_clothes (mattmdjaga)/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d0e300c55f010e9077e7befa1bb4df35ccf0a818 --- /dev/null +++ b/models/segformer_b2_clothes (mattmdjaga)/.gitignore @@ -0,0 +1,2 @@ +.ipynb_checkpoints +test.ipynb \ No newline at end of file diff --git a/models/segformer_b2_clothes (mattmdjaga)/README.md b/models/segformer_b2_clothes (mattmdjaga)/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c744122aa160061440cc7ee3380c0cae1012747c --- /dev/null +++ b/models/segformer_b2_clothes (mattmdjaga)/README.md @@ -0,0 +1,107 @@ +--- +license: other +tags: +- vision +- image-segmentation +widget: +- src: >- + https://images.unsplash.com/photo-1643310325061-2beef64926a5?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8Nnx8cmFjb29uc3xlbnwwfHwwfHw%3D&w=1000&q=80 + example_title: Person +- src: >- + https://freerangestock.com/sample/139043/young-man-standing-and-leaning-on-car.jpg + example_title: Person +datasets: +- mattmdjaga/human_parsing_dataset +--- +# Segformer 
B2 fine-tuned for clothes segmentation + +The SegFormer model is fine-tuned on the [ATR dataset](https://github.com/lemondan/HumanParsing-Dataset) for clothes segmentation, but it can also be used for human segmentation. +The dataset on Hugging Face is called "mattmdjaga/human_parsing_dataset". + +**[Training code](https://github.com/mattmdjaga/segformer_b2_clothes)**. +```python +from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation +from PIL import Image +import requests +import matplotlib.pyplot as plt +import torch.nn as nn + +processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes") +model = AutoModelForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes") + +url = "https://plus.unsplash.com/premium_photo-1673210886161-bfcc40f54d1f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8cGVyc29uJTIwc3RhbmRpbmd8ZW58MHx8MHx8&w=1000&q=80" + +image = Image.open(requests.get(url, stream=True).raw) +inputs = processor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits.cpu() + +upsampled_logits = nn.functional.interpolate( + logits, + size=image.size[::-1], + mode="bilinear", + align_corners=False, +) + +pred_seg = upsampled_logits.argmax(dim=1)[0] +plt.imshow(pred_seg) +``` + +Labels: 0: "Background", 1: "Hat", 2: "Hair", 3: "Sunglasses", 4: "Upper-clothes", 5: "Skirt", 6: "Pants", 7: "Dress", 8: "Belt", 9: "Left-shoe", 10: "Right-shoe", 11: "Face", 12: "Left-leg", 13: "Right-leg", 14: "Left-arm", 15: "Right-arm", 16: "Bag", 17: "Scarf" + +### Evaluation + +| Label Index | Label Name | Category Accuracy | Category IoU | +|:-------------:|:----------------:|:-----------------:|:------------:| +| 0 | Background | 0.99 | 0.99 | +| 1 | Hat | 0.73 | 0.68 | +| 2 | Hair | 0.91 | 0.82 | +| 3 | Sunglasses | 0.73 | 0.63 | +| 4 | Upper-clothes | 0.87 | 0.78 | +| 5 | Skirt | 0.76 | 0.65 | +| 6 | Pants | 0.90 | 0.84 | +| 7 | Dress | 0.74 | 0.55 | +| 8 | Belt | 0.35 | 0.30 | +| 9 |
Left-shoe | 0.74 | 0.58 | +| 10 | Right-shoe | 0.75 | 0.60 | +| 11 | Face | 0.92 | 0.85 | +| 12 | Left-leg | 0.90 | 0.82 | +| 13 | Right-leg | 0.90 | 0.81 | +| 14 | Left-arm | 0.86 | 0.74 | +| 15 | Right-arm | 0.82 | 0.73 | +| 16 | Bag | 0.91 | 0.84 | +| 17 | Scarf | 0.63 | 0.29 | + +Overall Evaluation Metrics: +- Evaluation Loss: 0.15 +- Mean Accuracy: 0.80 +- Mean IoU: 0.69 + +### License + +The license for this model can be found [here](https://github.com/NVlabs/SegFormer/blob/master/LICENSE). + +### BibTeX entry and citation info + +```bibtex +@article{DBLP:journals/corr/abs-2105-15203, + author = {Enze Xie and + Wenhai Wang and + Zhiding Yu and + Anima Anandkumar and + Jose M. Alvarez and + Ping Luo}, + title = {SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers}, + journal = {CoRR}, + volume = {abs/2105.15203}, + year = {2021}, + url = {https://arxiv.org/abs/2105.15203}, + eprinttype = {arXiv}, + eprint = {2105.15203}, + timestamp = {Wed, 02 Jun 2021 11:46:42 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2105-15203.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` \ No newline at end of file diff --git a/models/segformer_b2_clothes (mattmdjaga)/config.json b/models/segformer_b2_clothes (mattmdjaga)/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bf506a3672235c681b35e124ef9333ca6f5e8b2 --- /dev/null +++ b/models/segformer_b2_clothes (mattmdjaga)/config.json @@ -0,0 +1,110 @@ +{ + "_name_or_path": "nvidia/mit-b2", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 6, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "Background", + "1": "Hat", + "2": 
"Hair", + "3": "Sunglasses", + "4": "Upper-clothes", + "5": "Skirt", + "6": "Pants", + "7": "Dress", + "8": "Belt", + "9": "Left-shoe", + "10": "Right-shoe", + "11": "Face", + "12": "Left-leg", + "13": "Right-leg", + "14": "Left-arm", + "15": "Right-arm", + "16": "Bag", + "17": "Scarf" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "Background": 0, + "Bag": 16, + "Belt": 8, + "Dress": 7, + "Face": 11, + "Hair": 2, + "Hat": 1, + "Left-arm": 14, + "Left-leg": 12, + "Left-shoe": 9, + "Pants": 6, + "Right-arm": 15, + "Right-leg": 13, + "Right-shoe": 10, + "Scarf": 17, + "Skirt": 5, + "Sunglasses": 3, + "Upper-clothes": 4 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "torch_dtype": "float32", + "transformers_version": "4.24.0" +} diff --git a/models/segformer_b2_clothes (mattmdjaga)/handler.py b/models/segformer_b2_clothes (mattmdjaga)/handler.py new file mode 100644 index 0000000000000000000000000000000000000000..677fdf4567de92cdd30b8957c5b8d7c0563bcdfe --- /dev/null +++ b/models/segformer_b2_clothes (mattmdjaga)/handler.py @@ -0,0 +1,39 @@ +from typing import Dict, List, Any +from PIL import Image +from io import BytesIO +from transformers import AutoModelForSemanticSegmentation, AutoFeatureExtractor +import base64 +import torch +from torch import nn + +class EndpointHandler(): + def __init__(self, path="."): + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = AutoModelForSemanticSegmentation.from_pretrained(path).to(self.device).eval() + self.feature_extractor = AutoFeatureExtractor.from_pretrained(path) + + def __call__(self, data: Dict[str, Any]) -> 
List[Dict[str, Any]]: + """ + data args: + inputs (:obj:`dict`): popped from data["inputs"], falling back to data itself when that key is absent + image (:obj:`str`): base64-encoded image stored under inputs["image"], decoded and opened with PIL + Return: + A nested :obj:`list` from Tensor.tolist(): the argmax over dim 1 of the logits, bilinearly upsampled to image.size[::-1], for the first batch item. NOTE(review): this is not a list of label/score dicts, so the List[Dict[str, Any]] annotation looks inaccurate - confirm with callers + """ + inputs = data.pop("inputs", data) + + # decode base64 image to PIL + image = Image.open(BytesIO(base64.b64decode(inputs['image']))) + + # preprocess image + encoding = self.feature_extractor(images=image, return_tensors="pt") + pixel_values = encoding["pixel_values"].to(self.device) + with torch.no_grad(): + outputs = self.model(pixel_values=pixel_values) + logits = outputs.logits + upsampled_logits = nn.functional.interpolate(logits, + size=image.size[::-1], + mode="bilinear", + align_corners=False,) + pred_seg = upsampled_logits.argmax(dim=1)[0] + return pred_seg.tolist() diff --git a/models/segformer_b2_clothes (mattmdjaga)/mattmdjaga_segformer_b2_clothes.json b/models/segformer_b2_clothes (mattmdjaga)/mattmdjaga_segformer_b2_clothes.json new file mode 100644 index 0000000000000000000000000000000000000000..0b574956d4dbb16142a6309fa81d8c8071511b35 --- /dev/null +++ b/models/segformer_b2_clothes (mattmdjaga)/mattmdjaga_segformer_b2_clothes.json @@ -0,0 +1,111 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:efaab1cf-c09e-495a-9d76-54604e1e8c37", + "version": 1, + "metadata": { + "timestamp": "2025-06-05T09:34:49.062306+00:00", + "component": { + "type": "machine-learning-model", + "bom-ref": "mattmdjaga/segformer_b2_clothes-ea4ad23a-7fca-57ca-9b50-5cbca9a21441", + "name": "mattmdjaga/segformer_b2_clothes", + "externalReferences": [ + { + "url": "https://huggingface.co/mattmdjaga/segformer_b2_clothes", + "type": "documentation" + } + ], + "modelCard": { + "modelParameters": { + "task": "image-segmentation", + "architectureFamily": "segformer", + "modelArchitecture": "SegformerForSemanticSegmentation", + "datasets": [ + { + "ref": "mattmdjaga/human_parsing_dataset-2f113f97-be86-5ece-a359-cb1d9aa1cb78" + } + ] + }, +
"properties": [ + { + "name": "library_name", + "value": "transformers" + } + ] + }, + "authors": [ + { + "name": "mattmdjaga" + } + ], + "licenses": [ + { + "license": { + "id": "MIT", + "url": "https://spdx.org/licenses/MIT.html" + } + } + ], + "tags": [ + "transformers", + "pytorch", + "onnx", + "safetensors", + "segformer", + "vision", + "image-segmentation", + "dataset:mattmdjaga/human_parsing_dataset", + "arxiv:2105.15203", + "license:mit", + "endpoints_compatible", + "region:us" + ] + } + }, + "components": [ + { + "type": "data", + "bom-ref": "mattmdjaga/human_parsing_dataset-2f113f97-be86-5ece-a359-cb1d9aa1cb78", + "name": "mattmdjaga/human_parsing_dataset", + "data": [ + { + "type": "dataset", + "bom-ref": "mattmdjaga/human_parsing_dataset-2f113f97-be86-5ece-a359-cb1d9aa1cb78", + "name": "mattmdjaga/human_parsing_dataset", + "contents": { + "url": "https://huggingface.co/datasets/mattmdjaga/human_parsing_dataset", + "properties": [ + { + "name": "task_categories", + "value": "image-segmentation" + }, + { + "name": "task_ids", + "value": "semantic-segmentation" + }, + { + "name": "size_categories", + "value": "10K