boopathiraj committed on
Commit 9285cd7 · verified · 1 Parent(s): 8ac21af

Upload folder using huggingface_hub

Files changed (48)
  1. .gitattributes +6 -0
  2. MODNet/.gitignore +97 -0
  3. MODNet/LICENSE +201 -0
  4. MODNet/README.md +129 -0
  5. MODNet/__pycache__/modnet.cpython-312.pyc +0 -0
  6. MODNet/demo/image_matting/colab/README.md +2 -0
  7. MODNet/demo/image_matting/colab/__pycache__/inference.cpython-312.pyc +0 -0
  8. MODNet/demo/image_matting/colab/inference.py +105 -0
  9. MODNet/demo/image_matting/colab/input/portrait.jpg +3 -0
  10. MODNet/demo/video_matting/custom/README.md +50 -0
  11. MODNet/demo/video_matting/custom/requirements.txt +6 -0
  12. MODNet/demo/video_matting/custom/run.py +114 -0
  13. MODNet/demo/video_matting/webcam/README.md +52 -0
  14. MODNet/demo/video_matting/webcam/requirements.txt +5 -0
  15. MODNet/demo/video_matting/webcam/run.py +67 -0
  16. MODNet/doc/gif/commercial_image_matting_model_result.gif +3 -0
  17. MODNet/doc/gif/commercial_image_matting_website.gif +3 -0
  18. MODNet/doc/gif/homepage_demo.gif +3 -0
  19. MODNet/doc/gif/image_matting_demo.gif +3 -0
  20. MODNet/doc/gif/video_matting_demo.gif +3 -0
  21. MODNet/matte.zip +3 -0
  22. MODNet/modnet.py +255 -0
  23. MODNet/onnx/README.md +30 -0
  24. MODNet/onnx/__init__.py +0 -0
  25. MODNet/onnx/export_onnx.py +55 -0
  26. MODNet/onnx/inference_onnx.py +104 -0
  27. MODNet/onnx/modnet_onnx.py +252 -0
  28. MODNet/onnx/requirements.txt +4 -0
  29. MODNet/pretrained/README.md +2 -0
  30. MODNet/pretrained/modnet_photographic_portrait_matting.ckpt +3 -0
  31. MODNet/src/__init__.py +0 -0
  32. MODNet/src/__pycache__/__init__.cpython-312.pyc +0 -0
  33. MODNet/src/models/__init__.py +0 -0
  34. MODNet/src/models/__pycache__/__init__.cpython-312.pyc +0 -0
  35. MODNet/src/models/__pycache__/modnet.cpython-312.pyc +0 -0
  36. MODNet/src/models/backbones/__init__.py +10 -0
  37. MODNet/src/models/backbones/__pycache__/__init__.cpython-312.pyc +0 -0
  38. MODNet/src/models/backbones/__pycache__/mobilenetv2.cpython-312.pyc +0 -0
  39. MODNet/src/models/backbones/__pycache__/wrapper.cpython-312.pyc +0 -0
  40. MODNet/src/models/backbones/mobilenetv2.py +199 -0
  41. MODNet/src/models/backbones/wrapper.py +82 -0
  42. MODNet/src/trainer.py +299 -0
  43. MODNet/torchscript/README.md +18 -0
  44. MODNet/torchscript/__init__.py +0 -0
  45. MODNet/torchscript/export_torchscript.py +46 -0
  46. MODNet/torchscript/modnet_torchscript.py +258 -0
  47. config.json +6 -6
  48. modeling_modnet.py +16 -16
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ MODNet/demo/image_matting/colab/input/portrait.jpg filter=lfs diff=lfs merge=lfs -text
37
+ MODNet/doc/gif/commercial_image_matting_model_result.gif filter=lfs diff=lfs merge=lfs -text
38
+ MODNet/doc/gif/commercial_image_matting_website.gif filter=lfs diff=lfs merge=lfs -text
39
+ MODNet/doc/gif/homepage_demo.gif filter=lfs diff=lfs merge=lfs -text
40
+ MODNet/doc/gif/image_matting_demo.gif filter=lfs diff=lfs merge=lfs -text
41
+ MODNet/doc/gif/video_matting_demo.gif filter=lfs diff=lfs merge=lfs -text
MODNet/.gitignore ADDED
@@ -0,0 +1,97 @@
1
+ # Temporary directories and files
2
+ *.ckpt
3
+ *.onnx
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ env/
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *,cover
50
+ .hypothesis/
51
+
52
+ # Translations
53
+ *.mo
54
+ *.pot
55
+
56
+ # Django stuff:
57
+ *.log
58
+ local_settings.py
59
+
60
+ # Flask stuff:
61
+ instance/
62
+ .webassets-cache
63
+
64
+ # Scrapy stuff:
65
+ .scrapy
66
+
67
+ # Sphinx documentation
68
+ docs/_build/
69
+
70
+ # PyBuilder
71
+ target/
72
+
73
+ # IPython Notebook
74
+ .ipynb_checkpoints
75
+
76
+ # pyenv
77
+ .python-version
78
+
79
+ # celery beat schedule file
80
+ celerybeat-schedule
81
+
82
+ # dotenv
83
+ .env
84
+
85
+ # virtualenv
86
+ venv/
87
+ ENV/
88
+
89
+ # Spyder project settings
90
+ .spyderproject
91
+
92
+ # Rope project settings
93
+ .ropeproject
94
+
95
+
96
+ # Project files
97
+ .vscode
MODNet/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
MODNet/README.md ADDED
@@ -0,0 +1,129 @@
1
+ <h2 align="center">MODNet: Trimap-Free Portrait Matting in Real Time</h2>
2
+
3
+ <div align="center"><i>MODNet: Real-Time Trimap-Free Portrait Matting via Objective Decomposition (AAAI 2022)</i></div>
4
+
5
+ <br />
6
+
7
+ <img src="doc/gif/homepage_demo.gif" width="100%">
8
+
9
+ <div align="center">MODNet is a model for <b>real-time</b> portrait matting with <b>only RGB image input</b></div>
10
+ <div align="center">MODNet是一个<b>仅需RGB图片输入</b>的<b>实时</b>人像抠图模型</div>
11
+
12
+ <br />
13
+
14
+ <p align="center">
15
+ <a href="#online-application-在线应用">Online Application (在线应用)</a> |
16
+ <a href="#research-demo">Research Demo</a> |
17
+ <a href="https://arxiv.org/pdf/2011.11961.pdf">AAAI 2022 Paper</a> |
18
+ <a href="https://youtu.be/PqJ3BRHX3Lc">Supplementary Video</a>
19
+ </p>
20
+
21
+ <p align="center">
22
+ <a href="#community">Community</a> |
23
+ <a href="#code">Code</a> |
24
+ <a href="#ppm-benchmark">PPM Benchmark</a> |
25
+ <a href="#license">License</a> |
26
+ <a href="#acknowledgement">Acknowledgement</a> |
27
+ <a href="#citation">Citation</a> |
28
+ <a href="#contact">Contact</a>
29
+ </p>
30
+
31
+ ---
32
+
33
+
34
+ ## Online Application (在线应用)
35
+
36
+ The model used in the online demo (unpublished) is only **7M**! It processes **2K**-resolution images at **fast** speed on common PCs or mobile devices, with results **better** than the research demos!
37
+ Please try online portrait image matting on [my personal homepage](https://zhke.io/#/?modnet_demo) for fun!
38
+
39
+ 在线应用中使用的模型(未发布)大小仅为**7M**!可以在普通PC或移动设备上**快速**处理具有**2K**分辨率的图像!效果比研究示例**更好**!
40
+ 请通过[我的主页](https://zhke.io/#/?modnet_demo)在线尝试图片抠像!
41
+
42
+
43
+ ## Research Demo
44
+
45
+ All the models behind the following demos are trained on the datasets mentioned in [our paper](https://arxiv.org/pdf/2011.11961.pdf).
46
+
47
+ ### Portrait Image Matting
48
+ We provide an [online Colab demo](https://colab.research.google.com/drive/1GANpbKT06aEFiW-Ssx0DQnnEADcXwQG6?usp=sharing) for portrait image matting.
49
+ It allows you to upload portrait images and predict/visualize/download the alpha mattes.
50
+
51
+ <!-- <img src="doc/gif/image_matting_demo.gif" width='40%'> -->
52
+
53
+ ### Portrait Video Matting
54
+ We provide two real-time portrait video matting demos based on WebCam. When using the demo, you can move the WebCam around at will.
55
+ If you have an Ubuntu system, we recommend trying the [offline demo](demo/video_matting/webcam) to get a higher *fps*. Otherwise, you can access the [online Colab demo](https://colab.research.google.com/drive/1Pt3KDSc2q7WxFvekCnCLD8P0gBEbxm6J?usp=sharing).
56
+ We also provide an [offline demo](demo/video_matting/custom) that allows you to process custom videos.
57
+
58
+ <!-- <img src="doc/gif/video_matting_demo.gif" width='60%'> -->
59
+
60
+
61
+ ## Community
62
+
63
+ We share some cool applications/extensions of MODNet built by the community.
64
+
65
+ <!-- - **WebGUI for Portrait Image Matting** -->
66
+ <!-- You can try [this WebGUI](https://www.gradio.app/hub/aliabd/modnet) (hosted on [Gradio](https://www.gradio.app/)) for portrait image matting from your browser without code! -->
67
+
68
+ - **Colab Demo of Bokeh (Blur Background)**
69
+ You can try [this Colab demo](https://colab.research.google.com/github/eyaler/avatars4all/blob/master/yarok.ipynb) (built by [@eyaler](https://github.com/eyaler)) to blur the background based on MODNet!
70
+
71
+ - **ONNX Version of MODNet**
72
+ You can convert the pre-trained MODNet to an ONNX model by using [this code](onnx) (provided by [@manthan3C273](https://github.com/manthan3C273)). You can also try [this Colab demo](https://colab.research.google.com/drive/1P3cWtg8fnmu9karZHYDAtmm1vj1rgA-f?usp=sharing) for MODNet image matting (ONNX version).
73
+
74
+ - **TorchScript Version of MODNet**
75
+ You can convert the pre-trained MODNet to a TorchScript model by using [this code](torchscript) (provided by [@yarkable](https://github.com/yarkable)).
76
+
77
+ - **TensorRT Version of MODNet**
78
+ You can access [this Github repository](https://github.com/jkjung-avt/tensorrt_demos) to try the TensorRT version of MODNet (provided by [@jkjung-avt](https://github.com/jkjung-avt)).
79
+
80
+ - **Docker Container for MODNet**
81
+ You can access [this Github repository](https://github.com/nahidalam/modnet_docker) for a containerized version of MODNet with the Docker environment (provided by [@nahidalam](https://github.com/nahidalam)).
82
+
83
+
84
+ There are some resources about MODNet from the community.
85
+ - [Video from What's AI YouTube Channel](https://youtu.be/rUo0wuVyefU)
86
+ - [Article from Louis Bouchard's Blog](https://www.louisbouchard.ai/remove-background/)
87
+
88
+
89
+ ## Code
90
+ We provide the [code](src/trainer.py) of MODNet training iteration, including:
91
+ - **Supervised Training**: Train MODNet on a labeled matting dataset
92
+ - **SOC Adaptation**: Adapt a trained MODNet to an unlabeled dataset
93
+
94
+ In code comments, we provide examples for using the functions.
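A minimal usage sketch of these two stages is given below. The function names `supervised_training_iter` and `soc_adaptation_iter`, their signatures, and the dummy tensors are assumptions based on the documented usage in `src/trainer.py`; defer to those docstrings for the authoritative examples.

```python
# Hedged sketch -- trainer function names/signatures are assumptions; see src/trainer.py docstrings.
import copy

import torch
import torch.nn as nn

from src.models.modnet import MODNet
from src.trainer import supervised_training_iter, soc_adaptation_iter  # assumed names

modnet = nn.DataParallel(MODNet())
optimizer = torch.optim.SGD(modnet.parameters(), lr=0.01, momentum=0.9)

# Dummy batch standing in for a real labeled matting DataLoader (image, trimap, gt_matte).
image = torch.randn(2, 3, 512, 512)
trimap = torch.rand(2, 1, 512, 512)
gt_matte = torch.rand(2, 1, 512, 512)

# Supervised training: one iteration on labeled data.
semantic_loss, detail_loss, matte_loss = supervised_training_iter(
    modnet, optimizer, image, trimap, gt_matte)

# SOC adaptation: one iteration on unlabeled data, guided by a frozen copy of the model.
backup_modnet = copy.deepcopy(modnet)
soc_semantic_loss, soc_detail_loss = soc_adaptation_iter(
    modnet, backup_modnet, optimizer, image)
```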
95
+
96
+
97
+ ## PPM Benchmark
98
+ The PPM benchmark is released in a separate repository [PPM](https://github.com/ZHKKKe/PPM).
99
+
100
+
101
+ ## License
102
+ The code, models, and demos in this repository (excluding GIF files under the folder `doc/gif`) are released under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) license.
103
+
104
+
105
+ ## Acknowledgement
106
+ - We thank
107
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[@yzhou0919](https://github.com/yzhou0919), [@eyaler](https://github.com/eyaler), [@manthan3C273](https://github.com/manthan3C273), [@yarkable](https://github.com/yarkable), [@jkjung-avt](https://github.com/jkjung-avt), [@manzke](https://github.com/manzke), [@nahidalam](https://github.com/nahidalam),
108
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[the Gradio team](https://github.com/gradio-app/gradio), [What's AI YouTube Channel](https://www.youtube.com/channel/UCUzGQrN-lyyc0BWTYoJM_Sg), [Louis Bouchard's Blog](https://www.louisbouchard.ai),
109
+ for their contributions to this repository or their cool applications/extensions/resources of MODNet.
110
+
111
+
112
+ ## Citation
113
+ If this work helps your research, please consider citing:
114
+
115
+ ```bibtex
116
+ @InProceedings{MODNet,
117
+ author = {Zhanghan Ke and Jiayu Sun and Kaican Li and Qiong Yan and Rynson W.H. Lau},
118
+ title = {MODNet: Real-Time Trimap-Free Portrait Matting via Objective Decomposition},
119
+ booktitle = {AAAI},
120
+ year = {2022},
121
+ }
122
+ ```
123
+
124
+
125
+ ## Contact
126
+ This repository is maintained by Zhanghan Ke ([@ZHKKKe](https://github.com/ZHKKKe)).
127
+ For questions, please contact `kezhanghan@outlook.com`.
128
+
129
+ <!-- <img src="doc/gif/commercial_image_matting_model_result.gif" width='100%'> -->
MODNet/__pycache__/modnet.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
MODNet/demo/image_matting/colab/README.md ADDED
@@ -0,0 +1,2 @@
1
+ ## MODNet - Portrait Image Matting Demo
2
+ Please try MODNet portrait image matting demo through our [online Colab demo](https://colab.research.google.com/drive/1GANpbKT06aEFiW-Ssx0DQnnEADcXwQG6?usp=sharing).
MODNet/demo/image_matting/colab/__pycache__/inference.cpython-312.pyc ADDED
Binary file (4.85 kB).
 
MODNet/demo/image_matting/colab/inference.py ADDED
@@ -0,0 +1,105 @@
1
+ import os
2
+ import sys
3
+ import argparse
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import torchvision.transforms as transforms
11
+
12
+ from src.models.modnet import MODNet
13
+
14
+
15
+ if __name__ == '__main__':
16
+ # define cmd arguments
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument('--input-path', type=str, help='path of input images')
19
+ parser.add_argument('--output-path', type=str, help='path of output images')
20
+ parser.add_argument('--ckpt-path', type=str, help='path of pre-trained MODNet')
21
+ args = parser.parse_args()
22
+
23
+ # check input arguments
24
+ if not os.path.exists(args.input_path):
25
+ print('Cannot find input path: {0}'.format(args.input_path))
26
+ exit()
27
+ if not os.path.exists(args.output_path):
28
+ print('Cannot find output path: {0}'.format(args.output_path))
29
+ exit()
30
+ if not os.path.exists(args.ckpt_path):
31
+ print('Cannot find ckpt path: {0}'.format(args.ckpt_path))
32
+ exit()
33
+
34
+ # define hyper-parameters
35
+ ref_size = 512
36
+
37
+ # define image to tensor transform
38
+ im_transform = transforms.Compose(
39
+ [
40
+ transforms.ToTensor(),
41
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
42
+ ]
43
+ )
44
+
45
+ # create MODNet and load the pre-trained ckpt
46
+ modnet = MODNet(backbone_pretrained=False)
47
+ modnet = nn.DataParallel(modnet)
48
+
49
+ if torch.cuda.is_available():
50
+ modnet = modnet.cuda()
51
+ weights = torch.load(args.ckpt_path)
52
+ else:
53
+ weights = torch.load(args.ckpt_path, map_location=torch.device('cpu'))
54
+ modnet.load_state_dict(weights)
55
+ modnet.eval()
56
+
57
+ # inference images
58
+ im_names = os.listdir(args.input_path)
59
+ for im_name in im_names:
60
+ print('Process image: {0}'.format(im_name))
61
+
62
+ # read image
63
+ im = Image.open(os.path.join(args.input_path, im_name))
64
+
65
+ # unify image channels to 3
66
+ im = np.asarray(im)
67
+ if len(im.shape) == 2:
68
+ im = im[:, :, None]
69
+ if im.shape[2] == 1:
70
+ im = np.repeat(im, 3, axis=2)
71
+ elif im.shape[2] == 4:
72
+ im = im[:, :, 0:3]
73
+
74
+ # convert image to PyTorch tensor
75
+ im = Image.fromarray(im)
76
+ im = im_transform(im)
77
+
78
+ # add mini-batch dim
79
+ im = im[None, :, :, :]
80
+
81
+ # resize image for input
82
+ im_b, im_c, im_h, im_w = im.shape
83
+ if max(im_h, im_w) < ref_size or min(im_h, im_w) > ref_size:
84
+ if im_w >= im_h:
85
+ im_rh = ref_size
86
+ im_rw = int(im_w / im_h * ref_size)
87
+ elif im_w < im_h:
88
+ im_rw = ref_size
89
+ im_rh = int(im_h / im_w * ref_size)
90
+ else:
91
+ im_rh = im_h
92
+ im_rw = im_w
93
+
94
+ im_rw = im_rw - im_rw % 32
95
+ im_rh = im_rh - im_rh % 32
96
+ im = F.interpolate(im, size=(im_rh, im_rw), mode='area')
97
+
98
+ # inference
99
+ _, _, matte = modnet(im.cuda() if torch.cuda.is_available() else im, True)
100
+
101
+ # resize and save matte
102
+ matte = F.interpolate(matte, size=(im_h, im_w), mode='area')
103
+ matte = matte[0][0].data.cpu().numpy()
104
+ matte_name = im_name.split('.')[0] + '.png'
105
+ Image.fromarray(((matte * 255).astype('uint8')), mode='L').save(os.path.join(args.output_path, matte_name))
MODNet/demo/image_matting/colab/input/portrait.jpg ADDED

Git LFS Details

  • SHA256: 549c142d020fd62c141e70064eccb64f59e8ce8eba5c8ce85bb9cefd8d91fff9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.57 MB
MODNet/demo/video_matting/custom/README.md ADDED
@@ -0,0 +1,50 @@
1
+ ## MODNet - Custom Portrait Video Matting Demo
2
+ This is a MODNet portrait video matting demo that allows you to process custom videos.
3
+
4
+ ### 1. Requirements
5
+ The basic requirements for this demo are:
6
+ - Ubuntu System
7
+ - Python 3+
8
+
9
+
10
+ ### 2. Introduction
11
+ We use ~400 unlabeled video clips (divided into ~50,000 frames) downloaded from the internet to perform SOC to adapt MODNet to the video domain. **Nonetheless, due to insufficient labeled training data (~3k labeled foregrounds), our model may still make errors in portrait semantics estimation under challenging scenes.** Besides, this demo does not currently support the OFD trick.
12
+
13
+
14
+ For a better experience, please make sure your videos satisfy:
15
+
16
+ * the portrait and background are distinguishable, <i>i.e.</i>, are not similar
17
+ * captured in soft and bright ambient lighting
18
+ * the contents do not move too fast
19
+
20
+ ### 3. Run Demo
21
+ We recommend creating a new conda virtual environment to run this demo, as follows:
22
+
23
+ 1. Clone the MODNet repository:
24
+ ```
25
+ git clone https://github.com/ZHKKKe/MODNet.git
26
+ cd MODNet
27
+ ```
28
+
29
+ 2. Download the pre-trained model from this [link](https://drive.google.com/file/d/1Nf1ZxeJZJL8Qx9KadcYYyEmmlKhTADxX/view?usp=sharing) and put it into the folder `MODNet/pretrained/`.
30
+
31
+
32
+ 3. Create a conda virtual environment named `modnet` (if it doesn't exist) and activate it. Here we use `python=3.6` as an example:
33
+ ```
34
+ conda create -n modnet python=3.6
35
+ source activate modnet
36
+ ```
37
+
38
+ 4. Install the required python dependencies (please make sure your CUDA version is supported by the PyTorch version installed):
39
+ ```
40
+ pip install -r demo/video_matting/custom/requirements.txt
41
+ ```
42
+
43
+ 5. Execute the main code:
44
+ ```
45
+ python -m demo.video_matting.custom.run --video YOUR_VIDEO_PATH
46
+ ```
47
+ where `YOUR_VIDEO_PATH` is the specific path of your video.
48
+ There are some optional arguments:
49
+ - `--result-type (default=fg)` : matte - save the alpha matte; fg - save the foreground
50
+ - `--fps (default=30)` : fps of the result video
MODNet/demo/video_matting/custom/requirements.txt ADDED
@@ -0,0 +1,6 @@
1
+ numpy
2
+ Pillow
3
+ opencv-python
4
+ torch >= 1.0.0
5
+ torchvision
6
+ tqdm
MODNet/demo/video_matting/custom/run.py ADDED
@@ -0,0 +1,114 @@
1
+ import os
2
+ import cv2
3
+ import argparse
4
+ import numpy as np
5
+ from PIL import Image
6
+ from tqdm import tqdm
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torchvision.transforms as transforms
11
+
12
+ from src.models.modnet import MODNet
13
+
14
+
15
+ torch_transforms = transforms.Compose(
16
+ [
17
+ transforms.ToTensor(),
18
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
19
+ ]
20
+ )
21
+
22
+
23
+ def matting(video, result, alpha_matte=False, fps=30):
24
+ # video capture
25
+ vc = cv2.VideoCapture(video)
26
+
27
+ if vc.isOpened():
28
+ rval, frame = vc.read()
29
+ else:
30
+ rval = False
31
+
32
+ if not rval:
33
+ print('Failed to read the video: {0}'.format(video))
34
+ exit()
35
+
36
+ num_frame = vc.get(cv2.CAP_PROP_FRAME_COUNT)
37
+ h, w = frame.shape[:2]
38
+ if w >= h:
39
+ rh = 512
40
+ rw = int(w / h * 512)
41
+ else:
42
+ rw = 512
43
+ rh = int(h / w * 512)
44
+ rh = rh - rh % 32
45
+ rw = rw - rw % 32
46
+
47
+ # video writer
48
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
49
+ video_writer = cv2.VideoWriter(result, fourcc, fps, (w, h))
50
+
51
+ print('Start matting...')
52
+ with tqdm(range(int(num_frame))) as t:
53
+ for c in t:
54
+ frame_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
55
+ frame_np = cv2.resize(frame_np, (rw, rh), interpolation=cv2.INTER_AREA)
56
+
57
+ frame_PIL = Image.fromarray(frame_np)
58
+ frame_tensor = torch_transforms(frame_PIL)
59
+ frame_tensor = frame_tensor[None, :, :, :]
60
+ if GPU:
61
+ frame_tensor = frame_tensor.cuda()
62
+
63
+ with torch.no_grad():
64
+ _, _, matte_tensor = modnet(frame_tensor, True)
65
+
66
+ matte_tensor = matte_tensor.repeat(1, 3, 1, 1)
67
+ matte_np = matte_tensor[0].data.cpu().numpy().transpose(1, 2, 0)
68
+ if alpha_matte:
69
+ view_np = matte_np * np.full(frame_np.shape, 255.0)
70
+ else:
71
+ view_np = matte_np * frame_np + (1 - matte_np) * np.full(frame_np.shape, 255.0)
72
+ view_np = cv2.cvtColor(view_np.astype(np.uint8), cv2.COLOR_RGB2BGR)
73
+ view_np = cv2.resize(view_np, (w, h))
74
+ video_writer.write(view_np)
75
+
76
+ rval, frame = vc.read()
77
+ c += 1
78
+
79
+ video_writer.release()
80
+ print('Save the result video to {0}'.format(result))
81
+
82
+
83
+ if __name__ == '__main__':
84
+ parser = argparse.ArgumentParser()
85
+ parser.add_argument('--video', type=str, required=True, help='input video file')
86
+ parser.add_argument('--result-type', type=str, default='fg', choices=['fg', 'matte'],
87
+ help='matte - save the alpha matte; fg - save the foreground')
88
+ parser.add_argument('--fps', type=int, default=30, help='fps of the result video')
89
+
90
+ print('Get CMD Arguments...')
91
+ args = parser.parse_args()
92
+
93
+ if not os.path.exists(args.video):
94
+ print('Cannot find the input video: {0}'.format(args.video))
95
+ exit()
96
+
97
+ print('Load pre-trained MODNet...')
98
+ pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
99
+ modnet = MODNet(backbone_pretrained=False)
100
+ modnet = nn.DataParallel(modnet)
101
+
102
+ GPU = True if torch.cuda.device_count() > 0 else False
103
+ if GPU:
104
+ print('Use GPU...')
105
+ modnet = modnet.cuda()
106
+ modnet.load_state_dict(torch.load(pretrained_ckpt))
107
+ else:
108
+ print('Use CPU...')
109
+ modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
110
+ modnet.eval()
111
+
112
+ result = os.path.splitext(args.video)[0] + '_{0}.mp4'.format(args.result_type)
113
+ alpha_matte = True if args.result_type == 'matte' else False
114
+ matting(args.video, result, alpha_matte, args.fps)
MODNet/demo/video_matting/webcam/README.md ADDED
@@ -0,0 +1,52 @@
1
+ ## MODNet - WebCam-Based Portrait Video Matting Demo
2
+ This is a MODNet portrait video matting demo based on WebCam. It will call your local WebCam and display the matting results in real time. The demo can run under CPU or GPU.
3
+
4
+ ### 1. Requirements
5
+ The basic requirements for this demo are:
6
+ - Ubuntu System
7
+ - WebCam
8
+ - Python 3+
9
+
10
+ **NOTE**: If your device does not satisfy the above conditions, please try our [online Colab demo](https://colab.research.google.com/drive/1Pt3KDSc2q7WxFvekCnCLD8P0gBEbxm6J?usp=sharing).
11
+
12
+
13
+ ### 2. Introduction
14
+ We use ~400 unlabeled video clips (divided into ~50,000 frames) downloaded from the internet to perform SOC to adapt MODNet to the video domain. **Nonetheless, due to insufficient labeled training data (~3k labeled foregrounds), our model may still make errors in portrait semantics estimation under challenging scenes.** Besides, this demo does not currently support the OFD trick, which will be provided soon.
15
+
16
+ For a better experience, please:
17
+
18
+ * make sure the portrait and background are distinguishable, <i>i.e.</i>, are not similar
19
+ * run in soft and bright ambient lighting
20
+ * do not be too close or too far from the WebCam
21
+ * do not move too fast
22
+
23
+ ### 3. Run Demo
24
+ We recommend creating a new conda virtual environment to run this demo, as follows:
25
+
26
+ 1. Clone the MODNet repository:
27
+ ```
28
+ git clone https://github.com/ZHKKKe/MODNet.git
29
+ cd MODNet
30
+ ```
31
+
32
+ 2. Download the pre-trained model from this [link](https://drive.google.com/file/d/1Nf1ZxeJZJL8Qx9KadcYYyEmmlKhTADxX/view?usp=sharing) and put it into the folder `MODNet/pretrained/`.
33
+
34
+
35
+ 3. Create a conda virtual environment named `modnet` (if it doesn't exist) and activate it. Here we use `python=3.6` as an example:
36
+ ```
37
+ conda create -n modnet python=3.6
38
+ source activate modnet
39
+ ```
40
+
41
+ 4. Install the required python dependencies (please make sure your CUDA version is supported by the PyTorch version installed):
42
+ ```
43
+ pip install -r demo/video_matting/webcam/requirements.txt
44
+ ```
45
+
46
+ 5. Execute the main code:
47
+ ```
48
+ python -m demo.video_matting.webcam.run
49
+ ```
50
+
51
+ ### 4. Acknowledgement
52
+ We thank [@tkianai](https://github.com/tkianai) and [@mazhar004](https://github.com/mazhar004) for their contributions to making this demo available for CPU use.
MODNet/demo/video_matting/webcam/requirements.txt ADDED
@@ -0,0 +1,5 @@
1
+ numpy
2
+ Pillow
3
+ opencv-python
4
+ torch >= 1.0.0
5
+ torchvision
MODNet/demo/video_matting/webcam/run.py ADDED
@@ -0,0 +1,67 @@
1
+ import cv2
2
+ import numpy as np
3
+ from PIL import Image
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ import torchvision.transforms as transforms
8
+
9
+ from src.models.modnet import MODNet
10
+
11
+
12
+ torch_transforms = transforms.Compose(
13
+ [
14
+ transforms.ToTensor(),
15
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
16
+ ]
17
+ )
18
+
19
+ print('Load pre-trained MODNet...')
20
+ pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
21
+ modnet = MODNet(backbone_pretrained=False)
22
+ modnet = nn.DataParallel(modnet)
23
+
24
+ GPU = True if torch.cuda.device_count() > 0 else False
25
+ if GPU:
26
+ print('Use GPU...')
27
+ modnet = modnet.cuda()
28
+ modnet.load_state_dict(torch.load(pretrained_ckpt))
29
+ else:
30
+ print('Use CPU...')
31
+ modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
32
+
33
+ modnet.eval()
34
+
35
+ print('Init WebCam...')
36
+ cap = cv2.VideoCapture(0)
37
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
38
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
39
+
40
+ print('Start matting...')
41
+ while True:
42
+ _, frame_np = cap.read()
43
+ frame_np = cv2.cvtColor(frame_np, cv2.COLOR_BGR2RGB)
44
+ frame_np = cv2.resize(frame_np, (910, 512), interpolation=cv2.INTER_AREA)
45
+ frame_np = frame_np[:, 120:792, :]
46
+ frame_np = cv2.flip(frame_np, 1)
47
+
48
+ frame_PIL = Image.fromarray(frame_np)
49
+ frame_tensor = torch_transforms(frame_PIL)
50
+ frame_tensor = frame_tensor[None, :, :, :]
51
+ if GPU:
52
+ frame_tensor = frame_tensor.cuda()
53
+
54
+ with torch.no_grad():
55
+ _, _, matte_tensor = modnet(frame_tensor, True)
56
+
57
+ matte_tensor = matte_tensor.repeat(1, 3, 1, 1)
58
+ matte_np = matte_tensor[0].data.cpu().numpy().transpose(1, 2, 0)
59
+ fg_np = matte_np * frame_np + (1 - matte_np) * np.full(frame_np.shape, 255.0)
60
+ view_np = np.uint8(np.concatenate((frame_np, fg_np), axis=1))
61
+ view_np = cv2.cvtColor(view_np, cv2.COLOR_RGB2BGR)
62
+
63
+ cv2.imshow('MODNet - WebCam [Press \'Q\' To Exit]', view_np)
64
+ if cv2.waitKey(1) & 0xFF == ord('q'):
65
+ break
66
+
67
+ print('Exit...')
MODNet/doc/gif/commercial_image_matting_model_result.gif ADDED

Git LFS Details

  • SHA256: b0193f1e6e70c6324812ee349de7fb6d283381c820ea2685da78667266cc6a35
  • Pointer size: 133 Bytes
  • Size of remote file: 11.4 MB
MODNet/doc/gif/commercial_image_matting_website.gif ADDED

Git LFS Details

  • SHA256: 4ee4dbeee80d4720f3396370389560748b43604675a01aa8781aa741b7b8e649
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
MODNet/doc/gif/homepage_demo.gif ADDED

Git LFS Details

  • SHA256: a18a7bf0fcc50d2ce8fe4e1f1801c714b9cf5a4561897de4760f9ba655400d34
  • Pointer size: 133 Bytes
  • Size of remote file: 23.4 MB
MODNet/doc/gif/image_matting_demo.gif ADDED

Git LFS Details

  • SHA256: c727629197ab654f9fff02745cc2b64f68fc07202a28de87a723bb15d88f5dbe
  • Pointer size: 132 Bytes
  • Size of remote file: 9.68 MB
MODNet/doc/gif/video_matting_demo.gif ADDED

Git LFS Details

  • SHA256: e622a2dfa267d0386b258094259dbc1838ca31765dbdefd568b4782e384f347a
  • Pointer size: 132 Bytes
  • Size of remote file: 9.02 MB
MODNet/matte.zip ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9e2e4039d37c912f10c881f6f04f2824153fe80bf333b5c90c84e257cc153c
3
+ size 139057
MODNet/modnet.py ADDED
@@ -0,0 +1,255 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from .src.models.backbones import SUPPORTED_BACKBONES
6
+
7
+
8
+ #------------------------------------------------------------------------------
9
+ # MODNet Basic Modules
10
+ #------------------------------------------------------------------------------
11
+
12
+ class IBNorm(nn.Module):
13
+ """ Combine Instance Norm and Batch Norm into One Layer
14
+ """
15
+
16
+ def __init__(self, in_channels):
17
+ super(IBNorm, self).__init__()
18
+ in_channels = in_channels
19
+ self.bnorm_channels = int(in_channels / 2)
20
+ self.inorm_channels = in_channels - self.bnorm_channels
21
+
22
+ self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
23
+ self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
24
+
25
+ def forward(self, x):
26
+ bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
27
+ in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
28
+
29
+ return torch.cat((bn_x, in_x), 1)
30
+
31
+
32
+ class Conv2dIBNormRelu(nn.Module):
33
+ """ Convolution + IBNorm + ReLu
34
+ """
35
+
36
+ def __init__(self, in_channels, out_channels, kernel_size,
37
+ stride=1, padding=0, dilation=1, groups=1, bias=True,
38
+ with_ibn=True, with_relu=True):
39
+ super(Conv2dIBNormRelu, self).__init__()
40
+
41
+ layers = [
42
+ nn.Conv2d(in_channels, out_channels, kernel_size,
43
+ stride=stride, padding=padding, dilation=dilation,
44
+ groups=groups, bias=bias)
45
+ ]
46
+
47
+ if with_ibn:
48
+ layers.append(IBNorm(out_channels))
49
+ if with_relu:
50
+ layers.append(nn.ReLU(inplace=True))
51
+
52
+ self.layers = nn.Sequential(*layers)
53
+
54
+ def forward(self, x):
55
+ return self.layers(x)
56
+
57
+
58
+ class SEBlock(nn.Module):
59
+ """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
60
+ """
61
+
62
+ def __init__(self, in_channels, out_channels, reduction=1):
63
+ super(SEBlock, self).__init__()
64
+ self.pool = nn.AdaptiveAvgPool2d(1)
65
+ self.fc = nn.Sequential(
66
+ nn.Linear(in_channels, int(in_channels // reduction), bias=False),
67
+ nn.ReLU(inplace=True),
68
+ nn.Linear(int(in_channels // reduction), out_channels, bias=False),
69
+ nn.Sigmoid()
70
+ )
71
+
72
+ def forward(self, x):
73
+ b, c, _, _ = x.size()
74
+ w = self.pool(x).view(b, c)
75
+ w = self.fc(w).view(b, c, 1, 1)
76
+
77
+ return x * w.expand_as(x)
78
+
79
+
80
+ #------------------------------------------------------------------------------
81
+ # MODNet Branches
82
+ #------------------------------------------------------------------------------
83
+
84
+ class LRBranch(nn.Module):
85
+ """ Low Resolution Branch of MODNet
86
+ """
87
+
88
+ def __init__(self, backbone):
89
+ super(LRBranch, self).__init__()
90
+
91
+ enc_channels = backbone.enc_channels
92
+
93
+ self.backbone = backbone
94
+ self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
95
+ self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
96
+ self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
97
+ self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
98
+
99
+ def forward(self, img, inference):
100
+ enc_features = self.backbone.forward(img)
101
+ enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
102
+
103
+ enc32x = self.se_block(enc32x)
104
+ lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
105
+ lr16x = self.conv_lr16x(lr16x)
106
+ lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
107
+ lr8x = self.conv_lr8x(lr8x)
108
+
109
+ pred_semantic = None
110
+ if not inference:
111
+ lr = self.conv_lr(lr8x)
112
+ pred_semantic = torch.sigmoid(lr)
113
+
114
+ return pred_semantic, lr8x, [enc2x, enc4x]
115
+
116
+
117
+ class HRBranch(nn.Module):
118
+ """ High Resolution Branch of MODNet
119
+ """
120
+
121
+ def __init__(self, hr_channels, enc_channels):
122
+ super(HRBranch, self).__init__()
123
+
124
+ self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
125
+ self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
126
+
127
+ self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
128
+ self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
129
+
130
+ self.conv_hr4x = nn.Sequential(
131
+ Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
132
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
133
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
134
+ )
135
+
136
+ self.conv_hr2x = nn.Sequential(
137
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
138
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
139
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
140
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
141
+ )
142
+
143
+ self.conv_hr = nn.Sequential(
144
+ Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
145
+ Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
146
+ )
147
+
148
+ def forward(self, img, enc2x, enc4x, lr8x, inference):
149
+ img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
150
+ img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
151
+
152
+ enc2x = self.tohr_enc2x(enc2x)
153
+ hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
154
+
155
+ enc4x = self.tohr_enc4x(enc4x)
156
+ hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
157
+
158
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
159
+ hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
160
+
161
+ hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
162
+ hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
163
+
164
+ pred_detail = None
165
+ if not inference:
166
+ hr = F.interpolate(hr2x, scale_factor=2, mode='bilinear', align_corners=False)
167
+ hr = self.conv_hr(torch.cat((hr, img), dim=1))
168
+ pred_detail = torch.sigmoid(hr)
169
+
170
+ return pred_detail, hr2x
171
+
172
+
173
+ class FusionBranch(nn.Module):
174
+ """ Fusion Branch of MODNet
175
+ """
176
+
177
+ def __init__(self, hr_channels, enc_channels):
178
+ super(FusionBranch, self).__init__()
179
+ self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
180
+
181
+ self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
182
+ self.conv_f = nn.Sequential(
183
+ Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
184
+ Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
185
+ )
186
+
187
+ def forward(self, img, lr8x, hr2x):
188
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
189
+ lr4x = self.conv_lr4x(lr4x)
190
+ lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
191
+
192
+ f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
193
+ f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
194
+ f = self.conv_f(torch.cat((f, img), dim=1))
195
+ pred_matte = torch.sigmoid(f)
196
+
197
+ return pred_matte
198
+
199
+
200
+ #------------------------------------------------------------------------------
201
+ # MODNet
202
+ #------------------------------------------------------------------------------
203
+
204
+ class MODNet(nn.Module):
205
+ """ Architecture of MODNet
206
+ """
207
+
208
+ def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
209
+ super(MODNet, self).__init__()
210
+
211
+ self.in_channels = in_channels
212
+ self.hr_channels = hr_channels
213
+ self.backbone_arch = backbone_arch
214
+ self.backbone_pretrained = backbone_pretrained
215
+
216
+ self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
217
+
218
+ self.lr_branch = LRBranch(self.backbone)
219
+ self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
220
+ self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
221
+
222
+ for m in self.modules():
223
+ if isinstance(m, nn.Conv2d):
224
+ self._init_conv(m)
225
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
226
+ self._init_norm(m)
227
+
228
+ if self.backbone_pretrained:
229
+ self.backbone.load_pretrained_ckpt()
230
+
231
+ def forward(self, img, inference):
232
+ pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(img, inference)
233
+ pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
234
+ pred_matte = self.f_branch(img, lr8x, hr2x)
235
+
236
+ return pred_semantic, pred_detail, pred_matte
237
+
238
+ def freeze_norm(self):
239
+ norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
240
+ for m in self.modules():
241
+ for n in norm_types:
242
+ if isinstance(m, n):
243
+ m.eval()
244
+ continue
245
+
246
+ def _init_conv(self, conv):
247
+ nn.init.kaiming_uniform_(
248
+ conv.weight, a=0, mode='fan_in', nonlinearity='relu')
249
+ if conv.bias is not None:
250
+ nn.init.constant_(conv.bias, 0)
251
+
252
+ def _init_norm(self, norm):
253
+ if norm.weight is not None:
254
+ nn.init.constant_(norm.weight, 1)
255
+ nn.init.constant_(norm.bias, 0)
MODNet/onnx/README.md ADDED
@@ -0,0 +1,30 @@
1
+ ## MODNet - ONNX Model
2
+
3
+ This ONNX version of MODNet is provided by [@manthan3C273](https://github.com/manthan3C273) from the community.
4
+ Please note that the PyTorch version required by this ONNX export function is higher than the one required by the official MODNet code (torch==1.7.1 is recommended).
5
+
6
+ You can try **MODNet - Image Matting Demo (ONNX version)** in [this Colab](https://colab.research.google.com/drive/1P3cWtg8fnmu9karZHYDAtmm1vj1rgA-f?usp=sharing).
7
+ You can also download the ONNX version of the official **Image Matting Model** from [this link](https://drive.google.com/file/d/1cgycTQlYXpTh26gB9FTnthE7AvruV8hd/view?usp=sharing).
8
+
9
+ To export the ONNX version of MODNet (assuming you are currently in the project root directory):
10
+ 1. Download the pre-trained **Image Matting Model** from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing) and put the model into the folder `MODNet/pretrained/`.
11
+
12
+ 2. Install all dependencies by:
13
+ ```
14
+ pip install -r onnx/requirements.txt
15
+ ```
16
+
17
+ 3. Export the ONNX version of MODNet by:
18
+ ```shell
19
+ python -m onnx.export_onnx \
20
+ --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
21
+ --output-path=pretrained/modnet_photographic_portrait_matting.onnx
22
+ ```
23
+
24
+ 4. Run inference with the ONNX model by:
25
+ ```shell
26
+ python -m onnx.inference_onnx \
27
+ --image-path=$FILENAME_OF_INPUT_IMAGE$ \
28
+ --output-path=$FILENAME_OF_OUTPUT_MATTE$ \
29
+ --model-path=pretrained/modnet_photographic_portrait_matting.onnx
30
+ ```
MODNet/onnx/__init__.py ADDED
File without changes
MODNet/onnx/export_onnx.py ADDED
@@ -0,0 +1,55 @@
1
+ """
2
+ Export ONNX model of MODNet with:
3
+ input shape: (batch_size, 3, height, width)
4
+ output shape: (batch_size, 1, height, width)
5
+
6
+ Arguments:
7
+ --ckpt-path: path of the checkpoint that will be converted
8
+ --output-path: path for saving the ONNX model
9
+
10
+ Example:
11
+ python export_onnx.py \
12
+ --ckpt-path=modnet_photographic_portrait_matting.ckpt \
13
+ --output-path=modnet_photographic_portrait_matting.onnx
14
+ """
15
+
16
+ import os
17
+ import argparse
18
+
19
+ import torch
20
+ import torch.nn as nn
21
+ from torch.autograd import Variable
22
+
23
+ from . import modnet_onnx
24
+
25
+
26
+ if __name__ == '__main__':
27
+ # define cmd arguments
28
+ parser = argparse.ArgumentParser()
29
+ parser.add_argument('--ckpt-path', type=str, required=True, help='path of the checkpoint that will be converted')
30
+ parser.add_argument('--output-path', type=str, required=True, help='path for saving the ONNX model')
31
+ args = parser.parse_args()
32
+
33
+ # check input arguments
34
+ if not os.path.exists(args.ckpt_path):
35
+ print('Cannot find checkpoint path: {0}'.format(args.ckpt_path))
36
+ exit()
37
+
38
+ # define model & load checkpoint
39
+ modnet = modnet_onnx.MODNet(backbone_pretrained=False)
40
+ modnet = nn.DataParallel(modnet).cuda()
41
+ state_dict = torch.load(args.ckpt_path)
42
+ modnet.load_state_dict(state_dict)
43
+ modnet.eval()
44
+
45
+ # prepare dummy_input
46
+ batch_size = 1
47
+ height = 512
48
+ width = 512
49
+ dummy_input = Variable(torch.randn(batch_size, 3, height, width)).cuda()
50
+
51
+ # export to onnx model
52
+ torch.onnx.export(
53
+ modnet.module, dummy_input, args.output_path, export_params = True,
54
+ input_names = ['input'], output_names = ['output'],
55
+ dynamic_axes = {'input': {0:'batch_size', 2:'height', 3:'width'}, 'output': {0: 'batch_size', 2: 'height', 3: 'width'}})
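After running the export above, the generated graph can be given a quick structural sanity check with the `onnx` package before it is handed to onnxruntime; a minimal hedged sketch (the output path follows the command shown in `onnx/README.md`):

```python
# Minimal post-export sanity check of the generated ONNX graph.
import onnx

model = onnx.load('pretrained/modnet_photographic_portrait_matting.onnx')
onnx.checker.check_model(model)  # raises a ValidationError if the graph is malformed

# The exporter above names the tensors 'input' and 'output'.
print([i.name for i in model.graph.input])   # expected: ['input']
print([o.name for o in model.graph.output])  # expected: ['output']
```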
MODNet/onnx/inference_onnx.py ADDED
@@ -0,0 +1,104 @@
1
+ """
2
+ Run inference with the ONNX model of MODNet
3
+
4
+ Arguments:
5
+ --image-path: path of the input image (a file)
6
+ --output-path: path for saving the predicted alpha matte (a file)
7
+ --model-path: path of the ONNX model
8
+
9
+ Example:
10
+ python inference_onnx.py \
11
+ --image-path=demo.jpg --output-path=matte.png --model-path=modnet.onnx
12
+ """
13
+
14
+ import os
15
+ import cv2
16
+ import argparse
17
+ import numpy as np
18
+ from PIL import Image
19
+
20
+ import onnx
21
+ import onnxruntime
22
+
23
+
24
+ if __name__ == '__main__':
25
+ # define cmd arguments
26
+ parser = argparse.ArgumentParser()
27
+ parser.add_argument('--image-path', type=str, help='path of the input image (a file)')
28
+ parser.add_argument('--output-path', type=str, help='path for saving the predicted alpha matte (a file)')
29
+ parser.add_argument('--model-path', type=str, help='path of the ONNX model')
30
+ args = parser.parse_args()
31
+
32
+ # check input arguments
33
+ if not os.path.exists(args.image_path):
34
+ print('Cannot find the input image: {0}'.format(args.image_path))
35
+ exit()
36
+ if not os.path.exists(args.model_path):
37
+ print('Cannot find the ONNX model: {0}'.format(args.model_path))
38
+ exit()
39
+
40
+ ref_size = 512
41
+
42
+ # Get x_scale_factor & y_scale_factor to resize image
43
+ def get_scale_factor(im_h, im_w, ref_size):
44
+
45
+ if max(im_h, im_w) < ref_size or min(im_h, im_w) > ref_size:
46
+ if im_w >= im_h:
47
+ im_rh = ref_size
48
+ im_rw = int(im_w / im_h * ref_size)
49
+ elif im_w < im_h:
50
+ im_rw = ref_size
51
+ im_rh = int(im_h / im_w * ref_size)
52
+ else:
53
+ im_rh = im_h
54
+ im_rw = im_w
55
+
56
+ im_rw = im_rw - im_rw % 32
57
+ im_rh = im_rh - im_rh % 32
58
+
59
+ x_scale_factor = im_rw / im_w
60
+ y_scale_factor = im_rh / im_h
61
+
62
+ return x_scale_factor, y_scale_factor
63
+
64
+ ##############################################
65
+ # Main Inference part
66
+ ##############################################
67
+
68
+ # read image
69
+ im = cv2.imread(args.image_path)
70
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
71
+
72
+ # unify image channels to 3
73
+ if len(im.shape) == 2:
74
+ im = im[:, :, None]
75
+ if im.shape[2] == 1:
76
+ im = np.repeat(im, 3, axis=2)
77
+ elif im.shape[2] == 4:
78
+ im = im[:, :, 0:3]
79
+
80
+ # normalize pixel values to the range [-1, 1]
81
+ im = (im - 127.5) / 127.5
82
+
83
+ im_h, im_w, im_c = im.shape
84
+ x, y = get_scale_factor(im_h, im_w, ref_size)
85
+
86
+ # resize image
87
+ im = cv2.resize(im, None, fx = x, fy = y, interpolation = cv2.INTER_AREA)
88
+
89
+ # prepare input shape
90
+ im = np.transpose(im)
91
+ im = np.swapaxes(im, 1, 2)
92
+ im = np.expand_dims(im, axis = 0).astype('float32')
93
+
94
+ # Initialize session and get prediction
95
+ session = onnxruntime.InferenceSession(args.model_path, None)
96
+ input_name = session.get_inputs()[0].name
97
+ output_name = session.get_outputs()[0].name
98
+ result = session.run([output_name], {input_name: im})
99
+
100
+ # refine matte
101
+ matte = (np.squeeze(result[0]) * 255).astype('uint8')
102
+ matte = cv2.resize(matte, dsize=(im_w, im_h), interpolation = cv2.INTER_AREA)
103
+
104
+ cv2.imwrite(args.output_path, matte)
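The script above saves only the grayscale alpha matte. As a purely illustrative follow-up (not shipped in this commit), the matte can be stacked onto the original image to produce a cutout with transparency; the file names below reuse the example invocation from the docstring, and `cutout.png` is a hypothetical output name.

```python
# Hypothetical post-processing: turn demo.jpg + matte.png into an RGBA cutout.
import cv2
import numpy as np

image = cv2.imread("demo.jpg")                          # BGR, uint8, original resolution
matte = cv2.imread("matte.png", cv2.IMREAD_GRAYSCALE)   # alpha matte written by inference_onnx.py
rgba = np.dstack((image, matte))                        # H x W x 4 (matte is resized back to image size)
cv2.imwrite("cutout.png", rgba)                         # PNG keeps the alpha channel
```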
MODNet/onnx/modnet_onnx.py ADDED
@@ -0,0 +1,252 @@
1
+ """
2
+ This file contains a modified version of the original file `modnet.py` without
3
+ `pred_semantic` and `pred_detail`, since both return None when `inference=True`.
4
+
5
+ It also omits the `inference` argument, which makes it easier to
6
+ convert the checkpoint to an ONNX model.
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+
13
+ from src.models.backbones import SUPPORTED_BACKBONES
14
+
15
+
16
+ #------------------------------------------------------------------------------
17
+ # MODNet Basic Modules
18
+ #------------------------------------------------------------------------------
19
+
20
+ class IBNorm(nn.Module):
21
+ """ Combine Instance Norm and Batch Norm into One Layer
22
+ """
23
+
24
+ def __init__(self, in_channels):
25
+ super(IBNorm, self).__init__()
26
+ in_channels = in_channels
27
+ self.bnorm_channels = int(in_channels / 2)
28
+ self.inorm_channels = in_channels - self.bnorm_channels
29
+
30
+ self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
31
+ self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
32
+
33
+ def forward(self, x):
34
+ bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
35
+ in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
36
+
37
+ return torch.cat((bn_x, in_x), 1)
38
+
39
+
40
+ class Conv2dIBNormRelu(nn.Module):
41
+ """ Convolution + IBNorm + ReLu
42
+ """
43
+
44
+ def __init__(self, in_channels, out_channels, kernel_size,
45
+ stride=1, padding=0, dilation=1, groups=1, bias=True,
46
+ with_ibn=True, with_relu=True):
47
+ super(Conv2dIBNormRelu, self).__init__()
48
+
49
+ layers = [
50
+ nn.Conv2d(in_channels, out_channels, kernel_size,
51
+ stride=stride, padding=padding, dilation=dilation,
52
+ groups=groups, bias=bias)
53
+ ]
54
+
55
+ if with_ibn:
56
+ layers.append(IBNorm(out_channels))
57
+ if with_relu:
58
+ layers.append(nn.ReLU(inplace=True))
59
+
60
+ self.layers = nn.Sequential(*layers)
61
+
62
+ def forward(self, x):
63
+ return self.layers(x)
64
+
65
+
66
+ class SEBlock(nn.Module):
67
+ """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
68
+ """
69
+
70
+ def __init__(self, in_channels, out_channels, reduction=1):
71
+ super(SEBlock, self).__init__()
72
+ self.pool = nn.AdaptiveAvgPool2d(1)
73
+ self.fc = nn.Sequential(
74
+ nn.Linear(in_channels, int(in_channels // reduction), bias=False),
75
+ nn.ReLU(inplace=True),
76
+ nn.Linear(int(in_channels // reduction), out_channels, bias=False),
77
+ nn.Sigmoid()
78
+ )
79
+
80
+ def forward(self, x):
81
+ b, c, _, _ = x.size()
82
+ w = self.pool(x).view(b, c)
83
+ w = self.fc(w).view(b, c, 1, 1)
84
+
85
+ return x * w.expand_as(x)
86
+
87
+
88
+ #------------------------------------------------------------------------------
89
+ # MODNet Branches
90
+ #------------------------------------------------------------------------------
91
+
92
+ class LRBranch(nn.Module):
93
+ """ Low Resolution Branch of MODNet
94
+ """
95
+
96
+ def __init__(self, backbone):
97
+ super(LRBranch, self).__init__()
98
+
99
+ enc_channels = backbone.enc_channels
100
+
101
+ self.backbone = backbone
102
+ self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
103
+ self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
104
+ self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
105
+ self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
106
+
107
+ def forward(self, img):
108
+ enc_features = self.backbone.forward(img)
109
+ enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
110
+
111
+ enc32x = self.se_block(enc32x)
112
+ lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
113
+ lr16x = self.conv_lr16x(lr16x)
114
+ lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
115
+ lr8x = self.conv_lr8x(lr8x)
116
+
117
+ return lr8x, [enc2x, enc4x]
118
+
119
+
120
+ class HRBranch(nn.Module):
121
+ """ High Resolution Branch of MODNet
122
+ """
123
+
124
+ def __init__(self, hr_channels, enc_channels):
125
+ super(HRBranch, self).__init__()
126
+
127
+ self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
128
+ self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
129
+
130
+ self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
131
+ self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
132
+
133
+ self.conv_hr4x = nn.Sequential(
134
+ Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
135
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
136
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
137
+ )
138
+
139
+ self.conv_hr2x = nn.Sequential(
140
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
141
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
142
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
143
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
144
+ )
145
+
146
+ self.conv_hr = nn.Sequential(
147
+ Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
148
+ Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
149
+ )
150
+
151
+ def forward(self, img, enc2x, enc4x, lr8x):
152
+ img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
153
+ img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
154
+
155
+ enc2x = self.tohr_enc2x(enc2x)
156
+ hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
157
+
158
+ enc4x = self.tohr_enc4x(enc4x)
159
+ hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
160
+
161
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
162
+ hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
163
+
164
+ hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
165
+ hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
166
+
167
+ return hr2x
168
+
169
+
170
+ class FusionBranch(nn.Module):
171
+ """ Fusion Branch of MODNet
172
+ """
173
+
174
+ def __init__(self, hr_channels, enc_channels):
175
+ super(FusionBranch, self).__init__()
176
+ self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
177
+
178
+ self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
179
+ self.conv_f = nn.Sequential(
180
+ Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
181
+ Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
182
+ )
183
+
184
+ def forward(self, img, lr8x, hr2x):
185
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
186
+ lr4x = self.conv_lr4x(lr4x)
187
+ lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
188
+
189
+ f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
190
+ f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
191
+ f = self.conv_f(torch.cat((f, img), dim=1))
192
+ pred_matte = torch.sigmoid(f)
193
+
194
+ return pred_matte
195
+
196
+
197
+ #------------------------------------------------------------------------------
198
+ # MODNet
199
+ #------------------------------------------------------------------------------
200
+
201
+ class MODNet(nn.Module):
202
+ """ Architecture of MODNet
203
+ """
204
+
205
+ def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
206
+ super(MODNet, self).__init__()
207
+
208
+ self.in_channels = in_channels
209
+ self.hr_channels = hr_channels
210
+ self.backbone_arch = backbone_arch
211
+ self.backbone_pretrained = backbone_pretrained
212
+
213
+ self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
214
+
215
+ self.lr_branch = LRBranch(self.backbone)
216
+ self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
217
+ self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
218
+
219
+ for m in self.modules():
220
+ if isinstance(m, nn.Conv2d):
221
+ self._init_conv(m)
222
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
223
+ self._init_norm(m)
224
+
225
+ if self.backbone_pretrained:
226
+ self.backbone.load_pretrained_ckpt()
227
+
228
+ def forward(self, img):
229
+ lr8x, [enc2x, enc4x] = self.lr_branch(img)
230
+ hr2x = self.hr_branch(img, enc2x, enc4x, lr8x)
231
+ pred_matte = self.f_branch(img, lr8x, hr2x)
232
+
233
+ return pred_matte
234
+
235
+ def freeze_norm(self):
236
+ norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
237
+ for m in self.modules():
238
+ for n in norm_types:
239
+ if isinstance(m, n):
240
+ m.eval()
241
+ continue
242
+
243
+ def _init_conv(self, conv):
244
+ nn.init.kaiming_uniform_(
245
+ conv.weight, a=0, mode='fan_in', nonlinearity='relu')
246
+ if conv.bias is not None:
247
+ nn.init.constant_(conv.bias, 0)
248
+
249
+ def _init_norm(self, norm):
250
+ if norm.weight is not None:
251
+ nn.init.constant_(norm.weight, 1)
252
+ nn.init.constant_(norm.bias, 0)
MODNet/onnx/requirements.txt ADDED
@@ -0,0 +1,4 @@
1
+ onnx==1.8.1
2
+ onnxruntime==1.6.0
3
+ opencv-python==4.5.1.48
4
+ torch==1.7.1
MODNet/pretrained/README.md ADDED
@@ -0,0 +1,2 @@
1
+ ## MODNet - Pre-Trained Models
2
+ This folder is used to save the official pre-trained models of MODNet. You can download them from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing).
MODNet/pretrained/modnet_photographic_portrait_matting.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c22235f0925deba15d4d63e53afcb654c47055bbcd98f56e393ab2584007ed8
3
+ size 26255603
MODNet/src/__init__.py ADDED
File without changes
MODNet/src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (128 Bytes).
 
MODNet/src/models/__init__.py ADDED
File without changes
MODNet/src/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (135 Bytes).
 
MODNet/src/models/__pycache__/modnet.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
MODNet/src/models/backbones/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from .wrapper import *
2
+
3
+
4
+ #------------------------------------------------------------------------------
5
+ # Replaceable Backbones
6
+ #------------------------------------------------------------------------------
7
+
8
+ SUPPORTED_BACKBONES = {
9
+ 'mobilenetv2': MobileNetV2Backbone,
10
+ }
MODNet/src/models/backbones/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (253 Bytes).
 
MODNet/src/models/backbones/__pycache__/mobilenetv2.cpython-312.pyc ADDED
Binary file (9.39 kB).
 
MODNet/src/models/backbones/__pycache__/wrapper.cpython-312.pyc ADDED
Binary file (4.46 kB).
 
MODNet/src/models/backbones/mobilenetv2.py ADDED
@@ -0,0 +1,199 @@
1
+ """ This file is adapted from https://github.com/thuyngch/Human-Segmentation-PyTorch"""
2
+
3
+ import math
4
+ import json
5
+ from functools import reduce
6
+
7
+ import torch
8
+ from torch import nn
9
+
10
+
11
+ #------------------------------------------------------------------------------
12
+ # Useful functions
13
+ #------------------------------------------------------------------------------
14
+
15
+ def _make_divisible(v, divisor, min_value=None):
16
+ if min_value is None:
17
+ min_value = divisor
18
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
19
+ # Make sure that round down does not go down by more than 10%.
20
+ if new_v < 0.9 * v:
21
+ new_v += divisor
22
+ return new_v
23
+
24
+
25
+ def conv_bn(inp, oup, stride):
26
+ return nn.Sequential(
27
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
28
+ nn.BatchNorm2d(oup),
29
+ nn.ReLU6(inplace=True)
30
+ )
31
+
32
+
33
+ def conv_1x1_bn(inp, oup):
34
+ return nn.Sequential(
35
+ nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
36
+ nn.BatchNorm2d(oup),
37
+ nn.ReLU6(inplace=True)
38
+ )
39
+
40
+
41
+ #------------------------------------------------------------------------------
42
+ # Class of Inverted Residual block
43
+ #------------------------------------------------------------------------------
44
+
45
+ class InvertedResidual(nn.Module):
46
+ def __init__(self, inp, oup, stride, expansion, dilation=1):
47
+ super(InvertedResidual, self).__init__()
48
+ self.stride = stride
49
+ assert stride in [1, 2]
50
+
51
+ hidden_dim = round(inp * expansion)
52
+ self.use_res_connect = self.stride == 1 and inp == oup
53
+
54
+ if expansion == 1:
55
+ self.conv = nn.Sequential(
56
+ # dw
57
+ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, dilation=dilation, bias=False),
58
+ nn.BatchNorm2d(hidden_dim),
59
+ nn.ReLU6(inplace=True),
60
+ # pw-linear
61
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
62
+ nn.BatchNorm2d(oup),
63
+ )
64
+ else:
65
+ self.conv = nn.Sequential(
66
+ # pw
67
+ nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
68
+ nn.BatchNorm2d(hidden_dim),
69
+ nn.ReLU6(inplace=True),
70
+ # dw
71
+ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, dilation=dilation, bias=False),
72
+ nn.BatchNorm2d(hidden_dim),
73
+ nn.ReLU6(inplace=True),
74
+ # pw-linear
75
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
76
+ nn.BatchNorm2d(oup),
77
+ )
78
+
79
+ def forward(self, x):
80
+ if self.use_res_connect:
81
+ return x + self.conv(x)
82
+ else:
83
+ return self.conv(x)
84
+
85
+
86
+ #------------------------------------------------------------------------------
87
+ # Class of MobileNetV2
88
+ #------------------------------------------------------------------------------
89
+
90
+ class MobileNetV2(nn.Module):
91
+ def __init__(self, in_channels, alpha=1.0, expansion=6, num_classes=1000):
92
+ super(MobileNetV2, self).__init__()
93
+ self.in_channels = in_channels
94
+ self.num_classes = num_classes
95
+ input_channel = 32
96
+ last_channel = 1280
97
+ interverted_residual_setting = [
98
+ # t, c, n, s
99
+ [1 , 16, 1, 1],
100
+ [expansion, 24, 2, 2],
101
+ [expansion, 32, 3, 2],
102
+ [expansion, 64, 4, 2],
103
+ [expansion, 96, 3, 1],
104
+ [expansion, 160, 3, 2],
105
+ [expansion, 320, 1, 1],
106
+ ]
107
+
108
+ # building first layer
109
+ input_channel = _make_divisible(input_channel*alpha, 8)
110
+ self.last_channel = _make_divisible(last_channel*alpha, 8) if alpha > 1.0 else last_channel
111
+ self.features = [conv_bn(self.in_channels, input_channel, 2)]
112
+
113
+ # building inverted residual blocks
114
+ for t, c, n, s in interverted_residual_setting:
115
+ output_channel = _make_divisible(int(c*alpha), 8)
116
+ for i in range(n):
117
+ if i == 0:
118
+ self.features.append(InvertedResidual(input_channel, output_channel, s, expansion=t))
119
+ else:
120
+ self.features.append(InvertedResidual(input_channel, output_channel, 1, expansion=t))
121
+ input_channel = output_channel
122
+
123
+ # building last several layers
124
+ self.features.append(conv_1x1_bn(input_channel, self.last_channel))
125
+
126
+ # make it nn.Sequential
127
+ self.features = nn.Sequential(*self.features)
128
+
129
+ # building classifier
130
+ if self.num_classes is not None:
131
+ self.classifier = nn.Sequential(
132
+ nn.Dropout(0.2),
133
+ nn.Linear(self.last_channel, num_classes),
134
+ )
135
+
136
+ # Initialize weights
137
+ self._init_weights()
138
+
139
+ def forward(self, x):
140
+ # Stage1
141
+ x = self.features[0](x)
142
+ x = self.features[1](x)
143
+ # Stage2
144
+ x = self.features[2](x)
145
+ x = self.features[3](x)
146
+ # Stage3
147
+ x = self.features[4](x)
148
+ x = self.features[5](x)
149
+ x = self.features[6](x)
150
+ # Stage4
151
+ x = self.features[7](x)
152
+ x = self.features[8](x)
153
+ x = self.features[9](x)
154
+ x = self.features[10](x)
155
+ x = self.features[11](x)
156
+ x = self.features[12](x)
157
+ x = self.features[13](x)
158
+ # Stage5
159
+ x = self.features[14](x)
160
+ x = self.features[15](x)
161
+ x = self.features[16](x)
162
+ x = self.features[17](x)
163
+ x = self.features[18](x)
164
+
165
+ # Classification
166
+ if self.num_classes is not None:
167
+ x = x.mean(dim=(2,3))
168
+ x = self.classifier(x)
169
+
170
+ # Output
171
+ return x
172
+
173
+ def _load_pretrained_model(self, pretrained_file):
174
+ pretrain_dict = torch.load(pretrained_file, map_location='cpu')
175
+ model_dict = {}
176
+ state_dict = self.state_dict()
177
+ print("[MobileNetV2] Loading pretrained model...")
178
+ for k, v in pretrain_dict.items():
179
+ if k in state_dict:
180
+ model_dict[k] = v
181
+ else:
182
+ print(k, "is ignored")
183
+ state_dict.update(model_dict)
184
+ self.load_state_dict(state_dict)
185
+
186
+ def _init_weights(self):
187
+ for m in self.modules():
188
+ if isinstance(m, nn.Conv2d):
189
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
190
+ m.weight.data.normal_(0, math.sqrt(2. / n))
191
+ if m.bias is not None:
192
+ m.bias.data.zero_()
193
+ elif isinstance(m, nn.BatchNorm2d):
194
+ m.weight.data.fill_(1)
195
+ m.bias.data.zero_()
196
+ elif isinstance(m, nn.Linear):
197
+ n = m.weight.size(1)
198
+ m.weight.data.normal_(0, 0.01)
199
+ m.bias.data.zero_()
MODNet/src/models/backbones/wrapper.py ADDED
@@ -0,0 +1,82 @@
1
+ import os
2
+ from functools import reduce
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from .mobilenetv2 import MobileNetV2
8
+
9
+
10
+ class BaseBackbone(nn.Module):
11
+ """ Superclass of Replaceable Backbone Model for Semantic Estimation
12
+ """
13
+
14
+ def __init__(self, in_channels):
15
+ super(BaseBackbone, self).__init__()
16
+ self.in_channels = in_channels
17
+
18
+ self.model = None
19
+ self.enc_channels = []
20
+
21
+ def forward(self, x):
22
+ raise NotImplementedError
23
+
24
+ def load_pretrained_ckpt(self):
25
+ raise NotImplementedError
26
+
27
+
28
+ class MobileNetV2Backbone(BaseBackbone):
29
+ """ MobileNetV2 Backbone
30
+ """
31
+
32
+ def __init__(self, in_channels):
33
+ super(MobileNetV2Backbone, self).__init__(in_channels)
34
+
35
+ self.model = MobileNetV2(self.in_channels, alpha=1.0, expansion=6, num_classes=None)
36
+ self.enc_channels = [16, 24, 32, 96, 1280]
37
+
38
+ def forward(self, x):
39
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(0, 2)), x)
40
+ x = self.model.features[0](x)
41
+ x = self.model.features[1](x)
42
+ enc2x = x
43
+
44
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(2, 4)), x)
45
+ x = self.model.features[2](x)
46
+ x = self.model.features[3](x)
47
+ enc4x = x
48
+
49
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(4, 7)), x)
50
+ x = self.model.features[4](x)
51
+ x = self.model.features[5](x)
52
+ x = self.model.features[6](x)
53
+ enc8x = x
54
+
55
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(7, 14)), x)
56
+ x = self.model.features[7](x)
57
+ x = self.model.features[8](x)
58
+ x = self.model.features[9](x)
59
+ x = self.model.features[10](x)
60
+ x = self.model.features[11](x)
61
+ x = self.model.features[12](x)
62
+ x = self.model.features[13](x)
63
+ enc16x = x
64
+
65
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(14, 19)), x)
66
+ x = self.model.features[14](x)
67
+ x = self.model.features[15](x)
68
+ x = self.model.features[16](x)
69
+ x = self.model.features[17](x)
70
+ x = self.model.features[18](x)
71
+ enc32x = x
72
+ return [enc2x, enc4x, enc8x, enc16x, enc32x]
73
+
74
+ def load_pretrained_ckpt(self):
75
+ # the pre-trained model is provided by https://github.com/thuyngch/Human-Segmentation-PyTorch
76
+ ckpt_path = './pretrained/mobilenetv2_human_seg.ckpt'
77
+ if not os.path.exists(ckpt_path):
78
+ print('cannot find the pretrained mobilenetv2 backbone')
79
+ exit()
80
+
81
+ ckpt = torch.load(ckpt_path)
82
+ self.model.load_state_dict(ckpt)
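For orientation, the backbone wrapper above returns five encoder feature maps whose channel widths are exactly `enc_channels = [16, 24, 32, 96, 1280]` at strides 2/4/8/16/32. A small sanity-check sketch (illustrative only; run from the `MODNet/` directory so the `src` package resolves, no pretrained weights needed):

```python
import torch
from src.models.backbones import MobileNetV2Backbone

backbone = MobileNetV2Backbone(in_channels=3)    # random init; load_pretrained_ckpt() is optional here
feats = backbone(torch.randn(1, 3, 512, 512))
print([f.shape[1] for f in feats])               # [16, 24, 32, 96, 1280] == backbone.enc_channels
print([f.shape[2] for f in feats])               # [256, 128, 64, 32, 16] -> strides 2/4/8/16/32
```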
MODNet/src/trainer.py ADDED
@@ -0,0 +1,299 @@
1
+ import math
2
+ import scipy
3
+ import numpy as np
4
+ from scipy.ndimage import grey_dilation, grey_erosion
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+
10
+
11
+ __all__ = [
12
+ 'supervised_training_iter',
13
+ 'soc_adaptation_iter',
14
+ ]
15
+
16
+
17
+ # ----------------------------------------------------------------------------------
18
+ # Tool Classes/Functions
19
+ # ----------------------------------------------------------------------------------
20
+
21
+ class GaussianBlurLayer(nn.Module):
22
+ """ Add Gaussian blur to a 4D tensor
23
+ This layer takes a 4D tensor of {N, C, H, W} as input.
24
+ The Gaussian blur is applied to each of the given (C) channels separately.
25
+ """
26
+
27
+ def __init__(self, channels, kernel_size):
28
+ """
29
+ Arguments:
30
+ channels (int): Channel for input tensor
31
+ kernel_size (int): Size of the kernel used in blurring
32
+ """
33
+
34
+ super(GaussianBlurLayer, self).__init__()
35
+ self.channels = channels
36
+ self.kernel_size = kernel_size
37
+ assert self.kernel_size % 2 != 0
38
+
39
+ self.op = nn.Sequential(
40
+ nn.ReflectionPad2d(math.floor(self.kernel_size / 2)),
41
+ nn.Conv2d(channels, channels, self.kernel_size,
42
+ stride=1, padding=0, bias=None, groups=channels)
43
+ )
44
+
45
+ self._init_kernel()
46
+
47
+ def forward(self, x):
48
+ """
49
+ Arguments:
50
+ x (torch.Tensor): input 4D tensor
51
+ Returns:
52
+ torch.Tensor: Blurred version of the input
53
+ """
54
+
55
+ if not len(list(x.shape)) == 4:
56
+ print('\'GaussianBlurLayer\' requires a 4D tensor as input\n')
57
+ exit()
58
+ elif not x.shape[1] == self.channels:
59
+ print('In \'GaussianBlurLayer\', the required channel ({0}) is'
60
+ ' not the same as input ({1})\n'.format(self.channels, x.shape[1]))
61
+ exit()
62
+
63
+ return self.op(x)
64
+
65
+ def _init_kernel(self):
66
+ sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8
67
+
68
+ n = np.zeros((self.kernel_size, self.kernel_size))
69
+ i = math.floor(self.kernel_size / 2)
70
+ n[i, i] = 1
71
+ kernel = scipy.ndimage.gaussian_filter(n, sigma)
72
+
73
+ for name, param in self.named_parameters():
74
+ param.data.copy_(torch.from_numpy(kernel))
75
+
76
+ # ----------------------------------------------------------------------------------
77
+
78
+
79
+ # ----------------------------------------------------------------------------------
80
+ # MODNet Training Functions
81
+ # ----------------------------------------------------------------------------------
82
+
83
+ blurer = GaussianBlurLayer(1, 3).cuda()
84
+
85
+
86
+ def supervised_training_iter(
87
+ modnet, optimizer, image, trimap, gt_matte,
88
+ semantic_scale=10.0, detail_scale=10.0, matte_scale=1.0):
89
+ """ Supervised training iteration of MODNet
90
+ This function trains MODNet for one iteration on a labeled dataset.
91
+
92
+ Arguments:
93
+ modnet (torch.nn.Module): instance of MODNet
94
+ optimizer (torch.optim.Optimizer): optimizer for supervised training
95
+ image (torch.autograd.Variable): input RGB image
96
+ its pixel values should be normalized
97
+ trimap (torch.autograd.Variable): trimap used to calculate the losses
98
+ its pixel values can be 0, 0.5, or 1
99
+ (foreground=1, background=0, unknown=0.5)
100
+ gt_matte (torch.autograd.Variable): ground truth alpha matte
101
+ its pixel values are between [0, 1]
102
+ semantic_scale (float): scale of the semantic loss
103
+ NOTE: please adjust according to your dataset
104
+ detail_scale (float): scale of the detail loss
105
+ NOTE: please adjust according to your dataset
106
+ matte_scale (float): scale of the matte loss
107
+ NOTE: please adjust according to your dataset
108
+
109
+ Returns:
110
+ semantic_loss (torch.Tensor): loss of the semantic estimation [Low-Resolution (LR) Branch]
111
+ detail_loss (torch.Tensor): loss of the detail prediction [High-Resolution (HR) Branch]
112
+ matte_loss (torch.Tensor): loss of the semantic-detail fusion [Fusion Branch]
113
+
114
+ Example:
115
+ import torch
116
+ from src.models.modnet import MODNet
117
+ from src.trainer import supervised_training_iter
118
+
119
+ bs = 16 # batch size
120
+ lr = 0.01 # learning rate
121
+ epochs = 40 # total epochs
122
+
123
+ modnet = torch.nn.DataParallel(MODNet()).cuda()
124
+ optimizer = torch.optim.SGD(modnet.parameters(), lr=lr, momentum=0.9)
125
+ lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(0.25 * epochs), gamma=0.1)
126
+
127
+ dataloader = CREATE_YOUR_DATALOADER(bs) # NOTE: please finish this function
128
+
129
+ for epoch in range(0, epochs):
130
+ for idx, (image, trimap, gt_matte) in enumerate(dataloader):
131
+ semantic_loss, detail_loss, matte_loss = \
132
+ supervised_training_iter(modnet, optimizer, image, trimap, gt_matte)
133
+ lr_scheduler.step()
134
+ """
135
+
136
+ global blurer
137
+
138
+ # set the model to train mode and clear the optimizer
139
+ modnet.train()
140
+ optimizer.zero_grad()
141
+
142
+ # forward the model
143
+ pred_semantic, pred_detail, pred_matte = modnet(image, False)
144
+
145
+ # calculate the boundary mask from the trimap
146
+ boundaries = (trimap < 0.5) + (trimap > 0.5)
147
+
148
+ # calculate the semantic loss
149
+ gt_semantic = F.interpolate(gt_matte, scale_factor=1/16, mode='bilinear')
150
+ gt_semantic = blurer(gt_semantic)
151
+ semantic_loss = torch.mean(F.mse_loss(pred_semantic, gt_semantic))
152
+ semantic_loss = semantic_scale * semantic_loss
153
+
154
+ # calculate the detail loss
155
+ pred_boundary_detail = torch.where(boundaries, trimap, pred_detail)
156
+ gt_detail = torch.where(boundaries, trimap, gt_matte)
157
+ detail_loss = torch.mean(F.l1_loss(pred_boundary_detail, gt_detail))
158
+ detail_loss = detail_scale * detail_loss
159
+
160
+ # calculate the matte loss
161
+ pred_boundary_matte = torch.where(boundaries, trimap, pred_matte)
162
+ matte_l1_loss = F.l1_loss(pred_matte, gt_matte) + 4.0 * F.l1_loss(pred_boundary_matte, gt_matte)
163
+ matte_compositional_loss = F.l1_loss(image * pred_matte, image * gt_matte) \
164
+ + 4.0 * F.l1_loss(image * pred_boundary_matte, image * gt_matte)
165
+ matte_loss = torch.mean(matte_l1_loss + matte_compositional_loss)
166
+ matte_loss = matte_scale * matte_loss
167
+
168
+ # calculate the final loss, backward the loss, and update the model
169
+ loss = semantic_loss + detail_loss + matte_loss
170
+ loss.backward()
171
+ optimizer.step()
172
+
173
+ # for test
174
+ return semantic_loss, detail_loss, matte_loss
175
+
176
+
177
+ def soc_adaptation_iter(
178
+ modnet, backup_modnet, optimizer, image,
179
+ soc_semantic_scale=100.0, soc_detail_scale=1.0):
180
+ """ Self-Supervised sub-objective consistency (SOC) adaptation iteration of MODNet
181
+ This function fine-tunes MODNet for one iteration on an unlabeled dataset.
182
+ Note that SOC can only fine-tune a converged MODNet, i.e., MODNet that has been
183
+ trained on a labeled dataset.
184
+
185
+ Arguments:
186
+ modnet (torch.nn.Module): instance of MODNet
187
+ backup_modnet (torch.nn.Module): backup of the trained MODNet
188
+ optimizer (torch.optim.Optimizer): optimizer for self-supervised SOC
189
+ image (torch.autograd.Variable): input RGB image
190
+ its pixel values should be normalized
191
+ soc_semantic_scale (float): scale of the SOC semantic loss
192
+ NOTE: please adjust according to your dataset
193
+ soc_detail_scale (float): scale of the SOC detail loss
194
+ NOTE: please adjust according to your dataset
195
+
196
+ Returns:
197
+ soc_semantic_loss (torch.Tensor): loss of the semantic SOC
198
+ soc_detail_loss (torch.Tensor): loss of the detail SOC
199
+
200
+ Example:
201
+ import copy
202
+ import torch
203
+ from src.models.modnet import MODNet
204
+ from src.trainer import soc_adaptation_iter
205
+
206
+ bs = 1 # batch size
207
+ lr = 0.00001 # learning rate
208
+ epochs = 10 # total epochs
209
+
210
+ modnet = torch.nn.DataParallel(MODNet()).cuda()
211
+ modnet = LOAD_TRAINED_CKPT() # NOTE: please finish this function
212
+
213
+ optimizer = torch.optim.Adam(modnet.parameters(), lr=lr, betas=(0.9, 0.99))
214
+ dataloader = CREATE_YOUR_DATALOADER(bs) # NOTE: please finish this function
215
+
216
+ for epoch in range(0, epochs):
217
+ backup_modnet = copy.deepcopy(modnet)
218
+ for idx, (image) in enumerate(dataloader):
219
+ soc_semantic_loss, soc_detail_loss = \
220
+ soc_adaptation_iter(modnet, backup_modnet, optimizer, image)
221
+ """
222
+
223
+ global blurer
224
+
225
+ # set the backup model to eval mode
226
+ backup_modnet.eval()
227
+
228
+ # set the main model to train mode and freeze its norm layers
229
+ modnet.train()
230
+ modnet.module.freeze_norm()
231
+
232
+ # clear the optimizer
233
+ optimizer.zero_grad()
234
+
235
+ # forward the main model
236
+ pred_semantic, pred_detail, pred_matte = modnet(image, False)
237
+
238
+ # forward the backup model
239
+ with torch.no_grad():
240
+ _, pred_backup_detail, pred_backup_matte = backup_modnet(image, False)
241
+
242
+ # calculate the boundary mask from `pred_matte` and `pred_semantic`
243
+ pred_matte_fg = (pred_matte.detach() > 0.1).float()
244
+ pred_semantic_fg = (pred_semantic.detach() > 0.1).float()
245
+ pred_semantic_fg = F.interpolate(pred_semantic_fg, scale_factor=16, mode='bilinear')
246
+ pred_fg = pred_matte_fg * pred_semantic_fg
247
+
248
+ n, c, h, w = pred_matte.shape
249
+ np_pred_fg = pred_fg.data.cpu().numpy()
250
+ np_boundaries = np.zeros([n, c, h, w])
251
+ for sdx in range(0, n):
252
+ sample_np_boundaries = np_boundaries[sdx, 0, ...]
253
+ sample_np_pred_fg = np_pred_fg[sdx, 0, ...]
254
+
255
+ side = int((h + w) / 2 * 0.05)
256
+ dilated = grey_dilation(sample_np_pred_fg, size=(side, side))
257
+ eroded = grey_erosion(sample_np_pred_fg, size=(side, side))
258
+
259
+ sample_np_boundaries[np.where(dilated - eroded != 0)] = 1
260
+ np_boundaries[sdx, 0, ...] = sample_np_boundaries
261
+
262
+ boundaries = torch.tensor(np_boundaries).float().cuda()
263
+
264
+ # sub-objectives consistency between `pred_semantic` and `pred_matte`
265
+ # generate pseudo ground truth for `pred_semantic`
266
+ downsampled_pred_matte = blurer(F.interpolate(pred_matte, scale_factor=1/16, mode='bilinear'))
267
+ pseudo_gt_semantic = downsampled_pred_matte.detach()
268
+ pseudo_gt_semantic = pseudo_gt_semantic * (pseudo_gt_semantic > 0.01).float()
269
+
270
+ # generate pseudo ground truth for `pred_matte`
271
+ pseudo_gt_matte = pred_semantic.detach()
272
+ pseudo_gt_matte = pseudo_gt_matte * (pseudo_gt_matte > 0.01).float()
273
+
274
+ # calculate the SOC semantic loss
275
+ soc_semantic_loss = F.mse_loss(pred_semantic, pseudo_gt_semantic) + F.mse_loss(downsampled_pred_matte, pseudo_gt_matte)
276
+ soc_semantic_loss = soc_semantic_scale * torch.mean(soc_semantic_loss)
277
+
278
+ # NOTE: using the formulas in our paper to calculate the following losses has similar results
279
+ # sub-objectives consistency between `pred_detail` and `pred_backup_detail` (on boundaries only)
280
+ backup_detail_loss = boundaries * F.l1_loss(pred_detail, pred_backup_detail, reduction='none')
281
+ backup_detail_loss = torch.sum(backup_detail_loss, dim=(1,2,3)) / torch.sum(boundaries, dim=(1,2,3))
282
+ backup_detail_loss = torch.mean(backup_detail_loss)
283
+
284
+ # sub-objectives consistency between pred_matte` and `pred_backup_matte` (on boundaries only)
285
+ backup_matte_loss = boundaries * F.l1_loss(pred_matte, pred_backup_matte, reduction='none')
286
+ backup_matte_loss = torch.sum(backup_matte_loss, dim=(1,2,3)) / torch.sum(boundaries, dim=(1,2,3))
287
+ backup_matte_loss = torch.mean(backup_matte_loss)
288
+
289
+ soc_detail_loss = soc_detail_scale * (backup_detail_loss + backup_matte_loss)
290
+
291
+ # calculate the final loss, backward the loss, and update the model
292
+ loss = soc_semantic_loss + soc_detail_loss
293
+
294
+ loss.backward()
295
+ optimizer.step()
296
+
297
+ return soc_semantic_loss, soc_detail_loss
298
+
299
+ # ----------------------------------------------------------------------------------
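One subtlety in the training code above: `boundaries = (trimap < 0.5) + (trimap > 0.5)` is True wherever the trimap already provides a known foreground/background label, so the subsequent `torch.where(boundaries, trimap, ...)` calls replace predictions there and effectively restrict the detail and boundary-matte terms to the unknown band. A tiny illustrative check (variable names here are hypothetical):

```python
import torch

trimap = torch.tensor([[0.0, 0.5, 1.0]])        # background / unknown / foreground
boundaries = (trimap < 0.5) + (trimap > 0.5)    # tensor([[True, False, True]])
pred = torch.tensor([[0.2, 0.7, 0.9]])          # a hypothetical prediction
masked = torch.where(boundaries, trimap, pred)  # keep the prediction only where the label is unknown
print(masked)                                   # tensor([[0.0000, 0.7000, 1.0000]])
```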
MODNet/torchscript/README.md ADDED
@@ -0,0 +1,18 @@
1
+ ## MODNet - TorchScript Model
2
+
3
+ This TorchScript version of MODNet is provided by [@yarkable](https://github.com/yarkable) from the community.
4
+ Please note that the PyTorch version required for this TorchScript export is higher than that required by the official MODNet code (torch>=1.2.0).
5
+
6
+ You can also download the TorchScript version of the official **Image Matting Model** from [this link](https://pan.baidu.com/s/1kOmmmbG7lSZiSmDdE7CaRw) with the extraction code `dm9e`.
7
+
8
+ To export the TorchScript version of MODNet (assuming you are currently in the project root directory):
9
+ 1. Download the pre-trained **Image Matting Model** from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing) and put the model into the folder `MODNet/pretrained/`.
10
+
11
+ 2. Ensure your PyTorch version >= 1.2.0.
12
+
13
+ 3. Export the TorchScript version of MODNet by:
14
+ ```shell
15
+ python -m torchscript.export_torchscript \
16
+ --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
17
+ --output-path=pretrained/modnet_photographic_portrait_matting.torchscript
18
+ ```
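Once exported, the model can be loaded with `torch.jit.load` and called like a regular module. A minimal sketch (illustrative, not part of this repo): it assumes a CUDA device and an input already normalized to [-1, 1] with sides divisible by 32, mirroring the preprocessing used by the other demos.

```python
import torch

model = torch.jit.load("pretrained/modnet_photographic_portrait_matting.torchscript").cuda().eval()
dummy = torch.randn(1, 3, 512, 512).cuda()   # stand-in for a normalized portrait image
with torch.no_grad():
    matte = model(dummy)                     # 1 x 1 x 512 x 512 alpha matte in [0, 1]
print(matte.shape)
```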
MODNet/torchscript/__init__.py ADDED
File without changes
MODNet/torchscript/export_torchscript.py ADDED
@@ -0,0 +1,46 @@
1
+ """
2
+ Export TorchScript model of MODNet
3
+
4
+ Arguments:
5
+ --ckpt-path: path of the checkpoint that will be converted
6
+ --output-path: path for saving the TorchScript model
7
+
8
+ Example:
9
+ python export_torchscript.py \
10
+ --ckpt-path=modnet_photographic_portrait_matting.ckpt \
11
+ --output-path=modnet_photographic_portrait_matting.torchscript
12
+ """
13
+
14
+ import os
15
+ import argparse
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+ import torch.nn.functional as F
20
+
21
+ from . import modnet_torchscript
22
+
23
+
24
+ if __name__ == '__main__':
25
+ # define cmd arguments
26
+ parser = argparse.ArgumentParser()
27
+ parser.add_argument('--ckpt-path', type=str, required=True, help='path of the checkpoint that will be converted')
28
+ parser.add_argument('--output-path', type=str, required=True, help='path for saving the TorchScript model')
29
+ args = parser.parse_args()
30
+
31
+ # check input arguments
32
+ if not os.path.exists(args.ckpt_path):
33
+ print(args.ckpt_path)
34
+ print('Cannot find checkpoint path: {0}'.format(args.ckpt_path))
35
+ exit()
36
+
37
+ # create MODNet and load the pre-trained ckpt
38
+ modnet = modnet_torchscript.MODNet(backbone_pretrained=False)
39
+ modnet = nn.DataParallel(modnet).cuda()
40
+ state_dict = torch.load(args.ckpt_path)
41
+ modnet.load_state_dict(state_dict)
42
+ modnet.eval()
43
+
44
+ # export to TorchScript model
45
+ scripted_model = torch.jit.script(modnet.module)
46
+ torch.jit.save(scripted_model, os.path.join(args.output_path))
MODNet/torchscript/modnet_torchscript.py ADDED
@@ -0,0 +1,258 @@
1
+ """
2
+ This file contains a modified version of the original file `modnet.py` without
3
+ `pred_semantic` and `pred_detail`, since both return None when `inference=True`.
4
+
5
+ It also omits the `inference` argument, which makes it easier to
6
+ convert the checkpoint to a TorchScript model.
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+
13
+ from src.models.backbones import SUPPORTED_BACKBONES
14
+
15
+
16
+ #------------------------------------------------------------------------------
17
+ # MODNet Basic Modules
18
+ #------------------------------------------------------------------------------
19
+
20
+ class IBNorm(nn.Module):
21
+ """ Combine Instance Norm and Batch Norm into One Layer
22
+ """
23
+
24
+ def __init__(self, in_channels):
25
+ super(IBNorm, self).__init__()
26
+ in_channels = in_channels
27
+ self.bnorm_channels = int(in_channels / 2)
28
+ self.inorm_channels = in_channels - self.bnorm_channels
29
+
30
+ self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
31
+ self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
32
+
33
+ def forward(self, x):
34
+ bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
35
+ in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
36
+
37
+ return torch.cat((bn_x, in_x), 1)
38
+
39
+
40
+ class Conv2dIBNormRelu(nn.Module):
41
+ """ Convolution + IBNorm + ReLu
42
+ """
43
+
44
+ def __init__(self, in_channels, out_channels, kernel_size,
45
+ stride=1, padding=0, dilation=1, groups=1, bias=True,
46
+ with_ibn=True, with_relu=True):
47
+ super(Conv2dIBNormRelu, self).__init__()
48
+
49
+ layers = [
50
+ nn.Conv2d(in_channels, out_channels, kernel_size,
51
+ stride=stride, padding=padding, dilation=dilation,
52
+ groups=groups, bias=bias)
53
+ ]
54
+
55
+ if with_ibn:
56
+ layers.append(IBNorm(out_channels))
57
+ if with_relu:
58
+ layers.append(nn.ReLU(inplace=True))
59
+
60
+ self.layers = nn.Sequential(*layers)
61
+
62
+ def forward(self, x):
63
+ return self.layers(x)
64
+
65
+
66
+ class SEBlock(nn.Module):
67
+ """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
68
+ """
69
+
70
+ def __init__(self, in_channels, out_channels, reduction=1):
71
+ super(SEBlock, self).__init__()
72
+ self.pool = nn.AdaptiveAvgPool2d(1)
73
+ self.fc = nn.Sequential(
74
+ nn.Linear(in_channels, int(in_channels // reduction), bias=False),
75
+ nn.ReLU(inplace=True),
76
+ nn.Linear(int(in_channels // reduction), out_channels, bias=False),
77
+ nn.Sigmoid()
78
+ )
79
+
80
+ def forward(self, x):
81
+ b, c, _, _ = x.size()
82
+ w = self.pool(x).view(b, c)
83
+ w = self.fc(w).view(b, c, 1, 1)
84
+
85
+ return x * w.expand_as(x)
86
+
87
+
88
+ #------------------------------------------------------------------------------
89
+ # MODNet Branches
90
+ #------------------------------------------------------------------------------
91
+
92
+ class LRBranch(nn.Module):
93
+ """ Low Resolution Branch of MODNet
94
+ """
95
+
96
+ def __init__(self, backbone):
97
+ super(LRBranch, self).__init__()
98
+
99
+ enc_channels = backbone.enc_channels
100
+
101
+ self.backbone = backbone
102
+ self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
103
+ self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
104
+ self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
105
+ self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
106
+
107
+ def forward(self, img):
108
+ enc_features = self.backbone.forward(img)
109
+ enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
110
+
111
+ enc32x = self.se_block(enc32x)
112
+ lr16x = F.interpolate(enc32x, scale_factor=2.0, mode='bilinear', align_corners=False)
113
+ lr16x = self.conv_lr16x(lr16x)
114
+ lr8x = F.interpolate(lr16x, scale_factor=2.0, mode='bilinear', align_corners=False)
115
+ lr8x = self.conv_lr8x(lr8x)
116
+
117
+ return lr8x, enc2x, enc4x
118
+
119
+
120
+ class HRBranch(nn.Module):
121
+ """ High Resolution Branch of MODNet
122
+ """
123
+
124
+ def __init__(self, hr_channels, enc_channels):
125
+ super(HRBranch, self).__init__()
126
+
127
+ self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
128
+ self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
129
+
130
+ self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
131
+ self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
132
+
133
+ self.conv_hr4x = nn.Sequential(
134
+ Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
135
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
136
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
137
+ )
138
+
139
+ self.conv_hr2x = nn.Sequential(
140
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
141
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
142
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
143
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
144
+ )
145
+
146
+ self.conv_hr = nn.Sequential(
147
+ Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
148
+ Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
149
+ )
150
+
151
+ def forward(self, img, enc2x, enc4x, lr8x):
152
+ img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
153
+ img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
154
+
155
+ enc2x = self.tohr_enc2x(enc2x)
156
+ hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
157
+
158
+ enc4x = self.tohr_enc4x(enc4x)
159
+ hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
160
+
161
+ lr4x = F.interpolate(lr8x, scale_factor=2.0, mode='bilinear', align_corners=False)
162
+ hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
163
+
164
+ hr2x = F.interpolate(hr4x, scale_factor=2.0, mode='bilinear', align_corners=False)
165
+ hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
166
+
167
+ return hr2x
168
+
169
+
170
+ class FusionBranch(nn.Module):
171
+ """ Fusion Branch of MODNet
172
+ """
173
+
174
+ def __init__(self, hr_channels, enc_channels):
175
+ super(FusionBranch, self).__init__()
176
+ self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
177
+
178
+ self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
179
+ self.conv_f = nn.Sequential(
180
+ Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
181
+ Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
182
+ )
183
+
184
+ def forward(self, img, lr8x, hr2x):
185
+ lr4x = F.interpolate(lr8x, scale_factor=2.0, mode='bilinear', align_corners=False)
186
+ lr4x = self.conv_lr4x(lr4x)
187
+ lr2x = F.interpolate(lr4x, scale_factor=2.0, mode='bilinear', align_corners=False)
188
+
189
+ f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
190
+ f = F.interpolate(f2x, scale_factor=2.0, mode='bilinear', align_corners=False)
191
+ f = self.conv_f(torch.cat((f, img), dim=1))
192
+ pred_matte = torch.sigmoid(f)
193
+
194
+ return pred_matte
195
+
196
+
197
+ #------------------------------------------------------------------------------
198
+ # MODNet
199
+ #------------------------------------------------------------------------------
200
+
201
+ class MODNet(nn.Module):
202
+ """ Architecture of MODNet
203
+ """
204
+
205
+ def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
206
+ super(MODNet, self).__init__()
207
+
208
+ self.in_channels = in_channels
209
+ self.hr_channels = hr_channels
210
+ self.backbone_arch = backbone_arch
211
+ self.backbone_pretrained = backbone_pretrained
212
+
213
+ self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
214
+
215
+ self.lr_branch = LRBranch(self.backbone)
216
+ self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
217
+ self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
218
+
219
+ for m in self.modules():
220
+ if isinstance(m, nn.Conv2d):
221
+ self._init_conv(m)
222
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
223
+ self._init_norm(m)
224
+
225
+ if self.backbone_pretrained:
226
+ self.backbone.load_pretrained_ckpt()
227
+
228
+ def forward(self, img):
229
+ # NOTE
230
+ lr_out = self.lr_branch(img)
231
+ lr8x = lr_out[0]
232
+ enc2x = lr_out[1]
233
+ enc4x = lr_out[2]
234
+
235
+ hr2x = self.hr_branch(img, enc2x, enc4x, lr8x)
236
+
237
+ pred_matte = self.f_branch(img, lr8x, hr2x)
238
+
239
+ return pred_matte
240
+
241
+ def freeze_norm(self):
242
+ norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
243
+ for m in self.modules():
244
+ for n in norm_types:
245
+ if isinstance(m, n):
246
+ m.eval()
247
+ continue
248
+
249
+ def _init_conv(self, conv):
250
+ nn.init.kaiming_uniform_(
251
+ conv.weight, a=0, mode='fan_in', nonlinearity='relu')
252
+ if conv.bias is not None:
253
+ nn.init.constant_(conv.bias, 0)
254
+
255
+ def _init_norm(self, norm):
256
+ if norm.weight is not None:
257
+ nn.init.constant_(norm.weight, 1)
258
+ nn.init.constant_(norm.bias, 0)
config.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "model_type": "modnet",
 
 
3
  "backbone": "mobilenetv2",
4
- "architectures": ["HF_MODNet"],
5
- "auto_map": {
6
- "AutoConfig": "configuration_modnet.MODNetConfig",
7
- "AutoModel": "modeling_modnet.HF_MODNet"
8
- }
9
  }
 
1
  {
2
+ "architectures": [
3
+ "HF_MODNet"
4
+ ],
5
  "backbone": "mobilenetv2",
6
+ "dtype": "float32",
7
+ "model_type": "modnet",
8
+ "transformers_version": "4.57.6"
 
 
9
  }
modeling_modnet.py CHANGED
@@ -1,17 +1,17 @@
1
- import torch
2
- from torch import nn
3
- from transformers import PreTrainedModel, PretrainedConfig
4
- from .configuration_modnet import MODNetConfig
5
-
6
- from .MODNet.modnet import MODNet
7
-
8
-
9
- class HF_MODNet(PreTrainedModel):
10
- config_class = MODNetConfig
11
-
12
- def __init__(self, config):
13
- super().__init__(config)
14
- self.modnet = MODNet(backbone_pretrained=False)
15
-
16
- def forward(self, x, inference=True):
17
  return self.modnet(x, inference)
 
1
+ import torch
2
+ from torch import nn
3
+ from transformers import PreTrainedModel, PretrainedConfig
4
+ from .configuration_modnet import MODNetConfig
5
+
6
+ from .MODNet.modnet import MODNet
7
+
8
+
9
+ class HF_MODNet(PreTrainedModel):
10
+ config_class = MODNetConfig
11
+
12
+ def __init__(self, config):
13
+ super().__init__(config)
14
+ self.modnet = MODNet(backbone_pretrained=False)
15
+
16
+ def forward(self, x, inference=True):
17
  return self.modnet(x, inference)