boopathiraj committed on
Commit 9285cd7 · verified · 1 Parent(s): 8ac21af

Upload folder using huggingface_hub

Files changed (48)
  1. .gitattributes +6 -0
  2. MODNet/.gitignore +97 -0
  3. MODNet/LICENSE +201 -0
  4. MODNet/README.md +129 -0
  5. MODNet/__pycache__/modnet.cpython-312.pyc +0 -0
  6. MODNet/demo/image_matting/colab/README.md +2 -0
  7. MODNet/demo/image_matting/colab/__pycache__/inference.cpython-312.pyc +0 -0
  8. MODNet/demo/image_matting/colab/inference.py +105 -0
  9. MODNet/demo/image_matting/colab/input/portrait.jpg +3 -0
  10. MODNet/demo/video_matting/custom/README.md +50 -0
  11. MODNet/demo/video_matting/custom/requirements.txt +6 -0
  12. MODNet/demo/video_matting/custom/run.py +114 -0
  13. MODNet/demo/video_matting/webcam/README.md +52 -0
  14. MODNet/demo/video_matting/webcam/requirements.txt +5 -0
  15. MODNet/demo/video_matting/webcam/run.py +67 -0
  16. MODNet/doc/gif/commercial_image_matting_model_result.gif +3 -0
  17. MODNet/doc/gif/commercial_image_matting_website.gif +3 -0
  18. MODNet/doc/gif/homepage_demo.gif +3 -0
  19. MODNet/doc/gif/image_matting_demo.gif +3 -0
  20. MODNet/doc/gif/video_matting_demo.gif +3 -0
  21. MODNet/matte.zip +3 -0
  22. MODNet/modnet.py +255 -0
  23. MODNet/onnx/README.md +30 -0
  24. MODNet/onnx/__init__.py +0 -0
  25. MODNet/onnx/export_onnx.py +55 -0
  26. MODNet/onnx/inference_onnx.py +104 -0
  27. MODNet/onnx/modnet_onnx.py +252 -0
  28. MODNet/onnx/requirements.txt +4 -0
  29. MODNet/pretrained/README.md +2 -0
  30. MODNet/pretrained/modnet_photographic_portrait_matting.ckpt +3 -0
  31. MODNet/src/__init__.py +0 -0
  32. MODNet/src/__pycache__/__init__.cpython-312.pyc +0 -0
  33. MODNet/src/models/__init__.py +0 -0
  34. MODNet/src/models/__pycache__/__init__.cpython-312.pyc +0 -0
  35. MODNet/src/models/__pycache__/modnet.cpython-312.pyc +0 -0
  36. MODNet/src/models/backbones/__init__.py +10 -0
  37. MODNet/src/models/backbones/__pycache__/__init__.cpython-312.pyc +0 -0
  38. MODNet/src/models/backbones/__pycache__/mobilenetv2.cpython-312.pyc +0 -0
  39. MODNet/src/models/backbones/__pycache__/wrapper.cpython-312.pyc +0 -0
  40. MODNet/src/models/backbones/mobilenetv2.py +199 -0
  41. MODNet/src/models/backbones/wrapper.py +82 -0
  42. MODNet/src/trainer.py +299 -0
  43. MODNet/torchscript/README.md +18 -0
  44. MODNet/torchscript/__init__.py +0 -0
  45. MODNet/torchscript/export_torchscript.py +46 -0
  46. MODNet/torchscript/modnet_torchscript.py +258 -0
  47. config.json +6 -6
  48. modeling_modnet.py +16 -16
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ MODNet/demo/image_matting/colab/input/portrait.jpg filter=lfs diff=lfs merge=lfs -text
37
+ MODNet/doc/gif/commercial_image_matting_model_result.gif filter=lfs diff=lfs merge=lfs -text
38
+ MODNet/doc/gif/commercial_image_matting_website.gif filter=lfs diff=lfs merge=lfs -text
39
+ MODNet/doc/gif/homepage_demo.gif filter=lfs diff=lfs merge=lfs -text
40
+ MODNet/doc/gif/image_matting_demo.gif filter=lfs diff=lfs merge=lfs -text
41
+ MODNet/doc/gif/video_matting_demo.gif filter=lfs diff=lfs merge=lfs -text
MODNet/.gitignore ADDED
@@ -0,0 +1,97 @@
1
+ # Temporary directories and files
2
+ *.ckpt
3
+ *.onnx
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ env/
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *,cover
50
+ .hypothesis/
51
+
52
+ # Translations
53
+ *.mo
54
+ *.pot
55
+
56
+ # Django stuff:
57
+ *.log
58
+ local_settings.py
59
+
60
+ # Flask stuff:
61
+ instance/
62
+ .webassets-cache
63
+
64
+ # Scrapy stuff:
65
+ .scrapy
66
+
67
+ # Sphinx documentation
68
+ docs/_build/
69
+
70
+ # PyBuilder
71
+ target/
72
+
73
+ # IPython Notebook
74
+ .ipynb_checkpoints
75
+
76
+ # pyenv
77
+ .python-version
78
+
79
+ # celery beat schedule file
80
+ celerybeat-schedule
81
+
82
+ # dotenv
83
+ .env
84
+
85
+ # virtualenv
86
+ venv/
87
+ ENV/
88
+
89
+ # Spyder project settings
90
+ .spyderproject
91
+
92
+ # Rope project settings
93
+ .ropeproject
94
+
95
+
96
+ # Project files
97
+ .vscode
MODNet/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
MODNet/README.md ADDED
@@ -0,0 +1,129 @@
1
+ <h2 align="center">MODNet: Trimap-Free Portrait Matting in Real Time</h2>
2
+
3
+ <div align="center"><i>MODNet: Real-Time Trimap-Free Portrait Matting via Objective Decomposition (AAAI 2022)</i></div>
4
+
5
+ <br />
6
+
7
+ <img src="doc/gif/homepage_demo.gif" width="100%">
8
+
9
+ <div align="center">MODNet is a model for <b>real-time</b> portrait matting with <b>only RGB image input</b></div>
10
+ <div align="center">MODNet是一个<b>仅需RGB图片输入</b>的<b>实时</b>人像抠图模型</div>
11
+
12
+ <br />
13
+
14
+ <p align="center">
15
+ <a href="#online-application-在线应用">Online Application (在线应用)</a> |
16
+ <a href="#research-demo">Research Demo</a> |
17
+ <a href="https://arxiv.org/pdf/2011.11961.pdf">AAAI 2022 Paper</a> |
18
+ <a href="https://youtu.be/PqJ3BRHX3Lc">Supplementary Video</a>
19
+ </p>
20
+
21
+ <p align="center">
22
+ <a href="#community">Community</a> |
23
+ <a href="#code">Code</a> |
24
+ <a href="#ppm-benchmark">PPM Benchmark</a> |
25
+ <a href="#license">License</a> |
26
+ <a href="#acknowledgement">Acknowledgement</a> |
27
+ <a href="#citation">Citation</a> |
28
+ <a href="#contact">Contact</a>
29
+ </p>
30
+
31
+ ---
32
+
33
+
34
+ ## Online Application (在线应用)
35
+
36
+ The model used in the online demo (unpublished) is only **7M**! It processes **2K**-resolution images at **fast** speed on common PCs or mobile devices, with results **better** than the research demos!
37
+ Please try online portrait image matting on [my personal homepage](https://zhke.io/#/?modnet_demo) for fun!
38
+
39
+ 在线应用中使用的模型(未发布)大小仅为**7M**!可以在普通PC或移动设备上**快速**处理具有**2K**分辨率的图像!效果比研究示例**更好**!
40
+ 请通过[我的主页](https://zhke.io/#/?modnet_demo)在线尝试图片抠像!
41
+
42
+
43
+ ## Research Demo
44
+
45
+ All the models behind the following demos are trained on the datasets mentioned in [our paper](https://arxiv.org/pdf/2011.11961.pdf).
46
+
47
+ ### Portrait Image Matting
48
+ We provide an [online Colab demo](https://colab.research.google.com/drive/1GANpbKT06aEFiW-Ssx0DQnnEADcXwQG6?usp=sharing) for portrait image matting.
49
+ It allows you to upload portrait images and predict/visualize/download the alpha mattes.
50
+
51
+ <!-- <img src="doc/gif/image_matting_demo.gif" width='40%'> -->
52
+
53
+ ### Portrait Video Matting
54
+ We provide two real-time portrait video matting demos based on WebCam. When using the demo, you can move the WebCam around at will.
55
+ If you have an Ubuntu system, we recommend trying the [offline demo](demo/video_matting/webcam) to get a higher *fps*. Otherwise, you can access the [online Colab demo](https://colab.research.google.com/drive/1Pt3KDSc2q7WxFvekCnCLD8P0gBEbxm6J?usp=sharing).
56
+ We also provide an [offline demo](demo/video_matting/custom) that allows you to process custom videos.
57
+
58
+ <!-- <img src="doc/gif/video_matting_demo.gif" width='60%'> -->
59
+
60
+
61
+ ## Community
62
+
63
+ We share some cool applications/extensions of MODNet built by the community.
64
+
65
+ <!-- - **WebGUI for Portrait Image Matting** -->
66
+ <!-- You can try [this WebGUI](https://www.gradio.app/hub/aliabd/modnet) (hosted on [Gradio](https://www.gradio.app/)) for portrait image matting from your browser without code! -->
67
+
68
+ - **Colab Demo of Bokeh (Blur Background)**
69
+ You can try [this Colab demo](https://colab.research.google.com/github/eyaler/avatars4all/blob/master/yarok.ipynb) (built by [@eyaler](https://github.com/eyaler)) to blur the background based on MODNet!
70
+
71
+ - **ONNX Version of MODNet**
72
+ You can convert the pre-trained MODNet to an ONNX model by using [this code](onnx) (provided by [@manthan3C273](https://github.com/manthan3C273)). You can also try [this Colab demo](https://colab.research.google.com/drive/1P3cWtg8fnmu9karZHYDAtmm1vj1rgA-f?usp=sharing) for MODNet image matting (ONNX version).
73
+
74
+ - **TorchScript Version of MODNet**
75
+ You can convert the pre-trained MODNet to a TorchScript model by using [this code](torchscript) (provided by [@yarkable](https://github.com/yarkable)).
76
+
77
+ - **TensorRT Version of MODNet**
78
+ You can access [this Github repository](https://github.com/jkjung-avt/tensorrt_demos) to try the TensorRT version of MODNet (provided by [@jkjung-avt](https://github.com/jkjung-avt)).
79
+
80
+ - **Docker Container for MODNet**
81
+ You can access [this Github repository](https://github.com/nahidalam/modnet_docker) for a containerized version of MODNet with the Docker environment (provided by [@nahidalam](https://github.com/nahidalam)).
82
+
83
+
84
+ There are some resources about MODNet from the community.
85
+ - [Video from What's AI YouTube Channel](https://youtu.be/rUo0wuVyefU)
86
+ - [Article from Louis Bouchard's Blog](https://www.louisbouchard.ai/remove-background/)
87
+
88
+
89
+ ## Code
90
+ We provide the [code](src/trainer.py) of MODNet training iteration, including:
91
+ - **Supervised Training**: Train MODNet on a labeled matting dataset
92
+ - **SOC Adaptation**: Adapt a trained MODNet to an unlabeled dataset
93
+
94
+ In code comments, we provide examples for using the functions.
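A minimal usage sketch of these two stages is given below. The function names `supervised_training_iter` and `soc_adaptation_iter`, their signatures, and the dummy tensors are assumptions based on the documented usage in `src/trainer.py`; defer to those docstrings for the authoritative examples.

```python
# Hedged sketch -- trainer function names/signatures are assumptions; see src/trainer.py docstrings.
import copy

import torch
import torch.nn as nn

from src.models.modnet import MODNet
from src.trainer import supervised_training_iter, soc_adaptation_iter  # assumed names

modnet = nn.DataParallel(MODNet())
optimizer = torch.optim.SGD(modnet.parameters(), lr=0.01, momentum=0.9)

# Dummy batch standing in for a real labeled matting DataLoader (image, trimap, gt_matte).
image = torch.randn(2, 3, 512, 512)
trimap = torch.rand(2, 1, 512, 512)
gt_matte = torch.rand(2, 1, 512, 512)

# Supervised training: one iteration on labeled data.
semantic_loss, detail_loss, matte_loss = supervised_training_iter(
    modnet, optimizer, image, trimap, gt_matte)

# SOC adaptation: one iteration on unlabeled data, guided by a frozen copy of the model.
backup_modnet = copy.deepcopy(modnet)
soc_semantic_loss, soc_detail_loss = soc_adaptation_iter(
    modnet, backup_modnet, optimizer, image)
```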
95
+
96
+
97
+ ## PPM Benchmark
98
+ The PPM benchmark is released in a separate repository [PPM](https://github.com/ZHKKKe/PPM).
99
+
100
+
101
+ ## License
102
+ The code, models, and demos in this repository (excluding GIF files under the folder `doc/gif`) are released under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) license.
103
+
104
+
105
+ ## Acknowledgement
106
+ - We thank
107
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[@yzhou0919](https://github.com/yzhou0919), [@eyaler](https://github.com/eyaler), [@manthan3C273](https://github.com/manthan3C273), [@yarkable](https://github.com/yarkable), [@jkjung-avt](https://github.com/jkjung-avt), [@manzke](https://github.com/manzke), [@nahidalam](https://github.com/nahidalam),
108
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[the Gradio team](https://github.com/gradio-app/gradio), [What's AI YouTube Channel](https://www.youtube.com/channel/UCUzGQrN-lyyc0BWTYoJM_Sg), [Louis Bouchard's Blog](https://www.louisbouchard.ai),
109
+ for their contributions to this repository or their cool applications/extensions/resources of MODNet.
110
+
111
+
112
+ ## Citation
113
+ If this work helps your research, please consider citing:
114
+
115
+ ```bibtex
116
+ @InProceedings{MODNet,
117
+ author = {Zhanghan Ke and Jiayu Sun and Kaican Li and Qiong Yan and Rynson W.H. Lau},
118
+ title = {MODNet: Real-Time Trimap-Free Portrait Matting via Objective Decomposition},
119
+ booktitle = {AAAI},
120
+ year = {2022},
121
+ }
122
+ ```
123
+
124
+
125
+ ## Contact
126
+ This repository is maintained by Zhanghan Ke ([@ZHKKKe](https://github.com/ZHKKKe)).
127
+ For questions, please contact `kezhanghan@outlook.com`.
128
+
129
+ <!-- <img src="doc/gif/commercial_image_matting_model_result.gif" width='100%'> -->
MODNet/__pycache__/modnet.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
MODNet/demo/image_matting/colab/README.md ADDED
@@ -0,0 +1,2 @@
1
+ ## MODNet - Portrait Image Matting Demo
2
+ Please try MODNet portrait image matting demo through our [online Colab demo](https://colab.research.google.com/drive/1GANpbKT06aEFiW-Ssx0DQnnEADcXwQG6?usp=sharing).
MODNet/demo/image_matting/colab/__pycache__/inference.cpython-312.pyc ADDED
Binary file (4.85 kB).
 
MODNet/demo/image_matting/colab/inference.py ADDED
@@ -0,0 +1,105 @@
1
+ import os
2
+ import sys
3
+ import argparse
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import torchvision.transforms as transforms
11
+
12
+ from src.models.modnet import MODNet
13
+
14
+
15
+ if __name__ == '__main__':
16
+ # define cmd arguments
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument('--input-path', type=str, help='path of input images')
19
+ parser.add_argument('--output-path', type=str, help='path of output images')
20
+ parser.add_argument('--ckpt-path', type=str, help='path of pre-trained MODNet')
21
+ args = parser.parse_args()
22
+
23
+ # check input arguments
24
+ if not os.path.exists(args.input_path):
25
+ print('Cannot find input path: {0}'.format(args.input_path))
26
+ exit()
27
+ if not os.path.exists(args.output_path):
28
+ print('Cannot find output path: {0}'.format(args.output_path))
29
+ exit()
30
+ if not os.path.exists(args.ckpt_path):
31
+ print('Cannot find ckpt path: {0}'.format(args.ckpt_path))
32
+ exit()
33
+
34
+ # define hyper-parameters
35
+ ref_size = 512
36
+
37
+ # define image to tensor transform
38
+ im_transform = transforms.Compose(
39
+ [
40
+ transforms.ToTensor(),
41
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
42
+ ]
43
+ )
44
+
45
+ # create MODNet and load the pre-trained ckpt
46
+ modnet = MODNet(backbone_pretrained=False)
47
+ modnet = nn.DataParallel(modnet)
48
+
49
+ if torch.cuda.is_available():
50
+ modnet = modnet.cuda()
51
+ weights = torch.load(args.ckpt_path)
52
+ else:
53
+ weights = torch.load(args.ckpt_path, map_location=torch.device('cpu'))
54
+ modnet.load_state_dict(weights)
55
+ modnet.eval()
56
+
57
+ # inference images
58
+ im_names = os.listdir(args.input_path)
59
+ for im_name in im_names:
60
+ print('Process image: {0}'.format(im_name))
61
+
62
+ # read image
63
+ im = Image.open(os.path.join(args.input_path, im_name))
64
+
65
+ # unify image channels to 3
66
+ im = np.asarray(im)
67
+ if len(im.shape) == 2:
68
+ im = im[:, :, None]
69
+ if im.shape[2] == 1:
70
+ im = np.repeat(im, 3, axis=2)
71
+ elif im.shape[2] == 4:
72
+ im = im[:, :, 0:3]
73
+
74
+ # convert image to PyTorch tensor
75
+ im = Image.fromarray(im)
76
+ im = im_transform(im)
77
+
78
+ # add mini-batch dim
79
+ im = im[None, :, :, :]
80
+
81
+ # resize image for input
82
+ im_b, im_c, im_h, im_w = im.shape
83
+ if max(im_h, im_w) < ref_size or min(im_h, im_w) > ref_size:
84
+ if im_w >= im_h:
85
+ im_rh = ref_size
86
+ im_rw = int(im_w / im_h * ref_size)
87
+ elif im_w < im_h:
88
+ im_rw = ref_size
89
+ im_rh = int(im_h / im_w * ref_size)
90
+ else:
91
+ im_rh = im_h
92
+ im_rw = im_w
93
+
94
+ im_rw = im_rw - im_rw % 32
95
+ im_rh = im_rh - im_rh % 32
96
+ im = F.interpolate(im, size=(im_rh, im_rw), mode='area')
97
+
98
+ # inference
99
+ _, _, matte = modnet(im.cuda() if torch.cuda.is_available() else im, True)
100
+
101
+ # resize and save matte
102
+ matte = F.interpolate(matte, size=(im_h, im_w), mode='area')
103
+ matte = matte[0][0].data.cpu().numpy()
104
+ matte_name = im_name.split('.')[0] + '.png'
105
+ Image.fromarray(((matte * 255).astype('uint8')), mode='L').save(os.path.join(args.output_path, matte_name))
MODNet/demo/image_matting/colab/input/portrait.jpg ADDED

Git LFS Details

  • SHA256: 549c142d020fd62c141e70064eccb64f59e8ce8eba5c8ce85bb9cefd8d91fff9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.57 MB
MODNet/demo/video_matting/custom/README.md ADDED
@@ -0,0 +1,50 @@
1
+ ## MODNet - Custom Portrait Video Matting Demo
2
+ This is a MODNet portrait video matting demo that allows you to process custom videos.
3
+
4
+ ### 1. Requirements
5
+ The basic requirements for this demo are:
6
+ - Ubuntu System
7
+ - Python 3+
8
+
9
+
10
+ ### 2. Introduction
11
+ We use ~400 unlabeled video clips (divided into ~50,000 frames) downloaded from the internet to perform SOC to adapt MODNet to the video domain. **Nonetheless, due to insufficient labeled training data (~3k labeled foregrounds), our model may still make errors in portrait semantics estimation under challenging scenes.** Besides, this demo does not currently support the OFD trick.
12
+
13
+
14
+ For a better experience, please make sure your videos satisfy:
15
+
16
+ * the portrait and background are distinguishable, <i>i.e.</i>, are not similar
17
+ * captured in soft and bright ambient lighting
18
+ * the contents do not move too fast
19
+
20
+ ### 3. Run Demo
21
+ We recommend creating a new conda virtual environment to run this demo, as follows:
22
+
23
+ 1. Clone the MODNet repository:
24
+ ```
25
+ git clone https://github.com/ZHKKKe/MODNet.git
26
+ cd MODNet
27
+ ```
28
+
29
+ 2. Download the pre-trained model from this [link](https://drive.google.com/file/d/1Nf1ZxeJZJL8Qx9KadcYYyEmmlKhTADxX/view?usp=sharing) and put it into the folder `MODNet/pretrained/`.
30
+
31
+
32
+ 3. Create a conda virtual environment named `modnet` (if it doesn't exist) and activate it. Here we use `python=3.6` as an example:
33
+ ```
34
+ conda create -n modnet python=3.6
35
+ source activate modnet
36
+ ```
37
+
38
+ 4. Install the required python dependencies (please make sure your CUDA version is supported by the PyTorch version installed):
39
+ ```
40
+ pip install -r demo/video_matting/custom/requirements.txt
41
+ ```
42
+
43
+ 5. Execute the main code:
44
+ ```
45
+ python -m demo.video_matting.custom.run --video YOUR_VIDEO_PATH
46
+ ```
47
+ where `YOUR_VIDEO_PATH` is the specific path of your video.
48
+ There are some optional arguments:
49
+ - `--result-type (default=fg)` : matte - save the alpha matte; fg - save the foreground
50
+ - `--fps (default=30)` : fps of the result video
MODNet/demo/video_matting/custom/requirements.txt ADDED
@@ -0,0 +1,6 @@
1
+ numpy
2
+ Pillow
3
+ opencv-python
4
+ torch >= 1.0.0
5
+ torchvision
6
+ tqdm
MODNet/demo/video_matting/custom/run.py ADDED
@@ -0,0 +1,114 @@
1
+ import os
2
+ import cv2
3
+ import argparse
4
+ import numpy as np
5
+ from PIL import Image
6
+ from tqdm import tqdm
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torchvision.transforms as transforms
11
+
12
+ from src.models.modnet import MODNet
13
+
14
+
15
+ torch_transforms = transforms.Compose(
16
+ [
17
+ transforms.ToTensor(),
18
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
19
+ ]
20
+ )
21
+
22
+
23
+ def matting(video, result, alpha_matte=False, fps=30):
24
+ # video capture
25
+ vc = cv2.VideoCapture(video)
26
+
27
+ if vc.isOpened():
28
+ rval, frame = vc.read()
29
+ else:
30
+ rval = False
31
+
32
+ if not rval:
33
+ print('Failed to read the video: {0}'.format(video))
34
+ exit()
35
+
36
+ num_frame = vc.get(cv2.CAP_PROP_FRAME_COUNT)
37
+ h, w = frame.shape[:2]
38
+ if w >= h:
39
+ rh = 512
40
+ rw = int(w / h * 512)
41
+ else:
42
+ rw = 512
43
+ rh = int(h / w * 512)
44
+ rh = rh - rh % 32
45
+ rw = rw - rw % 32
46
+
47
+ # video writer
48
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
49
+ video_writer = cv2.VideoWriter(result, fourcc, fps, (w, h))
50
+
51
+ print('Start matting...')
52
+ with tqdm(range(int(num_frame))) as t:
53
+ for c in t:
54
+ frame_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
55
+ frame_np = cv2.resize(frame_np, (rw, rh), interpolation=cv2.INTER_AREA)
56
+
57
+ frame_PIL = Image.fromarray(frame_np)
58
+ frame_tensor = torch_transforms(frame_PIL)
59
+ frame_tensor = frame_tensor[None, :, :, :]
60
+ if GPU:
61
+ frame_tensor = frame_tensor.cuda()
62
+
63
+ with torch.no_grad():
64
+ _, _, matte_tensor = modnet(frame_tensor, True)
65
+
66
+ matte_tensor = matte_tensor.repeat(1, 3, 1, 1)
67
+ matte_np = matte_tensor[0].data.cpu().numpy().transpose(1, 2, 0)
68
+ if alpha_matte:
69
+ view_np = matte_np * np.full(frame_np.shape, 255.0)
70
+ else:
71
+ view_np = matte_np * frame_np + (1 - matte_np) * np.full(frame_np.shape, 255.0)
72
+ view_np = cv2.cvtColor(view_np.astype(np.uint8), cv2.COLOR_RGB2BGR)
73
+ view_np = cv2.resize(view_np, (w, h))
74
+ video_writer.write(view_np)
75
+
76
+ rval, frame = vc.read()
77
+ c += 1
78
+
79
+ video_writer.release()
80
+ print('Save the result video to {0}'.format(result))
81
+
82
+
83
+ if __name__ == '__main__':
84
+ parser = argparse.ArgumentParser()
85
+ parser.add_argument('--video', type=str, required=True, help='input video file')
86
+ parser.add_argument('--result-type', type=str, default='fg', choices=['fg', 'matte'],
87
+ help='matte - save the alpha matte; fg - save the foreground')
88
+ parser.add_argument('--fps', type=int, default=30, help='fps of the result video')
89
+
90
+ print('Get CMD Arguments...')
91
+ args = parser.parse_args()
92
+
93
+ if not os.path.exists(args.video):
94
+ print('Cannot find the input video: {0}'.format(args.video))
95
+ exit()
96
+
97
+ print('Load pre-trained MODNet...')
98
+ pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
99
+ modnet = MODNet(backbone_pretrained=False)
100
+ modnet = nn.DataParallel(modnet)
101
+
102
+ GPU = True if torch.cuda.device_count() > 0 else False
103
+ if GPU:
104
+ print('Use GPU...')
105
+ modnet = modnet.cuda()
106
+ modnet.load_state_dict(torch.load(pretrained_ckpt))
107
+ else:
108
+ print('Use CPU...')
109
+ modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
110
+ modnet.eval()
111
+
112
+ result = os.path.splitext(args.video)[0] + '_{0}.mp4'.format(args.result_type)
113
+ alpha_matte = True if args.result_type == 'matte' else False
114
+ matting(args.video, result, alpha_matte, args.fps)
MODNet/demo/video_matting/webcam/README.md ADDED
@@ -0,0 +1,52 @@
1
+ ## MODNet - WebCam-Based Portrait Video Matting Demo
2
+ This is a MODNet portrait video matting demo based on WebCam. It will call your local WebCam and display the matting results in real time. The demo can run under CPU or GPU.
3
+
4
+ ### 1. Requirements
5
+ The basic requirements for this demo are:
6
+ - Ubuntu System
7
+ - WebCam
8
+ - Python 3+
9
+
10
+ **NOTE**: If your device does not satisfy the above conditions, please try our [online Colab demo](https://colab.research.google.com/drive/1Pt3KDSc2q7WxFvekCnCLD8P0gBEbxm6J?usp=sharing).
11
+
12
+
13
+ ### 2. Introduction
14
+ We use ~400 unlabeled video clips (divided into ~50,000 frames) downloaded from the internet to perform SOC to adapt MODNet to the video domain. **Nonetheless, due to insufficient labeled training data (~3k labeled foregrounds), our model may still make errors in portrait semantics estimation under challenging scenes.** Besides, this demo does not currently support the OFD trick, which will be provided soon.
15
+
16
+ For a better experience, please:
17
+
18
+ * make sure the portrait and background are distinguishable, <i>i.e.</i>, are not similar
19
+ * run in soft and bright ambient lighting
20
+ * do not be too close or too far from the WebCam
21
+ * do not move too fast
22
+
23
+ ### 3. Run Demo
24
+ We recommend creating a new conda virtual environment to run this demo, as follows:
25
+
26
+ 1. Clone the MODNet repository:
27
+ ```
28
+ git clone https://github.com/ZHKKKe/MODNet.git
29
+ cd MODNet
30
+ ```
31
+
32
+ 2. Download the pre-trained model from this [link](https://drive.google.com/file/d/1Nf1ZxeJZJL8Qx9KadcYYyEmmlKhTADxX/view?usp=sharing) and put it into the folder `MODNet/pretrained/`.
33
+
34
+
35
+ 3. Create a conda virtual environment named `modnet` (if it doesn't exist) and activate it. Here we use `python=3.6` as an example:
36
+ ```
37
+ conda create -n modnet python=3.6
38
+ source activate modnet
39
+ ```
40
+
41
+ 4. Install the required python dependencies (please make sure your CUDA version is supported by the PyTorch version installed):
42
+ ```
43
+ pip install -r demo/video_matting/webcam/requirements.txt
44
+ ```
45
+
46
+ 5. Execute the main code:
47
+ ```
48
+ python -m demo.video_matting.webcam.run
49
+ ```
50
+
51
+ ### 4. Acknowledgement
52
+ We thank [@tkianai](https://github.com/tkianai) and [@mazhar004](https://github.com/mazhar004) for their contributions to making this demo available for CPU use.
MODNet/demo/video_matting/webcam/requirements.txt ADDED
@@ -0,0 +1,5 @@
1
+ numpy
2
+ Pillow
3
+ opencv-python
4
+ torch >= 1.0.0
5
+ torchvision
MODNet/demo/video_matting/webcam/run.py ADDED
@@ -0,0 +1,67 @@
1
+ import cv2
2
+ import numpy as np
3
+ from PIL import Image
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ import torchvision.transforms as transforms
8
+
9
+ from src.models.modnet import MODNet
10
+
11
+
12
+ torch_transforms = transforms.Compose(
13
+ [
14
+ transforms.ToTensor(),
15
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
16
+ ]
17
+ )
18
+
19
+ print('Load pre-trained MODNet...')
20
+ pretrained_ckpt = './pretrained/modnet_webcam_portrait_matting.ckpt'
21
+ modnet = MODNet(backbone_pretrained=False)
22
+ modnet = nn.DataParallel(modnet)
23
+
24
+ GPU = True if torch.cuda.device_count() > 0 else False
25
+ if GPU:
26
+ print('Use GPU...')
27
+ modnet = modnet.cuda()
28
+ modnet.load_state_dict(torch.load(pretrained_ckpt))
29
+ else:
30
+ print('Use CPU...')
31
+ modnet.load_state_dict(torch.load(pretrained_ckpt, map_location=torch.device('cpu')))
32
+
33
+ modnet.eval()
34
+
35
+ print('Init WebCam...')
36
+ cap = cv2.VideoCapture(0)
37
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
38
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
39
+
40
+ print('Start matting...')
41
+ while True:
42
+ _, frame_np = cap.read()
43
+ frame_np = cv2.cvtColor(frame_np, cv2.COLOR_BGR2RGB)
44
+ frame_np = cv2.resize(frame_np, (910, 512), interpolation=cv2.INTER_AREA)
45
+ frame_np = frame_np[:, 120:792, :]
46
+ frame_np = cv2.flip(frame_np, 1)
47
+
48
+ frame_PIL = Image.fromarray(frame_np)
49
+ frame_tensor = torch_transforms(frame_PIL)
50
+ frame_tensor = frame_tensor[None, :, :, :]
51
+ if GPU:
52
+ frame_tensor = frame_tensor.cuda()
53
+
54
+ with torch.no_grad():
55
+ _, _, matte_tensor = modnet(frame_tensor, True)
56
+
57
+ matte_tensor = matte_tensor.repeat(1, 3, 1, 1)
58
+ matte_np = matte_tensor[0].data.cpu().numpy().transpose(1, 2, 0)
59
+ fg_np = matte_np * frame_np + (1 - matte_np) * np.full(frame_np.shape, 255.0)
60
+ view_np = np.uint8(np.concatenate((frame_np, fg_np), axis=1))
61
+ view_np = cv2.cvtColor(view_np, cv2.COLOR_RGB2BGR)
62
+
63
+ cv2.imshow('MODNet - WebCam [Press \'Q\' To Exit]', view_np)
64
+ if cv2.waitKey(1) & 0xFF == ord('q'):
65
+ break
66
+
67
+ print('Exit...')
MODNet/doc/gif/commercial_image_matting_model_result.gif ADDED

Git LFS Details

  • SHA256: b0193f1e6e70c6324812ee349de7fb6d283381c820ea2685da78667266cc6a35
  • Pointer size: 133 Bytes
  • Size of remote file: 11.4 MB
MODNet/doc/gif/commercial_image_matting_website.gif ADDED

Git LFS Details

  • SHA256: 4ee4dbeee80d4720f3396370389560748b43604675a01aa8781aa741b7b8e649
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
MODNet/doc/gif/homepage_demo.gif ADDED

Git LFS Details

  • SHA256: a18a7bf0fcc50d2ce8fe4e1f1801c714b9cf5a4561897de4760f9ba655400d34
  • Pointer size: 133 Bytes
  • Size of remote file: 23.4 MB
MODNet/doc/gif/image_matting_demo.gif ADDED

Git LFS Details

  • SHA256: c727629197ab654f9fff02745cc2b64f68fc07202a28de87a723bb15d88f5dbe
  • Pointer size: 132 Bytes
  • Size of remote file: 9.68 MB
MODNet/doc/gif/video_matting_demo.gif ADDED

Git LFS Details

  • SHA256: e622a2dfa267d0386b258094259dbc1838ca31765dbdefd568b4782e384f347a
  • Pointer size: 132 Bytes
  • Size of remote file: 9.02 MB
MODNet/matte.zip ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9e2e4039d37c912f10c881f6f04f2824153fe80bf333b5c90c84e257cc153c
3
+ size 139057
MODNet/modnet.py ADDED
@@ -0,0 +1,255 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from .src.models.backbones import SUPPORTED_BACKBONES
6
+
7
+
8
+ #------------------------------------------------------------------------------
9
+ # MODNet Basic Modules
10
+ #------------------------------------------------------------------------------
11
+
12
+ class IBNorm(nn.Module):
13
+ """ Combine Instance Norm and Batch Norm into One Layer
14
+ """
15
+
16
+ def __init__(self, in_channels):
17
+ super(IBNorm, self).__init__()
18
+ in_channels = in_channels
19
+ self.bnorm_channels = int(in_channels / 2)
20
+ self.inorm_channels = in_channels - self.bnorm_channels
21
+
22
+ self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
23
+ self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
24
+
25
+ def forward(self, x):
26
+ bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
27
+ in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
28
+
29
+ return torch.cat((bn_x, in_x), 1)
30
+
31
+
32
+ class Conv2dIBNormRelu(nn.Module):
33
+ """ Convolution + IBNorm + ReLu
34
+ """
35
+
36
+ def __init__(self, in_channels, out_channels, kernel_size,
37
+ stride=1, padding=0, dilation=1, groups=1, bias=True,
38
+ with_ibn=True, with_relu=True):
39
+ super(Conv2dIBNormRelu, self).__init__()
40
+
41
+ layers = [
42
+ nn.Conv2d(in_channels, out_channels, kernel_size,
43
+ stride=stride, padding=padding, dilation=dilation,
44
+ groups=groups, bias=bias)
45
+ ]
46
+
47
+ if with_ibn:
48
+ layers.append(IBNorm(out_channels))
49
+ if with_relu:
50
+ layers.append(nn.ReLU(inplace=True))
51
+
52
+ self.layers = nn.Sequential(*layers)
53
+
54
+ def forward(self, x):
55
+ return self.layers(x)
56
+
57
+
58
+ class SEBlock(nn.Module):
59
+ """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
60
+ """
61
+
62
+ def __init__(self, in_channels, out_channels, reduction=1):
63
+ super(SEBlock, self).__init__()
64
+ self.pool = nn.AdaptiveAvgPool2d(1)
65
+ self.fc = nn.Sequential(
66
+ nn.Linear(in_channels, int(in_channels // reduction), bias=False),
67
+ nn.ReLU(inplace=True),
68
+ nn.Linear(int(in_channels // reduction), out_channels, bias=False),
69
+ nn.Sigmoid()
70
+ )
71
+
72
+ def forward(self, x):
73
+ b, c, _, _ = x.size()
74
+ w = self.pool(x).view(b, c)
75
+ w = self.fc(w).view(b, c, 1, 1)
76
+
77
+ return x * w.expand_as(x)
78
+
79
+
80
+ #------------------------------------------------------------------------------
81
+ # MODNet Branches
82
+ #------------------------------------------------------------------------------
83
+
84
+ class LRBranch(nn.Module):
85
+ """ Low Resolution Branch of MODNet
86
+ """
87
+
88
+ def __init__(self, backbone):
89
+ super(LRBranch, self).__init__()
90
+
91
+ enc_channels = backbone.enc_channels
92
+
93
+ self.backbone = backbone
94
+ self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
95
+ self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
96
+ self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
97
+ self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
98
+
99
+ def forward(self, img, inference):
100
+ enc_features = self.backbone.forward(img)
101
+ enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
102
+
103
+ enc32x = self.se_block(enc32x)
104
+ lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
105
+ lr16x = self.conv_lr16x(lr16x)
106
+ lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
107
+ lr8x = self.conv_lr8x(lr8x)
108
+
109
+ pred_semantic = None
110
+ if not inference:
111
+ lr = self.conv_lr(lr8x)
112
+ pred_semantic = torch.sigmoid(lr)
113
+
114
+ return pred_semantic, lr8x, [enc2x, enc4x]
115
+
116
+
117
+ class HRBranch(nn.Module):
118
+ """ High Resolution Branch of MODNet
119
+ """
120
+
121
+ def __init__(self, hr_channels, enc_channels):
122
+ super(HRBranch, self).__init__()
123
+
124
+ self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
125
+ self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
126
+
127
+ self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
128
+ self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
129
+
130
+ self.conv_hr4x = nn.Sequential(
131
+ Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
132
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
133
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
134
+ )
135
+
136
+ self.conv_hr2x = nn.Sequential(
137
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
138
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
139
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
140
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
141
+ )
142
+
143
+ self.conv_hr = nn.Sequential(
144
+ Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
145
+ Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
146
+ )
147
+
148
+ def forward(self, img, enc2x, enc4x, lr8x, inference):
149
+ img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
150
+ img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
151
+
152
+ enc2x = self.tohr_enc2x(enc2x)
153
+ hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
154
+
155
+ enc4x = self.tohr_enc4x(enc4x)
156
+ hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
157
+
158
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
159
+ hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
160
+
161
+ hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
162
+ hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
163
+
164
+ pred_detail = None
165
+ if not inference:
166
+ hr = F.interpolate(hr2x, scale_factor=2, mode='bilinear', align_corners=False)
167
+ hr = self.conv_hr(torch.cat((hr, img), dim=1))
168
+ pred_detail = torch.sigmoid(hr)
169
+
170
+ return pred_detail, hr2x
171
+
172
+
173
+ class FusionBranch(nn.Module):
174
+ """ Fusion Branch of MODNet
175
+ """
176
+
177
+ def __init__(self, hr_channels, enc_channels):
178
+ super(FusionBranch, self).__init__()
179
+ self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
180
+
181
+ self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
182
+ self.conv_f = nn.Sequential(
183
+ Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
184
+ Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
185
+ )
186
+
187
+ def forward(self, img, lr8x, hr2x):
188
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
189
+ lr4x = self.conv_lr4x(lr4x)
190
+ lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
191
+
192
+ f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
193
+ f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
194
+ f = self.conv_f(torch.cat((f, img), dim=1))
195
+ pred_matte = torch.sigmoid(f)
196
+
197
+ return pred_matte
198
+
199
+
200
+ #------------------------------------------------------------------------------
201
+ # MODNet
202
+ #------------------------------------------------------------------------------
203
+
204
+ class MODNet(nn.Module):
205
+ """ Architecture of MODNet
206
+ """
207
+
208
+ def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
209
+ super(MODNet, self).__init__()
210
+
211
+ self.in_channels = in_channels
212
+ self.hr_channels = hr_channels
213
+ self.backbone_arch = backbone_arch
214
+ self.backbone_pretrained = backbone_pretrained
215
+
216
+ self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
217
+
218
+ self.lr_branch = LRBranch(self.backbone)
219
+ self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
220
+ self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
221
+
222
+ for m in self.modules():
223
+ if isinstance(m, nn.Conv2d):
224
+ self._init_conv(m)
225
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
226
+ self._init_norm(m)
227
+
228
+ if self.backbone_pretrained:
229
+ self.backbone.load_pretrained_ckpt()
230
+
231
+ def forward(self, img, inference):
232
+ pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(img, inference)
233
+ pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
234
+ pred_matte = self.f_branch(img, lr8x, hr2x)
235
+
236
+ return pred_semantic, pred_detail, pred_matte
237
+
238
+ def freeze_norm(self):
239
+ norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
240
+ for m in self.modules():
241
+ for n in norm_types:
242
+ if isinstance(m, n):
243
+ m.eval()
244
+ continue
245
+
246
+ def _init_conv(self, conv):
247
+ nn.init.kaiming_uniform_(
248
+ conv.weight, a=0, mode='fan_in', nonlinearity='relu')
249
+ if conv.bias is not None:
250
+ nn.init.constant_(conv.bias, 0)
251
+
252
+ def _init_norm(self, norm):
253
+ if norm.weight is not None:
254
+ nn.init.constant_(norm.weight, 1)
255
+ nn.init.constant_(norm.bias, 0)
MODNet/onnx/README.md ADDED
@@ -0,0 +1,30 @@
1
+ ## MODNet - ONNX Model
2
+
3
+ This ONNX version of MODNet is provided by [@manthan3C273](https://github.com/manthan3C273) from the community.
4
+ Please note that the PyTorch version required by this ONNX export function is higher than the one required by the official MODNet code (torch==1.7.1 is recommended).
5
+
6
+ You can try **MODNet - Image Matting Demo (ONNX version)** in [this Colab](https://colab.research.google.com/drive/1P3cWtg8fnmu9karZHYDAtmm1vj1rgA-f?usp=sharing).
7
+ You can also download the ONNX version of the official **Image Matting Model** from [this link](https://drive.google.com/file/d/1cgycTQlYXpTh26gB9FTnthE7AvruV8hd/view?usp=sharing).
8
+
9
+ To export the ONNX version of MODNet (assuming you are currently in the project root directory):
10
+ 1. Download the pre-trained **Image Matting Model** from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing) and put the model into the folder `MODNet/pretrained/`.
11
+
12
+ 2. Install all dependencies by:
13
+ ```
14
+ pip install -r onnx/requirements.txt
15
+ ```
16
+
17
+ 3. Export the ONNX version of MODNet by:
18
+ ```shell
19
+ python -m onnx.export_onnx \
20
+ --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
21
+ --output-path=pretrained/modnet_photographic_portrait_matting.onnx
22
+ ```
23
+
24
+ 4. Run inference with the ONNX model by:
25
+ ```shell
26
+ python -m onnx.inference_onnx \
27
+ --image-path=$FILENAME_OF_INPUT_IMAGE$ \
28
+ --output-path=$FILENAME_OF_OUTPUT_MATTE$ \
29
+ --model-path=pretrained/modnet_photographic_portrait_matting.onnx
30
+ ```
MODNet/onnx/__init__.py ADDED
File without changes
MODNet/onnx/export_onnx.py ADDED
@@ -0,0 +1,55 @@
1
+ """
2
+ Export ONNX model of MODNet with:
3
+ input shape: (batch_size, 3, height, width)
4
+ output shape: (batch_size, 1, height, width)
5
+
6
+ Arguments:
7
+ --ckpt-path: path of the checkpoint that will be converted
8
+ --output-path: path for saving the ONNX model
9
+
10
+ Example:
11
+ python export_onnx.py \
12
+ --ckpt-path=modnet_photographic_portrait_matting.ckpt \
13
+ --output-path=modnet_photographic_portrait_matting.onnx
14
+ """
15
+
16
+ import os
17
+ import argparse
18
+
19
+ import torch
20
+ import torch.nn as nn
21
+ from torch.autograd import Variable
22
+
23
+ from . import modnet_onnx
24
+
25
+
26
+ if __name__ == '__main__':
27
+ # define cmd arguments
28
+ parser = argparse.ArgumentParser()
29
+ parser.add_argument('--ckpt-path', type=str, required=True, help='path of the checkpoint that will be converted')
30
+ parser.add_argument('--output-path', type=str, required=True, help='path for saving the ONNX model')
31
+ args = parser.parse_args()
32
+
33
+ # check input arguments
34
+ if not os.path.exists(args.ckpt_path):
35
+ print('Cannot find checkpoint path: {0}'.format(args.ckpt_path))
36
+ exit()
37
+
38
+ # define model & load checkpoint
39
+ modnet = modnet_onnx.MODNet(backbone_pretrained=False)
40
+ modnet = nn.DataParallel(modnet).cuda()
41
+ state_dict = torch.load(args.ckpt_path)
42
+ modnet.load_state_dict(state_dict)
43
+ modnet.eval()
44
+
45
+ # prepare dummy_input
46
+ batch_size = 1
47
+ height = 512
48
+ width = 512
49
+ dummy_input = Variable(torch.randn(batch_size, 3, height, width)).cuda()
50
+
51
+ # export to onnx model
52
+ torch.onnx.export(
53
+ modnet.module, dummy_input, args.output_path, export_params = True,
54
+ input_names = ['input'], output_names = ['output'],
55
+ dynamic_axes = {'input': {0:'batch_size', 2:'height', 3:'width'}, 'output': {0: 'batch_size', 2: 'height', 3: 'width'}})
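After running the export above, the generated graph can be given a quick structural sanity check with the `onnx` package before it is handed to onnxruntime; a minimal hedged sketch (the output path follows the command shown in `onnx/README.md`):

```python
# Minimal post-export sanity check of the generated ONNX graph.
import onnx

model = onnx.load('pretrained/modnet_photographic_portrait_matting.onnx')
onnx.checker.check_model(model)  # raises a ValidationError if the graph is malformed

# The exporter above names the tensors 'input' and 'output'.
print([i.name for i in model.graph.input])   # expected: ['input']
print([o.name for o in model.graph.output])  # expected: ['output']
```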
MODNet/onnx/inference_onnx.py ADDED
@@ -0,0 +1,104 @@
1
+ """
2
+ Run inference with the ONNX model of MODNet
3
+
4
+ Arguments:
5
+ --image-path: path of the input image (a file)
6
+ --output-path: path for saving the predicted alpha matte (a file)
7
+ --model-path: path of the ONNX model
8
+
9
+ Example:
10
+ python inference_onnx.py \
11
+ --image-path=demo.jpg --output-path=matte.png --model-path=modnet.onnx
12
+ """
13
+
14
+ import os
15
+ import cv2
16
+ import argparse
17
+ import numpy as np
18
+ from PIL import Image
19
+
20
+ import onnx
21
+ import onnxruntime
22
+
23
+
24
+ if __name__ == '__main__':
25
+ # define cmd arguments
26
+ parser = argparse.ArgumentParser()
27
+ parser.add_argument('--image-path', type=str, help='path of the input image (a file)')
28
+ parser.add_argument('--output-path', type=str, help='path for saving the predicted alpha matte (a file)')
29
+ parser.add_argument('--model-path', type=str, help='path of the ONNX model')
30
+ args = parser.parse_args()
31
+
32
+ # check input arguments
33
+ if not os.path.exists(args.image_path):
34
+ print('Cannot find the input image: {0}'.format(args.image_path))
35
+ exit()
36
+ if not os.path.exists(args.model_path):
37
+ print('Cannot find the ONNX model: {0}'.format(args.model_path))
38
+ exit()
39
+
40
+ ref_size = 512
41
+
42
+ # Get x_scale_factor & y_scale_factor to resize image
43
+ def get_scale_factor(im_h, im_w, ref_size):
44
+
45
+ if max(im_h, im_w) < ref_size or min(im_h, im_w) > ref_size:
46
+ if im_w >= im_h:
47
+ im_rh = ref_size
48
+ im_rw = int(im_w / im_h * ref_size)
49
+ elif im_w < im_h:
50
+ im_rw = ref_size
51
+ im_rh = int(im_h / im_w * ref_size)
52
+ else:
53
+ im_rh = im_h
54
+ im_rw = im_w
55
+
56
+ im_rw = im_rw - im_rw % 32
57
+ im_rh = im_rh - im_rh % 32
58
+
59
+ x_scale_factor = im_rw / im_w
60
+ y_scale_factor = im_rh / im_h
61
+
62
+ return x_scale_factor, y_scale_factor
63
+
64
+ ##############################################
65
+ # Main Inference part
66
+ ##############################################
67
+
68
+ # read image
69
+ im = cv2.imread(args.image_path)
70
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
71
+
72
+ # unify image channels to 3
73
+ if len(im.shape) == 2:
74
+ im = im[:, :, None]
75
+ if im.shape[2] == 1:
76
+ im = np.repeat(im, 3, axis=2)
77
+ elif im.shape[2] == 4:
78
+ im = im[:, :, 0:3]
79
+
80
+ # normalize pixel values to the range [-1, 1]
81
+ im = (im - 127.5) / 127.5
82
+
83
+ im_h, im_w, im_c = im.shape
84
+ x, y = get_scale_factor(im_h, im_w, ref_size)
85
+
86
+ # resize image
87
+ im = cv2.resize(im, None, fx = x, fy = y, interpolation = cv2.INTER_AREA)
88
+
89
+ # prepare input shape
90
+ im = np.transpose(im)
91
+ im = np.swapaxes(im, 1, 2)
92
+ im = np.expand_dims(im, axis = 0).astype('float32')
93
+
94
+ # Initialize session and get prediction
95
+ session = onnxruntime.InferenceSession(args.model_path, None)
96
+ input_name = session.get_inputs()[0].name
97
+ output_name = session.get_outputs()[0].name
98
+ result = session.run([output_name], {input_name: im})
99
+
100
+ # refine matte
101
+ matte = (np.squeeze(result[0]) * 255).astype('uint8')
102
+ matte = cv2.resize(matte, dsize=(im_w, im_h), interpolation = cv2.INTER_AREA)
103
+
104
+ cv2.imwrite(args.output_path, matte)
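The script above saves only the grayscale alpha matte. As a purely illustrative follow-up (not shipped in this commit), the matte can be stacked onto the original image to produce a cutout with transparency; the file names below reuse the example invocation from the docstring, and `cutout.png` is a hypothetical output name.

```python
# Hypothetical post-processing: turn demo.jpg + matte.png into an RGBA cutout.
import cv2
import numpy as np

image = cv2.imread("demo.jpg")                          # BGR, uint8, original resolution
matte = cv2.imread("matte.png", cv2.IMREAD_GRAYSCALE)   # alpha matte written by inference_onnx.py
rgba = np.dstack((image, matte))                        # H x W x 4 (matte is resized back to image size)
cv2.imwrite("cutout.png", rgba)                         # PNG keeps the alpha channel
```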
MODNet/onnx/modnet_onnx.py ADDED
@@ -0,0 +1,252 @@
1
+ """
2
+ This file contains a modified version of the original file `modnet.py` without
3
+ `pred_semantic` and `pred_detail`, since both return None when `inference=True`.
4
+
5
+ It also omits the `inference` argument, which makes it easier to
6
+ convert the checkpoint to an ONNX model.
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+
13
+ from src.models.backbones import SUPPORTED_BACKBONES
14
+
15
+
16
+ #------------------------------------------------------------------------------
17
+ # MODNet Basic Modules
18
+ #------------------------------------------------------------------------------
19
+
20
+ class IBNorm(nn.Module):
21
+ """ Combine Instance Norm and Batch Norm into One Layer
22
+ """
23
+
24
+ def __init__(self, in_channels):
25
+ super(IBNorm, self).__init__()
26
+ in_channels = in_channels
27
+ self.bnorm_channels = int(in_channels / 2)
28
+ self.inorm_channels = in_channels - self.bnorm_channels
29
+
30
+ self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
31
+ self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
32
+
33
+ def forward(self, x):
34
+ bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
35
+ in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
36
+
37
+ return torch.cat((bn_x, in_x), 1)
38
+
39
+
40
+ class Conv2dIBNormRelu(nn.Module):
41
+ """ Convolution + IBNorm + ReLu
42
+ """
43
+
44
+ def __init__(self, in_channels, out_channels, kernel_size,
45
+ stride=1, padding=0, dilation=1, groups=1, bias=True,
46
+ with_ibn=True, with_relu=True):
47
+ super(Conv2dIBNormRelu, self).__init__()
48
+
49
+ layers = [
50
+ nn.Conv2d(in_channels, out_channels, kernel_size,
51
+ stride=stride, padding=padding, dilation=dilation,
52
+ groups=groups, bias=bias)
53
+ ]
54
+
55
+ if with_ibn:
56
+ layers.append(IBNorm(out_channels))
57
+ if with_relu:
58
+ layers.append(nn.ReLU(inplace=True))
59
+
60
+ self.layers = nn.Sequential(*layers)
61
+
62
+ def forward(self, x):
63
+ return self.layers(x)
64
+
65
+
66
+ class SEBlock(nn.Module):
67
+ """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
68
+ """
69
+
70
+ def __init__(self, in_channels, out_channels, reduction=1):
71
+ super(SEBlock, self).__init__()
72
+ self.pool = nn.AdaptiveAvgPool2d(1)
73
+ self.fc = nn.Sequential(
74
+ nn.Linear(in_channels, int(in_channels // reduction), bias=False),
75
+ nn.ReLU(inplace=True),
76
+ nn.Linear(int(in_channels // reduction), out_channels, bias=False),
77
+ nn.Sigmoid()
78
+ )
79
+
80
+ def forward(self, x):
81
+ b, c, _, _ = x.size()
82
+ w = self.pool(x).view(b, c)
83
+ w = self.fc(w).view(b, c, 1, 1)
84
+
85
+ return x * w.expand_as(x)
86
+
87
+
88
+ #------------------------------------------------------------------------------
89
+ # MODNet Branches
90
+ #------------------------------------------------------------------------------
91
+
92
+ class LRBranch(nn.Module):
93
+ """ Low Resolution Branch of MODNet
94
+ """
95
+
96
+ def __init__(self, backbone):
97
+ super(LRBranch, self).__init__()
98
+
99
+ enc_channels = backbone.enc_channels
100
+
101
+ self.backbone = backbone
102
+ self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
103
+ self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
104
+ self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
105
+ self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
106
+
107
+ def forward(self, img):
108
+ enc_features = self.backbone.forward(img)
109
+ enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
110
+
111
+ enc32x = self.se_block(enc32x)
112
+ lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
113
+ lr16x = self.conv_lr16x(lr16x)
114
+ lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
115
+ lr8x = self.conv_lr8x(lr8x)
116
+
117
+ return lr8x, [enc2x, enc4x]
118
+
119
+
120
+ class HRBranch(nn.Module):
121
+ """ High Resolution Branch of MODNet
122
+ """
123
+
124
+ def __init__(self, hr_channels, enc_channels):
125
+ super(HRBranch, self).__init__()
126
+
127
+ self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
128
+ self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
129
+
130
+ self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
131
+ self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
132
+
133
+ self.conv_hr4x = nn.Sequential(
134
+ Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
135
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
136
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
137
+ )
138
+
139
+ self.conv_hr2x = nn.Sequential(
140
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
141
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
142
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
143
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
144
+ )
145
+
146
+ self.conv_hr = nn.Sequential(
147
+ Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
148
+ Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
149
+ )
150
+
151
+ def forward(self, img, enc2x, enc4x, lr8x):
152
+ img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
153
+ img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
154
+
155
+ enc2x = self.tohr_enc2x(enc2x)
156
+ hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
157
+
158
+ enc4x = self.tohr_enc4x(enc4x)
159
+ hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
160
+
161
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
162
+ hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
163
+
164
+ hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
165
+ hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
166
+
167
+ return hr2x
168
+
169
+
170
+ class FusionBranch(nn.Module):
171
+ """ Fusion Branch of MODNet
172
+ """
173
+
174
+ def __init__(self, hr_channels, enc_channels):
175
+ super(FusionBranch, self).__init__()
176
+ self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
177
+
178
+ self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
179
+ self.conv_f = nn.Sequential(
180
+ Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
181
+ Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
182
+ )
183
+
184
+ def forward(self, img, lr8x, hr2x):
185
+ lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
186
+ lr4x = self.conv_lr4x(lr4x)
187
+ lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
188
+
189
+ f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
190
+ f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
191
+ f = self.conv_f(torch.cat((f, img), dim=1))
192
+ pred_matte = torch.sigmoid(f)
193
+
194
+ return pred_matte
195
+
196
+
197
+ #------------------------------------------------------------------------------
198
+ # MODNet
199
+ #------------------------------------------------------------------------------
200
+
201
+ class MODNet(nn.Module):
202
+ """ Architecture of MODNet
203
+ """
204
+
205
+ def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
206
+ super(MODNet, self).__init__()
207
+
208
+ self.in_channels = in_channels
209
+ self.hr_channels = hr_channels
210
+ self.backbone_arch = backbone_arch
211
+ self.backbone_pretrained = backbone_pretrained
212
+
213
+ self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
214
+
215
+ self.lr_branch = LRBranch(self.backbone)
216
+ self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
217
+ self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
218
+
219
+ for m in self.modules():
220
+ if isinstance(m, nn.Conv2d):
221
+ self._init_conv(m)
222
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
223
+ self._init_norm(m)
224
+
225
+ if self.backbone_pretrained:
226
+ self.backbone.load_pretrained_ckpt()
227
+
228
+ def forward(self, img):
229
+ lr8x, [enc2x, enc4x] = self.lr_branch(img)
230
+ hr2x = self.hr_branch(img, enc2x, enc4x, lr8x)
231
+ pred_matte = self.f_branch(img, lr8x, hr2x)
232
+
233
+ return pred_matte
234
+
235
+ def freeze_norm(self):
236
+ norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
237
+ for m in self.modules():
238
+ for n in norm_types:
239
+ if isinstance(m, n):
240
+ m.eval()
241
+ continue
242
+
243
+ def _init_conv(self, conv):
244
+ nn.init.kaiming_uniform_(
245
+ conv.weight, a=0, mode='fan_in', nonlinearity='relu')
246
+ if conv.bias is not None:
247
+ nn.init.constant_(conv.bias, 0)
248
+
249
+ def _init_norm(self, norm):
250
+ if norm.weight is not None:
251
+ nn.init.constant_(norm.weight, 1)
252
+ nn.init.constant_(norm.bias, 0)
MODNet/onnx/requirements.txt ADDED
@@ -0,0 +1,4 @@
1
+ onnx==1.8.1
2
+ onnxruntime==1.6.0
3
+ opencv-python==4.5.1.48
4
+ torch==1.7.1
MODNet/pretrained/README.md ADDED
@@ -0,0 +1,2 @@
1
+ ## MODNet - Pre-Trained Models
2
+ This folder is used to save the official pre-trained models of MODNet. You can download them from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing).
MODNet/pretrained/modnet_photographic_portrait_matting.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c22235f0925deba15d4d63e53afcb654c47055bbcd98f56e393ab2584007ed8
3
+ size 26255603
MODNet/src/__init__.py ADDED
File without changes
MODNet/src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (128 Bytes).
 
MODNet/src/models/__init__.py ADDED
File without changes
MODNet/src/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (135 Bytes).
 
MODNet/src/models/__pycache__/modnet.cpython-312.pyc ADDED
Binary file (14.2 kB).
 
MODNet/src/models/backbones/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from .wrapper import *
2
+
3
+
4
+ #------------------------------------------------------------------------------
5
+ # Replaceable Backbones
6
+ #------------------------------------------------------------------------------
7
+
8
+ SUPPORTED_BACKBONES = {
9
+ 'mobilenetv2': MobileNetV2Backbone,
10
+ }
MODNet/src/models/backbones/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (253 Bytes).
 
MODNet/src/models/backbones/__pycache__/mobilenetv2.cpython-312.pyc ADDED
Binary file (9.39 kB).
 
MODNet/src/models/backbones/__pycache__/wrapper.cpython-312.pyc ADDED
Binary file (4.46 kB).
 
MODNet/src/models/backbones/mobilenetv2.py ADDED
@@ -0,0 +1,199 @@
1
+ """ This file is adapted from https://github.com/thuyngch/Human-Segmentation-PyTorch"""
2
+
3
+ import math
4
+ import json
5
+ from functools import reduce
6
+
7
+ import torch
8
+ from torch import nn
9
+
10
+
11
+ #------------------------------------------------------------------------------
12
+ # Useful functions
13
+ #------------------------------------------------------------------------------
14
+
15
+ def _make_divisible(v, divisor, min_value=None):
16
+ if min_value is None:
17
+ min_value = divisor
18
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
19
+ # Make sure that round down does not go down by more than 10%.
20
+ if new_v < 0.9 * v:
21
+ new_v += divisor
22
+ return new_v
23
+
24
+
25
+ def conv_bn(inp, oup, stride):
26
+ return nn.Sequential(
27
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
28
+ nn.BatchNorm2d(oup),
29
+ nn.ReLU6(inplace=True)
30
+ )
31
+
32
+
33
+ def conv_1x1_bn(inp, oup):
34
+ return nn.Sequential(
35
+ nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
36
+ nn.BatchNorm2d(oup),
37
+ nn.ReLU6(inplace=True)
38
+ )
39
+
40
+
41
+ #------------------------------------------------------------------------------
42
+ # Class of Inverted Residual block
43
+ #------------------------------------------------------------------------------
44
+
45
+ class InvertedResidual(nn.Module):
46
+ def __init__(self, inp, oup, stride, expansion, dilation=1):
47
+ super(InvertedResidual, self).__init__()
48
+ self.stride = stride
49
+ assert stride in [1, 2]
50
+
51
+ hidden_dim = round(inp * expansion)
52
+ self.use_res_connect = self.stride == 1 and inp == oup
53
+
54
+ if expansion == 1:
55
+ self.conv = nn.Sequential(
56
+ # dw
57
+ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, dilation=dilation, bias=False),
58
+ nn.BatchNorm2d(hidden_dim),
59
+ nn.ReLU6(inplace=True),
60
+ # pw-linear
61
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
62
+ nn.BatchNorm2d(oup),
63
+ )
64
+ else:
65
+ self.conv = nn.Sequential(
66
+ # pw
67
+ nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
68
+ nn.BatchNorm2d(hidden_dim),
69
+ nn.ReLU6(inplace=True),
70
+ # dw
71
+ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, dilation=dilation, bias=False),
72
+ nn.BatchNorm2d(hidden_dim),
73
+ nn.ReLU6(inplace=True),
74
+ # pw-linear
75
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
76
+ nn.BatchNorm2d(oup),
77
+ )
78
+
79
+ def forward(self, x):
80
+ if self.use_res_connect:
81
+ return x + self.conv(x)
82
+ else:
83
+ return self.conv(x)
84
+
85
+
86
+ #------------------------------------------------------------------------------
87
+ # Class of MobileNetV2
88
+ #------------------------------------------------------------------------------
89
+
90
+ class MobileNetV2(nn.Module):
91
+ def __init__(self, in_channels, alpha=1.0, expansion=6, num_classes=1000):
92
+ super(MobileNetV2, self).__init__()
93
+ self.in_channels = in_channels
94
+ self.num_classes = num_classes
95
+ input_channel = 32
96
+ last_channel = 1280
97
+ interverted_residual_setting = [
98
+ # t, c, n, s
99
+ [1 , 16, 1, 1],
100
+ [expansion, 24, 2, 2],
101
+ [expansion, 32, 3, 2],
102
+ [expansion, 64, 4, 2],
103
+ [expansion, 96, 3, 1],
104
+ [expansion, 160, 3, 2],
105
+ [expansion, 320, 1, 1],
106
+ ]
107
+
108
+ # building first layer
109
+ input_channel = _make_divisible(input_channel*alpha, 8)
110
+ self.last_channel = _make_divisible(last_channel*alpha, 8) if alpha > 1.0 else last_channel
111
+ self.features = [conv_bn(self.in_channels, input_channel, 2)]
112
+
113
+ # building inverted residual blocks
114
+ for t, c, n, s in interverted_residual_setting:
115
+ output_channel = _make_divisible(int(c*alpha), 8)
116
+ for i in range(n):
117
+ if i == 0:
118
+ self.features.append(InvertedResidual(input_channel, output_channel, s, expansion=t))
119
+ else:
120
+ self.features.append(InvertedResidual(input_channel, output_channel, 1, expansion=t))
121
+ input_channel = output_channel
122
+
123
+ # building last several layers
124
+ self.features.append(conv_1x1_bn(input_channel, self.last_channel))
125
+
126
+ # make it nn.Sequential
127
+ self.features = nn.Sequential(*self.features)
128
+
129
+ # building classifier
130
+ if self.num_classes is not None:
131
+ self.classifier = nn.Sequential(
132
+ nn.Dropout(0.2),
133
+ nn.Linear(self.last_channel, num_classes),
134
+ )
135
+
136
+ # Initialize weights
137
+ self._init_weights()
138
+
139
+ def forward(self, x):
140
+ # Stage1
141
+ x = self.features[0](x)
142
+ x = self.features[1](x)
143
+ # Stage2
144
+ x = self.features[2](x)
145
+ x = self.features[3](x)
146
+ # Stage3
147
+ x = self.features[4](x)
148
+ x = self.features[5](x)
149
+ x = self.features[6](x)
150
+ # Stage4
151
+ x = self.features[7](x)
152
+ x = self.features[8](x)
153
+ x = self.features[9](x)
154
+ x = self.features[10](x)
155
+ x = self.features[11](x)
156
+ x = self.features[12](x)
157
+ x = self.features[13](x)
158
+ # Stage5
159
+ x = self.features[14](x)
160
+ x = self.features[15](x)
161
+ x = self.features[16](x)
162
+ x = self.features[17](x)
163
+ x = self.features[18](x)
164
+
165
+ # Classification
166
+ if self.num_classes is not None:
167
+ x = x.mean(dim=(2,3))
168
+ x = self.classifier(x)
169
+
170
+ # Output
171
+ return x
172
+
173
+ def _load_pretrained_model(self, pretrained_file):
174
+ pretrain_dict = torch.load(pretrained_file, map_location='cpu')
175
+ model_dict = {}
176
+ state_dict = self.state_dict()
177
+ print("[MobileNetV2] Loading pretrained model...")
178
+ for k, v in pretrain_dict.items():
179
+ if k in state_dict:
180
+ model_dict[k] = v
181
+ else:
182
+ print(k, "is ignored")
183
+ state_dict.update(model_dict)
184
+ self.load_state_dict(state_dict)
185
+
186
+ def _init_weights(self):
187
+ for m in self.modules():
188
+ if isinstance(m, nn.Conv2d):
189
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
190
+ m.weight.data.normal_(0, math.sqrt(2. / n))
191
+ if m.bias is not None:
192
+ m.bias.data.zero_()
193
+ elif isinstance(m, nn.BatchNorm2d):
194
+ m.weight.data.fill_(1)
195
+ m.bias.data.zero_()
196
+ elif isinstance(m, nn.Linear):
197
+ n = m.weight.size(1)
198
+ m.weight.data.normal_(0, 0.01)
199
+ m.bias.data.zero_()
MODNet/src/models/backbones/wrapper.py ADDED
@@ -0,0 +1,82 @@
1
+ import os
2
+ from functools import reduce
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from .mobilenetv2 import MobileNetV2
8
+
9
+
10
+ class BaseBackbone(nn.Module):
11
+ """ Superclass of Replaceable Backbone Model for Semantic Estimation
12
+ """
13
+
14
+ def __init__(self, in_channels):
15
+ super(BaseBackbone, self).__init__()
16
+ self.in_channels = in_channels
17
+
18
+ self.model = None
19
+ self.enc_channels = []
20
+
21
+ def forward(self, x):
22
+ raise NotImplementedError
23
+
24
+ def load_pretrained_ckpt(self):
25
+ raise NotImplementedError
26
+
27
+
28
+ class MobileNetV2Backbone(BaseBackbone):
29
+ """ MobileNetV2 Backbone
30
+ """
31
+
32
+ def __init__(self, in_channels):
33
+ super(MobileNetV2Backbone, self).__init__(in_channels)
34
+
35
+ self.model = MobileNetV2(self.in_channels, alpha=1.0, expansion=6, num_classes=None)
36
+ self.enc_channels = [16, 24, 32, 96, 1280]
37
+
38
+ def forward(self, x):
39
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(0, 2)), x)
40
+ x = self.model.features[0](x)
41
+ x = self.model.features[1](x)
42
+ enc2x = x
43
+
44
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(2, 4)), x)
45
+ x = self.model.features[2](x)
46
+ x = self.model.features[3](x)
47
+ enc4x = x
48
+
49
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(4, 7)), x)
50
+ x = self.model.features[4](x)
51
+ x = self.model.features[5](x)
52
+ x = self.model.features[6](x)
53
+ enc8x = x
54
+
55
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(7, 14)), x)
56
+ x = self.model.features[7](x)
57
+ x = self.model.features[8](x)
58
+ x = self.model.features[9](x)
59
+ x = self.model.features[10](x)
60
+ x = self.model.features[11](x)
61
+ x = self.model.features[12](x)
62
+ x = self.model.features[13](x)
63
+ enc16x = x
64
+
65
+ # x = reduce(lambda x, n: self.model.features[n](x), list(range(14, 19)), x)
66
+ x = self.model.features[14](x)
67
+ x = self.model.features[15](x)
68
+ x = self.model.features[16](x)
69
+ x = self.model.features[17](x)
70
+ x = self.model.features[18](x)
71
+ enc32x = x
72
+ return [enc2x, enc4x, enc8x, enc16x, enc32x]
73
+
74
+ def load_pretrained_ckpt(self):
75
+ # the pre-trained model is provided by https://github.com/thuyngch/Human-Segmentation-PyTorch
76
+ ckpt_path = './pretrained/mobilenetv2_human_seg.ckpt'
77
+ if not os.path.exists(ckpt_path):
78
+ print('cannot find the pretrained mobilenetv2 backbone')
79
+ exit()
80
+
81
+ ckpt = torch.load(ckpt_path)
82
+ self.model.load_state_dict(ckpt)
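For orientation, the backbone wrapper above returns five encoder feature maps whose channel widths are exactly `enc_channels = [16, 24, 32, 96, 1280]` at strides 2/4/8/16/32. A small sanity-check sketch (illustrative only; run from the `MODNet/` directory so the `src` package resolves, no pretrained weights needed):

```python
import torch
from src.models.backbones import MobileNetV2Backbone

backbone = MobileNetV2Backbone(in_channels=3)    # random init; load_pretrained_ckpt() is optional here
feats = backbone(torch.randn(1, 3, 512, 512))
print([f.shape[1] for f in feats])               # [16, 24, 32, 96, 1280] == backbone.enc_channels
print([f.shape[2] for f in feats])               # [256, 128, 64, 32, 16] -> strides 2/4/8/16/32
```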
MODNet/src/trainer.py ADDED
@@ -0,0 +1,299 @@
1
+ import math
2
+ import scipy
3
+ import numpy as np
4
+ from scipy.ndimage import grey_dilation, grey_erosion
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+
10
+
11
+ __all__ = [
12
+ 'supervised_training_iter',
13
+ 'soc_adaptation_iter',
14
+ ]
15
+
16
+
17
+ # ----------------------------------------------------------------------------------
18
+ # Tool Classes/Functions
19
+ # ----------------------------------------------------------------------------------
20
+
21
+ class GaussianBlurLayer(nn.Module):
22
+ """ Add Gaussian blur to a 4D tensor
23
+ This layer takes a 4D tensor of {N, C, H, W} as input.
24
+ The Gaussian blur is applied to each of the given (C) channels separately.
25
+ """
26
+
27
+ def __init__(self, channels, kernel_size):
28
+ """
29
+ Arguments:
30
+ channels (int): Channel for input tensor
31
+ kernel_size (int): Size of the kernel used in blurring
32
+ """
33
+
34
+ super(GaussianBlurLayer, self).__init__()
35
+ self.channels = channels
36
+ self.kernel_size = kernel_size
37
+ assert self.kernel_size % 2 != 0
38
+
39
+ self.op = nn.Sequential(
40
+ nn.ReflectionPad2d(math.floor(self.kernel_size / 2)),
41
+ nn.Conv2d(channels, channels, self.kernel_size,
42
+ stride=1, padding=0, bias=None, groups=channels)
43
+ )
44
+
45
+ self._init_kernel()
46
+
47
+ def forward(self, x):
48
+ """
49
+ Arguments:
50
+ x (torch.Tensor): input 4D tensor
51
+ Returns:
52
+ torch.Tensor: Blurred version of the input
53
+ """
54
+
55
+ if not len(list(x.shape)) == 4:
56
+ print('\'GaussianBlurLayer\' requires a 4D tensor as input\n')
57
+ exit()
58
+ elif not x.shape[1] == self.channels:
59
+ print('In \'GaussianBlurLayer\', the required channel ({0}) is'
60
+ ' not the same as input ({1})\n'.format(self.channels, x.shape[1]))
61
+ exit()
62
+
63
+ return self.op(x)
64
+
65
+ def _init_kernel(self):
66
+ sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8
67
+
68
+ n = np.zeros((self.kernel_size, self.kernel_size))
69
+ i = math.floor(self.kernel_size / 2)
70
+ n[i, i] = 1
71
+ kernel = scipy.ndimage.gaussian_filter(n, sigma)
72
+
73
+ for name, param in self.named_parameters():
74
+ param.data.copy_(torch.from_numpy(kernel))
75
+
76
+ # ----------------------------------------------------------------------------------
77
+
78
+
79
+ # ----------------------------------------------------------------------------------
80
+ # MODNet Training Functions
81
+ # ----------------------------------------------------------------------------------
82
+
83
+ blurer = GaussianBlurLayer(1, 3).cuda()
84
+
85
+
86
+ def supervised_training_iter(
87
+ modnet, optimizer, image, trimap, gt_matte,
88
+ semantic_scale=10.0, detail_scale=10.0, matte_scale=1.0):
89
+ """ Supervised training iteration of MODNet
90
+ This function trains MODNet for one iteration on a labeled dataset.
91
+
92
+ Arguments:
93
+ modnet (torch.nn.Module): instance of MODNet
94
+ optimizer (torch.optim.Optimizer): optimizer for supervised training
95
+ image (torch.autograd.Variable): input RGB image
96
+ its pixel values should be normalized
97
+ trimap (torch.autograd.Variable): trimap used to calculate the losses
98
+ its pixel values can be 0, 0.5, or 1
99
+ (foreground=1, background=0, unknown=0.5)
100
+ gt_matte (torch.autograd.Variable): ground truth alpha matte
101
+ its pixel values are between [0, 1]
102
+ semantic_scale (float): scale of the semantic loss
103
+ NOTE: please adjust according to your dataset
104
+ detail_scale (float): scale of the detail loss
105
+ NOTE: please adjust according to your dataset
106
+ matte_scale (float): scale of the matte loss
107
+ NOTE: please adjust according to your dataset
108
+
109
+ Returns:
110
+ semantic_loss (torch.Tensor): loss of the semantic estimation [Low-Resolution (LR) Branch]
111
+ detail_loss (torch.Tensor): loss of the detail prediction [High-Resolution (HR) Branch]
112
+ matte_loss (torch.Tensor): loss of the semantic-detail fusion [Fusion Branch]
113
+
114
+ Example:
115
+ import torch
116
+ from src.models.modnet import MODNet
117
+ from src.trainer import supervised_training_iter
118
+
119
+ bs = 16 # batch size
120
+ lr = 0.01 # learning rate
121
+ epochs = 40 # total epochs
122
+
123
+ modnet = torch.nn.DataParallel(MODNet()).cuda()
124
+ optimizer = torch.optim.SGD(modnet.parameters(), lr=lr, momentum=0.9)
125
+ lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(0.25 * epochs), gamma=0.1)
126
+
127
+ dataloader = CREATE_YOUR_DATALOADER(bs) # NOTE: please finish this function
128
+
129
+ for epoch in range(0, epochs):
130
+ for idx, (image, trimap, gt_matte) in enumerate(dataloader):
131
+ semantic_loss, detail_loss, matte_loss = \
132
+ supervised_training_iter(modnet, optimizer, image, trimap, gt_matte)
133
+ lr_scheduler.step()
134
+ """
135
+
136
+ global blurer
137
+
138
+ # set the model to train mode and clear the optimizer
139
+ modnet.train()
140
+ optimizer.zero_grad()
141
+
142
+ # forward the model
143
+ pred_semantic, pred_detail, pred_matte = modnet(image, False)
144
+
145
+ # calculate the boundary mask from the trimap
146
+ boundaries = (trimap < 0.5) + (trimap > 0.5)
147
+
148
+ # calculate the semantic loss
149
+ gt_semantic = F.interpolate(gt_matte, scale_factor=1/16, mode='bilinear')
150
+ gt_semantic = blurer(gt_semantic)
151
+ semantic_loss = torch.mean(F.mse_loss(pred_semantic, gt_semantic))
152
+ semantic_loss = semantic_scale * semantic_loss
153
+
154
+ # calculate the detail loss
155
+ pred_boundary_detail = torch.where(boundaries, trimap, pred_detail)
156
+ gt_detail = torch.where(boundaries, trimap, gt_matte)
157
+ detail_loss = torch.mean(F.l1_loss(pred_boundary_detail, gt_detail))
158
+ detail_loss = detail_scale * detail_loss
159
+
160
+ # calculate the matte loss
161
+ pred_boundary_matte = torch.where(boundaries, trimap, pred_matte)
162
+ matte_l1_loss = F.l1_loss(pred_matte, gt_matte) + 4.0 * F.l1_loss(pred_boundary_matte, gt_matte)
163
+ matte_compositional_loss = F.l1_loss(image * pred_matte, image * gt_matte) \
164
+ + 4.0 * F.l1_loss(image * pred_boundary_matte, image * gt_matte)
165
+ matte_loss = torch.mean(matte_l1_loss + matte_compositional_loss)
166
+ matte_loss = matte_scale * matte_loss
167
+
168
+ # calculate the final loss, backward the loss, and update the model
169
+ loss = semantic_loss + detail_loss + matte_loss
170
+ loss.backward()
171
+ optimizer.step()
172
+
173
+ # for test
174
+ return semantic_loss, detail_loss, matte_loss
175
+
176
+
177
+ def soc_adaptation_iter(
178
+ modnet, backup_modnet, optimizer, image,
179
+ soc_semantic_scale=100.0, soc_detail_scale=1.0):
180
+ """ Self-Supervised sub-objective consistency (SOC) adaptation iteration of MODNet
181
+ This function fine-tunes MODNet for one iteration on an unlabeled dataset.
182
+ Note that SOC can only fine-tune a converged MODNet, i.e., MODNet that has been
183
+ trained on a labeled dataset.
184
+
185
+ Arguments:
186
+ modnet (torch.nn.Module): instance of MODNet
187
+ backup_modnet (torch.nn.Module): backup of the trained MODNet
188
+ optimizer (torch.optim.Optimizer): optimizer for self-supervised SOC
189
+ image (torch.autograd.Variable): input RGB image
190
+ its pixel values should be normalized
191
+ soc_semantic_scale (float): scale of the SOC semantic loss
192
+ NOTE: please adjust according to your dataset
193
+ soc_detail_scale (float): scale of the SOC detail loss
194
+ NOTE: please adjust according to your dataset
195
+
196
+ Returns:
197
+ soc_semantic_loss (torch.Tensor): loss of the semantic SOC
198
+ soc_detail_loss (torch.Tensor): loss of the detail SOC
199
+
200
+ Example:
201
+ import copy
202
+ import torch
203
+ from src.models.modnet import MODNet
204
+ from src.trainer import soc_adaptation_iter
205
+
206
+ bs = 1 # batch size
207
+ lr = 0.00001 # learning rate
208
+ epochs = 10 # total epochs
209
+
210
+ modnet = torch.nn.DataParallel(MODNet()).cuda()
211
+ modnet = LOAD_TRAINED_CKPT() # NOTE: please finish this function
212
+
213
+ optimizer = torch.optim.Adam(modnet.parameters(), lr=lr, betas=(0.9, 0.99))
214
+ dataloader = CREATE_YOUR_DATALOADER(bs) # NOTE: please finish this function
215
+
216
+ for epoch in range(0, epochs):
217
+ backup_modnet = copy.deepcopy(modnet)
218
+ for idx, (image) in enumerate(dataloader):
219
+ soc_semantic_loss, soc_detail_loss = \
220
+ soc_adaptation_iter(modnet, backup_modnet, optimizer, image)
221
+ """
222
+
223
+ global blurer
224
+
225
+ # set the backup model to eval mode
226
+ backup_modnet.eval()
227
+
228
+ # set the main model to train mode and freeze its norm layers
229
+ modnet.train()
230
+ modnet.module.freeze_norm()
231
+
232
+ # clear the optimizer
233
+ optimizer.zero_grad()
234
+
235
+ # forward the main model
236
+ pred_semantic, pred_detail, pred_matte = modnet(image, False)
237
+
238
+ # forward the backup model
239
+ with torch.no_grad():
240
+ _, pred_backup_detail, pred_backup_matte = backup_modnet(image, False)
241
+
242
+ # calculate the boundary mask from `pred_matte` and `pred_semantic`
243
+ pred_matte_fg = (pred_matte.detach() > 0.1).float()
244
+ pred_semantic_fg = (pred_semantic.detach() > 0.1).float()
245
+ pred_semantic_fg = F.interpolate(pred_semantic_fg, scale_factor=16, mode='bilinear')
246
+ pred_fg = pred_matte_fg * pred_semantic_fg
247
+
248
+ n, c, h, w = pred_matte.shape
249
+ np_pred_fg = pred_fg.data.cpu().numpy()
250
+ np_boundaries = np.zeros([n, c, h, w])
251
+ for sdx in range(0, n):
252
+ sample_np_boundaries = np_boundaries[sdx, 0, ...]
253
+ sample_np_pred_fg = np_pred_fg[sdx, 0, ...]
254
+
255
+ side = int((h + w) / 2 * 0.05)
256
+ dilated = grey_dilation(sample_np_pred_fg, size=(side, side))
257
+ eroded = grey_erosion(sample_np_pred_fg, size=(side, side))
258
+
259
+ sample_np_boundaries[np.where(dilated - eroded != 0)] = 1
260
+ np_boundaries[sdx, 0, ...] = sample_np_boundaries
261
+
262
+ boundaries = torch.tensor(np_boundaries).float().cuda()
263
+
264
+ # sub-objectives consistency between `pred_semantic` and `pred_matte`
265
+ # generate pseudo ground truth for `pred_semantic`
266
+ downsampled_pred_matte = blurer(F.interpolate(pred_matte, scale_factor=1/16, mode='bilinear'))
267
+ pseudo_gt_semantic = downsampled_pred_matte.detach()
268
+ pseudo_gt_semantic = pseudo_gt_semantic * (pseudo_gt_semantic > 0.01).float()
269
+
270
+ # generate pseudo ground truth for `pred_matte`
271
+ pseudo_gt_matte = pred_semantic.detach()
272
+ pseudo_gt_matte = pseudo_gt_matte * (pseudo_gt_matte > 0.01).float()
273
+
274
+ # calculate the SOC semantic loss
275
+ soc_semantic_loss = F.mse_loss(pred_semantic, pseudo_gt_semantic) + F.mse_loss(downsampled_pred_matte, pseudo_gt_matte)
276
+ soc_semantic_loss = soc_semantic_scale * torch.mean(soc_semantic_loss)
277
+
278
+ # NOTE: using the formulas in our paper to calculate the following losses has similar results
279
+ # sub-objectives consistency between `pred_detail` and `pred_backup_detail` (on boundaries only)
280
+ backup_detail_loss = boundaries * F.l1_loss(pred_detail, pred_backup_detail, reduction='none')
281
+ backup_detail_loss = torch.sum(backup_detail_loss, dim=(1,2,3)) / torch.sum(boundaries, dim=(1,2,3))
282
+ backup_detail_loss = torch.mean(backup_detail_loss)
283
+
284
+ # sub-objectives consistency between pred_matte` and `pred_backup_matte` (on boundaries only)
285
+ backup_matte_loss = boundaries * F.l1_loss(pred_matte, pred_backup_matte, reduction='none')
286
+ backup_matte_loss = torch.sum(backup_matte_loss, dim=(1,2,3)) / torch.sum(boundaries, dim=(1,2,3))
287
+ backup_matte_loss = torch.mean(backup_matte_loss)
288
+
289
+ soc_detail_loss = soc_detail_scale * (backup_detail_loss + backup_matte_loss)
290
+
291
+ # calculate the final loss, backward the loss, and update the model
292
+ loss = soc_semantic_loss + soc_detail_loss
293
+
294
+ loss.backward()
295
+ optimizer.step()
296
+
297
+ return soc_semantic_loss, soc_detail_loss
298
+
299
+ # ----------------------------------------------------------------------------------
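One subtlety in the training code above: `boundaries = (trimap < 0.5) + (trimap > 0.5)` is True wherever the trimap already provides a known foreground/background label, so the subsequent `torch.where(boundaries, trimap, ...)` calls replace predictions there and effectively restrict the detail and boundary-matte terms to the unknown band. A tiny illustrative check (variable names here are hypothetical):

```python
import torch

trimap = torch.tensor([[0.0, 0.5, 1.0]])        # background / unknown / foreground
boundaries = (trimap < 0.5) + (trimap > 0.5)    # tensor([[True, False, True]])
pred = torch.tensor([[0.2, 0.7, 0.9]])          # a hypothetical prediction
masked = torch.where(boundaries, trimap, pred)  # keep the prediction only where the label is unknown
print(masked)                                   # tensor([[0.0000, 0.7000, 1.0000]])
```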
MODNet/torchscript/README.md ADDED
@@ -0,0 +1,18 @@
1
+ ## MODNet - TorchScript Model
2
+
3
+ This TorchScript version of MODNet is provided by [@yarkable](https://github.com/yarkable) from the community.
4
+ Please note that the PyTorch version required for this TorchScript export is higher than that required by the official MODNet code (torch>=1.2.0).
5
+
6
+ You can also download the TorchScript version of the official **Image Matting Model** from [this link](https://pan.baidu.com/s/1kOmmmbG7lSZiSmDdE7CaRw) with the extraction code `dm9e`.
7
+
8
+ To export the TorchScript version of MODNet (assuming you are currently in the project root directory):
9
+ 1. Download the pre-trained **Image Matting Model** from this [link](https://drive.google.com/drive/folders/1umYmlCulvIFNaqPjwod1SayFmSRHziyR?usp=sharing) and put the model into the folder `MODNet/pretrained/`.
10
+
11
+ 2. Ensure your PyTorch version >= 1.2.0.
12
+
13
+ 3. Export the TorchScript version of MODNet by:
14
+ ```shell
15
+ python -m torchscript.export_torchscript \
16
+ --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
17
+ --output-path=pretrained/modnet_photographic_portrait_matting.torchscript
18
+ ```
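Once exported, the model can be loaded with `torch.jit.load` and called like a regular module. A minimal sketch (illustrative, not part of this repo): it assumes a CUDA device and an input already normalized to [-1, 1] with sides divisible by 32, mirroring the preprocessing used by the other demos.

```python
import torch

model = torch.jit.load("pretrained/modnet_photographic_portrait_matting.torchscript").cuda().eval()
dummy = torch.randn(1, 3, 512, 512).cuda()   # stand-in for a normalized portrait image
with torch.no_grad():
    matte = model(dummy)                     # 1 x 1 x 512 x 512 alpha matte in [0, 1]
print(matte.shape)
```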
MODNet/torchscript/__init__.py ADDED
File without changes
MODNet/torchscript/export_torchscript.py ADDED
@@ -0,0 +1,46 @@
1
+ """
2
+ Export TorchScript model of MODNet
3
+
4
+ Arguments:
5
+ --ckpt-path: path of the checkpoint that will be converted
6
+ --output-path: path for saving the TorchScript model
7
+
8
+ Example:
9
+ python export_torchscript.py \
10
+ --ckpt-path=modnet_photographic_portrait_matting.ckpt \
11
+ --output-path=modnet_photographic_portrait_matting.torchscript
12
+ """
13
+
14
+ import os
15
+ import argparse
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+ import torch.nn.functional as F
20
+
21
+ from . import modnet_torchscript
22
+
23
+
24
+ if __name__ == '__main__':
25
+ # define cmd arguments
26
+ parser = argparse.ArgumentParser()
27
+ parser.add_argument('--ckpt-path', type=str, required=True, help='path of the checkpoint that will be converted')
28
+ parser.add_argument('--output-path', type=str, required=True, help='path for saving the TorchScript model')
29
+ args = parser.parse_args()
30
+
31
+ # check input arguments
32
+ if not os.path.exists(args.ckpt_path):
33
+ print(args.ckpt_path)
34
+ print('Cannot find checkpoint path: {0}'.format(args.ckpt_path))
35
+ exit()
36
+
37
+ # create MODNet and load the pre-trained ckpt
38
+ modnet = modnet_torchscript.MODNet(backbone_pretrained=False)
39
+ modnet = nn.DataParallel(modnet).cuda()
40
+ state_dict = torch.load(args.ckpt_path)
41
+ modnet.load_state_dict(state_dict)
42
+ modnet.eval()
43
+
44
+ # export to TorchScript model
45
+ scripted_model = torch.jit.script(modnet.module)
46
+ torch.jit.save(scripted_model, os.path.join(args.output_path))
MODNet/torchscript/modnet_torchscript.py ADDED
@@ -0,0 +1,258 @@
1
+ """
2
+ This file contains a modified version of the original file `modnet.py` without
3
+ `pred_semantic` and `pred_detail`, since both return None when `inference=True`.
4
+
5
+ It also omits the `inference` argument, which makes it easier to
6
+ convert the checkpoint to a TorchScript model.
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+
13
+ from src.models.backbones import SUPPORTED_BACKBONES
14
+
15
+
16
+ #------------------------------------------------------------------------------
17
+ # MODNet Basic Modules
18
+ #------------------------------------------------------------------------------
19
+
20
+ class IBNorm(nn.Module):
21
+ """ Combine Instance Norm and Batch Norm into One Layer
22
+ """
23
+
24
+ def __init__(self, in_channels):
25
+ super(IBNorm, self).__init__()
26
+ in_channels = in_channels
27
+ self.bnorm_channels = int(in_channels / 2)
28
+ self.inorm_channels = in_channels - self.bnorm_channels
29
+
30
+ self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
31
+ self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
32
+
33
+ def forward(self, x):
34
+ bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
35
+ in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
36
+
37
+ return torch.cat((bn_x, in_x), 1)
38
+
39
+
40
+ class Conv2dIBNormRelu(nn.Module):
41
+ """ Convolution + IBNorm + ReLu
42
+ """
43
+
44
+ def __init__(self, in_channels, out_channels, kernel_size,
45
+ stride=1, padding=0, dilation=1, groups=1, bias=True,
46
+ with_ibn=True, with_relu=True):
47
+ super(Conv2dIBNormRelu, self).__init__()
48
+
49
+ layers = [
50
+ nn.Conv2d(in_channels, out_channels, kernel_size,
51
+ stride=stride, padding=padding, dilation=dilation,
52
+ groups=groups, bias=bias)
53
+ ]
54
+
55
+ if with_ibn:
56
+ layers.append(IBNorm(out_channels))
57
+ if with_relu:
58
+ layers.append(nn.ReLU(inplace=True))
59
+
60
+ self.layers = nn.Sequential(*layers)
61
+
62
+ def forward(self, x):
63
+ return self.layers(x)
64
+
65
+
66
+ class SEBlock(nn.Module):
67
+ """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
68
+ """
69
+
70
+ def __init__(self, in_channels, out_channels, reduction=1):
71
+ super(SEBlock, self).__init__()
72
+ self.pool = nn.AdaptiveAvgPool2d(1)
73
+ self.fc = nn.Sequential(
74
+ nn.Linear(in_channels, int(in_channels // reduction), bias=False),
75
+ nn.ReLU(inplace=True),
76
+ nn.Linear(int(in_channels // reduction), out_channels, bias=False),
77
+ nn.Sigmoid()
78
+ )
79
+
80
+ def forward(self, x):
81
+ b, c, _, _ = x.size()
82
+ w = self.pool(x).view(b, c)
83
+ w = self.fc(w).view(b, c, 1, 1)
84
+
85
+ return x * w.expand_as(x)
86
+
87
+
88
+ #------------------------------------------------------------------------------
89
+ # MODNet Branches
90
+ #------------------------------------------------------------------------------
91
+
92
+ class LRBranch(nn.Module):
93
+ """ Low Resolution Branch of MODNet
94
+ """
95
+
96
+ def __init__(self, backbone):
97
+ super(LRBranch, self).__init__()
98
+
99
+ enc_channels = backbone.enc_channels
100
+
101
+ self.backbone = backbone
102
+ self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
103
+ self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
104
+ self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
105
+ self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
106
+
107
+ def forward(self, img):
108
+ enc_features = self.backbone.forward(img)
109
+ enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
110
+
111
+ enc32x = self.se_block(enc32x)
112
+ lr16x = F.interpolate(enc32x, scale_factor=2.0, mode='bilinear', align_corners=False)
113
+ lr16x = self.conv_lr16x(lr16x)
114
+ lr8x = F.interpolate(lr16x, scale_factor=2.0, mode='bilinear', align_corners=False)
115
+ lr8x = self.conv_lr8x(lr8x)
116
+
117
+ return lr8x, enc2x, enc4x
118
+
119
+
120
+ class HRBranch(nn.Module):
121
+ """ High Resolution Branch of MODNet
122
+ """
123
+
124
+ def __init__(self, hr_channels, enc_channels):
125
+ super(HRBranch, self).__init__()
126
+
127
+ self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
128
+ self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
129
+
130
+ self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
131
+ self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
132
+
133
+ self.conv_hr4x = nn.Sequential(
134
+ Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
135
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
136
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
137
+ )
138
+
139
+ self.conv_hr2x = nn.Sequential(
140
+ Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
141
+ Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
142
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
143
+ Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
144
+ )
145
+
146
+ self.conv_hr = nn.Sequential(
147
+ Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
148
+ Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
149
+ )
150
+
151
+ def forward(self, img, enc2x, enc4x, lr8x):
152
+ img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
153
+ img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
154
+
155
+ enc2x = self.tohr_enc2x(enc2x)
156
+ hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
157
+
158
+ enc4x = self.tohr_enc4x(enc4x)
159
+ hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
160
+
161
+ lr4x = F.interpolate(lr8x, scale_factor=2.0, mode='bilinear', align_corners=False)
162
+ hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
163
+
164
+ hr2x = F.interpolate(hr4x, scale_factor=2.0, mode='bilinear', align_corners=False)
165
+ hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
166
+
167
+ return hr2x
168
+
169
+
170
+ class FusionBranch(nn.Module):
171
+ """ Fusion Branch of MODNet
172
+ """
173
+
174
+ def __init__(self, hr_channels, enc_channels):
175
+ super(FusionBranch, self).__init__()
176
+ self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
177
+
178
+ self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
179
+ self.conv_f = nn.Sequential(
180
+ Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
181
+ Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
182
+ )
183
+
184
+ def forward(self, img, lr8x, hr2x):
185
+ lr4x = F.interpolate(lr8x, scale_factor=2.0, mode='bilinear', align_corners=False)
186
+ lr4x = self.conv_lr4x(lr4x)
187
+ lr2x = F.interpolate(lr4x, scale_factor=2.0, mode='bilinear', align_corners=False)
188
+
189
+ f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
190
+ f = F.interpolate(f2x, scale_factor=2.0, mode='bilinear', align_corners=False)
191
+ f = self.conv_f(torch.cat((f, img), dim=1))
192
+ pred_matte = torch.sigmoid(f)
193
+
194
+ return pred_matte
195
+
196
+
197
+ #------------------------------------------------------------------------------
198
+ # MODNet
199
+ #------------------------------------------------------------------------------
200
+
201
+ class MODNet(nn.Module):
202
+ """ Architecture of MODNet
203
+ """
204
+
205
+ def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
206
+ super(MODNet, self).__init__()
207
+
208
+ self.in_channels = in_channels
209
+ self.hr_channels = hr_channels
210
+ self.backbone_arch = backbone_arch
211
+ self.backbone_pretrained = backbone_pretrained
212
+
213
+ self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
214
+
215
+ self.lr_branch = LRBranch(self.backbone)
216
+ self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
217
+ self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
218
+
219
+ for m in self.modules():
220
+ if isinstance(m, nn.Conv2d):
221
+ self._init_conv(m)
222
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
223
+ self._init_norm(m)
224
+
225
+ if self.backbone_pretrained:
226
+ self.backbone.load_pretrained_ckpt()
227
+
228
+ def forward(self, img):
229
+ # NOTE
230
+ lr_out = self.lr_branch(img)
231
+ lr8x = lr_out[0]
232
+ enc2x = lr_out[1]
233
+ enc4x = lr_out[2]
234
+
235
+ hr2x = self.hr_branch(img, enc2x, enc4x, lr8x)
236
+
237
+ pred_matte = self.f_branch(img, lr8x, hr2x)
238
+
239
+ return pred_matte
240
+
241
+ def freeze_norm(self):
242
+ norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
243
+ for m in self.modules():
244
+ for n in norm_types:
245
+ if isinstance(m, n):
246
+ m.eval()
247
+ continue
248
+
249
+ def _init_conv(self, conv):
250
+ nn.init.kaiming_uniform_(
251
+ conv.weight, a=0, mode='fan_in', nonlinearity='relu')
252
+ if conv.bias is not None:
253
+ nn.init.constant_(conv.bias, 0)
254
+
255
+ def _init_norm(self, norm):
256
+ if norm.weight is not None:
257
+ nn.init.constant_(norm.weight, 1)
258
+ nn.init.constant_(norm.bias, 0)
config.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "model_type": "modnet",
 
 
3
  "backbone": "mobilenetv2",
4
- "architectures": ["HF_MODNet"],
5
- "auto_map": {
6
- "AutoConfig": "configuration_modnet.MODNetConfig",
7
- "AutoModel": "modeling_modnet.HF_MODNet"
8
- }
9
  }
 
1
  {
2
+ "architectures": [
3
+ "HF_MODNet"
4
+ ],
5
  "backbone": "mobilenetv2",
6
+ "dtype": "float32",
7
+ "model_type": "modnet",
8
+ "transformers_version": "4.57.6"
 
 
9
  }
modeling_modnet.py CHANGED
@@ -1,17 +1,17 @@
1
- import torch
2
- from torch import nn
3
- from transformers import PreTrainedModel, PretrainedConfig
4
- from .configuration_modnet import MODNetConfig
5
-
6
- from .MODNet.modnet import MODNet
7
-
8
-
9
- class HF_MODNet(PreTrainedModel):
10
- config_class = MODNetConfig
11
-
12
- def __init__(self, config):
13
- super().__init__(config)
14
- self.modnet = MODNet(backbone_pretrained=False)
15
-
16
- def forward(self, x, inference=True):
17
  return self.modnet(x, inference)
 
1
+ import torch
2
+ from torch import nn
3
+ from transformers import PreTrainedModel, PretrainedConfig
4
+ from .configuration_modnet import MODNetConfig
5
+
6
+ from .MODNet.modnet import MODNet
7
+
8
+
9
+ class HF_MODNet(PreTrainedModel):
10
+ config_class = MODNetConfig
11
+
12
+ def __init__(self, config):
13
+ super().__init__(config)
14
+ self.modnet = MODNet(backbone_pretrained=False)
15
+
16
+ def forward(self, x, inference=True):
17
  return self.modnet(x, inference)