Remove-Image-Background

Running

App Files Files Community

schirrmacher commited on Jul 6, 2024

Commit

92c1934

verified ·

1 Parent(s): 8285c6b

Upload folder using huggingface_hub

Browse files

Files changed (35) hide show

.DS_Store +0 -0
.gitattributes +8 -0
.gitignore +157 -0
README.md +59 -9
app.py +7 -3
dataset/training/gt/p_00a4eda7.png +0 -0
dataset/training/gt/p_00a5b702.png +0 -0
dataset/training/im/p_00a4eda7.png +3 -0
dataset/training/im/p_00a5b702.png +3 -0
dataset/validation/gt/p_00a7a27c.png +0 -0
dataset/validation/im/p_00a7a27c.png +3 -0
environment.yaml +199 -0
examples/.DS_Store +0 -0
examples/image/example01.jpeg +3 -0
examples/image/example02.jpeg +3 -0
examples/image/example03.jpeg +3 -0
examples/image/image01.png +3 -0
examples/image/image01_no_background.png +3 -0
examples/loss/gt.png +0 -0
examples/loss/loss01.png +0 -0
examples/loss/loss02.png +0 -0
examples/loss/loss03.png +0 -0
examples/loss/loss04.png +0 -0
examples/loss/loss05.png +0 -0
examples/loss/orginal.jpg +0 -0
ormbg/.DS_Store +0 -0
ormbg/basics.py +79 -0
ormbg/data_loader_cache.py +489 -0
ormbg/inference.py +110 -0
ormbg/models/ormbg.py +484 -0
ormbg/train_model.py +474 -0
utils/.DS_Store +0 -0
utils/architecture.py +4 -0
utils/loss_example.py +69 -0
utils/pth_to_onnx.py +59 -0

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

.gitattributes CHANGED Viewed

@@ -36,3 +36,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 example1.jpeg filter=lfs diff=lfs merge=lfs -text
 example2.jpeg filter=lfs diff=lfs merge=lfs -text
 example3.jpeg filter=lfs diff=lfs merge=lfs -text

 example1.jpeg filter=lfs diff=lfs merge=lfs -text
 example2.jpeg filter=lfs diff=lfs merge=lfs -text
 example3.jpeg filter=lfs diff=lfs merge=lfs -text
+dataset/training/im/p_00a4eda7.png filter=lfs diff=lfs merge=lfs -text
+dataset/training/im/p_00a5b702.png filter=lfs diff=lfs merge=lfs -text
+dataset/validation/im/p_00a7a27c.png filter=lfs diff=lfs merge=lfs -text
+examples/image/example01.jpeg filter=lfs diff=lfs merge=lfs -text
+examples/image/example02.jpeg filter=lfs diff=lfs merge=lfs -text
+examples/image/example03.jpeg filter=lfs diff=lfs merge=lfs -text
+examples/image/image01.png filter=lfs diff=lfs merge=lfs -text
+examples/image/image01_no_background.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,157 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+models/*

README.md CHANGED Viewed

@@ -1,13 +1,63 @@
 ---
-title: Open Remove Background Model (ormbg)
-emoji: 💻
-colorFrom: red
-colorTo: red
-sdk: gradio
-sdk_version: 4.29.0
-app_file: app.py
-pinned: false
 license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 license: apache-2.0
+tags:
+  - segmentation
+  - remove background
+  - background
+  - background-removal
+  - Pytorch
+pretty_name: Open Remove Background Model
+datasets:
+  - schirrmacher/humans
 ---
+# Open Remove Background Model (ormbg)
+[>>> DEMO <<<](https://huggingface.co/spaces/schirrmacher/ormbg)
+Join our [Research Discord Group](https://discord.gg/YYZ3D66t)!
+![](examples/image/image01_no_background.png)
+This model is a **fully open-source background remover** optimized for images with humans. It is based on [Highly Accurate Dichotomous Image Segmentation research](https://github.com/xuebinqin/DIS). The model was trained with the synthetic [Human Segmentation Dataset](https://huggingface.co/datasets/schirrmacher/humans), [P3M-10k](https://paperswithcode.com/dataset/p3m-10k), [PPM-100](https://github.com/ZHKKKe/PPM) and [AIM-500](https://paperswithcode.com/dataset/aim-500).
+This model is similar to [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4), but with open training data/process and commercially free to use.
+## Inference
+```
+python ormbg/inference.py
+```
+## Training
+Install dependencies:
+```
+conda env create -f environment.yaml
+conda activate ormbg
+```
+Replace dummy dataset with [training dataset](https://huggingface.co/datasets/schirrmacher/humans).
+```
+python3 ormbg/train_model.py
+```
+# Research
+I started training the model with synthetic images of the [Human Segmentation Dataset](https://huggingface.co/datasets/schirrmacher/humans) crafted with [LayerDiffuse](https://github.com/layerdiffusion/LayerDiffuse). However, I noticed that the model struggles to perform well on real images.
+Synthetic datasets have limitations for achieving great segmentation results. This is because artificial lighting, occlusion, scale or backgrounds create a gap between synthetic and real images. A "model trained solely on synthetic data generated with naïve domain randomization struggles to generalize on the real domain", see [PEOPLESANSPEOPLE: A Synthetic Data Generator for Human-Centric Computer Vision (2022)](https://arxiv.org/pdf/2112.09290).
+Latest changes (05/07/2024):
+- Added [P3M-10K](https://paperswithcode.com/dataset/p3m-10k) dataset for training and validation
+- Added [AIM-500](https://paperswithcode.com/dataset/aim-500) dataset for training and validation
+- Added [PPM-100](https://github.com/ZHKKKe/PPM) dataset for training and validation
+- Applied [Grid Dropout](https://albumentations.ai/docs/api_reference/augmentations/dropout/grid_dropout/) to make the model smarter
+Next steps:
+- Expand dataset with synthetic and real images
+- Research on multi-step segmentation/matting by incorporating [ViTMatte](https://github.com/hustvl/ViTMatte)

app.py CHANGED Viewed

@@ -3,10 +3,10 @@ import numpy as np
 import torch
 import torch.nn.functional as F
 import gradio as gr
-from ormbg import ORMBG
 from PIL import Image
-model_path = "ormbg.pth"
 # Load the model globally but don't send to device yet
 net = ORMBG()
@@ -69,7 +69,11 @@ If you identify cases where the model fails, <a href='https://huggingface.co/sch
 - <a href='https://huggingface.co/schirrmacher/ormbg\#research' target='_blank'>Research</a>: see current approach for improvements
 """
-examples = ["./example1.jpeg", "./example2.jpeg", "./example3.jpeg"]
 demo = gr.Interface(
     fn=inference,

 import torch
 import torch.nn.functional as F
 import gradio as gr
+from ormbg.models.ormbg import ORMBG
 from PIL import Image
+model_path = "models/ormbg.pth"
 # Load the model globally but don't send to device yet
 net = ORMBG()
 - <a href='https://huggingface.co/schirrmacher/ormbg\#research' target='_blank'>Research</a>: see current approach for improvements
 """
+examples = [
+    "./examples/image/example1.jpeg",
+    "./examples/image/example2.jpeg",
+    "./examples/image/example3.jpeg",
+]
 demo = gr.Interface(
     fn=inference,

dataset/training/gt/p_00a4eda7.png ADDED Viewed

dataset/training/gt/p_00a5b702.png ADDED Viewed

dataset/training/im/p_00a4eda7.png ADDED Viewed

Git LFS Details

SHA256: e226a687b5d755056076e12d7f2c24704d101ad90918554c43028e8c1e53638f
Pointer size: 132 Bytes
Size of remote file: 1.74 MB

dataset/training/im/p_00a5b702.png ADDED Viewed

Git LFS Details

SHA256: 184b2d97ffdbffc9d0a5d3c3b84a848938df636855d59b81f3d109445a92b0ef
Pointer size: 132 Bytes
Size of remote file: 3.46 MB

dataset/validation/gt/p_00a7a27c.png ADDED Viewed

dataset/validation/im/p_00a7a27c.png ADDED Viewed

Git LFS Details

SHA256: b87d59e4598ddc1078ebdc856e7101d92582315ecff2aecdadc17802e82bc8c1
Pointer size: 132 Bytes
Size of remote file: 1 MB

environment.yaml ADDED Viewed

	@@ -0,0 +1,199 @@

+name: ormbg
+channels:
+  - pytorch
+  - nvidia
+  - anaconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - aom=3.6.0=h6a678d5_0
+  - blas=1.0=mkl
+  - blosc=1.21.3=h6a678d5_0
+  - brotli=1.0.9=h5eee18b_7
+  - brotli-bin=1.0.9=h5eee18b_7
+  - brotli-python=1.0.9=py38h6a678d5_7
+  - brunsli=0.1=h2531618_0
+  - bzip2=1.0.8=h7b6447c_0
+  - c-ares=1.19.1=h5eee18b_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - certifi=2023.7.22=py38h06a4308_0
+  - cffi=1.15.0=py38h7f8727e_0
+  - cfitsio=3.470=h5893167_7
+  - charls=2.2.0=h2531618_0
+  - charset-normalizer=2.0.4=pyhd3eb1b0_0
+  - click=8.1.7=py38h06a4308_0
+  - cloudpickle=2.2.1=py38h06a4308_0
+  - contourpy=1.0.5=py38hdb19cb5_0
+  - cryptography=41.0.3=py38h130f0dd_0
+  - cuda-cudart=11.8.89=0
+  - cuda-cupti=11.8.87=0
+  - cuda-libraries=11.8.0=0
+  - cuda-nvrtc=11.8.89=0
+  - cuda-nvtx=11.8.86=0
+  - cuda-runtime=11.8.0=0
+  - cudatoolkit=11.8.0=h6a678d5_0
+  - cycler=0.11.0=pyhd3eb1b0_0
+  - cytoolz=0.12.0=py38h5eee18b_0
+  - dask-core=2023.4.1=py38h06a4308_0
+  - dav1d=1.2.1=h5eee18b_0
+  - dbus=1.13.18=hb2f20db_0
+  - expat=2.5.0=h6a678d5_0
+  - ffmpeg=4.3=hf484d3e_0
+  - fftw=3.3.9=h27cfd23_1
+  - filelock=3.9.0=py38h06a4308_0
+  - fontconfig=2.14.1=h52c9d5c_1
+  - fonttools=4.25.0=pyhd3eb1b0_0
+  - freetype=2.12.1=h4a9f257_0
+  - fsspec=2023.9.2=py38h06a4308_0
+  - giflib=5.2.1=h5eee18b_3
+  - glib=2.63.1=h5a9c865_0
+  - gmp=6.2.1=h295c915_3
+  - gmpy2=2.1.2=py38heeb90bb_0
+  - gnutls=3.6.15=he1e5248_0
+  - gst-plugins-base=1.14.0=hbbd80ab_1
+  - gstreamer=1.14.0=hb453b48_1
+  - icu=58.2=he6710b0_3
+  - idna=3.4=py38h06a4308_0
+  - imagecodecs=2023.1.23=py38hc4b7b5f_0
+  - imageio=2.31.4=py38h06a4308_0
+  - importlib-metadata=6.0.0=py38h06a4308_0
+  - importlib_resources=6.1.0=py38h06a4308_0
+  - intel-openmp=2021.4.0=h06a4308_3561
+  - jinja2=3.1.2=py38h06a4308_0
+  - jpeg=9e=h5eee18b_1
+  - jxrlib=1.1=h7b6447c_2
+  - kiwisolver=1.4.4=py38h6a678d5_0
+  - krb5=1.20.1=h568e23c_1
+  - lame=3.100=h7b6447c_0
+  - lazy_loader=0.3=py38h06a4308_0
+  - lcms2=2.12=h3be6417_0
+  - lerc=3.0=h295c915_0
+  - libaec=1.0.4=he6710b0_1
+  - libavif=0.11.1=h5eee18b_0
+  - libbrotlicommon=1.0.9=h5eee18b_7
+  - libbrotlidec=1.0.9=h5eee18b_7
+  - libbrotlienc=1.0.9=h5eee18b_7
+  - libcublas=11.11.3.6=0
+  - libcufft=10.9.0.58=0
+  - libcufile=1.8.1.2=0
+  - libcurand=10.3.4.101=0
+  - libcurl=7.88.1=h91b91d3_2
+  - libcusolver=11.4.1.48=0
+  - libcusparse=11.7.5.86=0
+  - libdeflate=1.17=h5eee18b_1
+  - libedit=3.1.20221030=h5eee18b_0
+  - libev=4.33=h7f8727e_1
+  - libffi=3.2.1=hf484d3e_1007
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgfortran-ng=11.2.0=h00389a5_1
+  - libgfortran5=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libiconv=1.16=h7f8727e_2
+  - libidn2=2.3.4=h5eee18b_0
+  - libjpeg-turbo=2.0.0=h9bf148f_0
+  - libnghttp2=1.52.0=ha637b67_1
+  - libnpp=11.8.0.86=0
+  - libnvjpeg=11.9.0.86=0
+  - libpng=1.6.39=h5eee18b_0
+  - libssh2=1.10.0=h37d81fd_2
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.5.1=h6a678d5_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuuid=1.41.5=h5eee18b_0
+  - libwebp=1.3.2=h11a3e52_0
+  - libwebp-base=1.3.2=h5eee18b_0
+  - libxcb=1.15=h7f8727e_0
+  - libxml2=2.9.14=h74e7548_0
+  - libzopfli=1.0.3=he6710b0_0
+  - llvm-openmp=14.0.6=h9e868ea_0
+  - locket=1.0.0=py38h06a4308_0
+  - lz4-c=1.9.4=h6a678d5_0
+  - markupsafe=2.1.1=py38h7f8727e_0
+  - matplotlib=3.7.2=py38h06a4308_0
+  - matplotlib-base=3.7.2=py38h1128e8f_0
+  - mkl=2021.4.0=h06a4308_640
+  - mkl-service=2.4.0=py38h7f8727e_0
+  - mkl_fft=1.3.1=py38hd3c417c_0
+  - mkl_random=1.2.2=py38h51133e4_0
+  - mpc=1.1.0=h10f8cd9_1
+  - mpfr=4.0.2=hb69a4c5_1
+  - mpmath=1.3.0=py38h06a4308_0
+  - munkres=1.1.4=py_0
+  - ncurses=6.4=h6a678d5_0
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=3.1=py38h06a4308_0
+  - openh264=2.1.1=h4ff587b_0
+  - openjpeg=2.4.0=h3ad879b_0
+  - openssl=1.1.1w=h7f8727e_0
+  - packaging=23.1=py38h06a4308_0
+  - partd=1.4.1=py38h06a4308_0
+  - pcre=8.45=h295c915_0
+  - pillow=10.0.1=py38ha6cbd5a_0
+  - pip=23.3=py38h06a4308_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pyopenssl=23.2.0=py38h06a4308_0
+  - pyparsing=3.0.9=py38h06a4308_0
+  - pyqt=5.9.2=py38h05f1152_4
+  - pysocks=1.7.1=py38h06a4308_0
+  - python=3.8.0=h0371630_2
+  - python-dateutil=2.8.2=pyhd3eb1b0_0
+  - pytorch=2.1.1=py3.8_cuda11.8_cudnn8.7.0_0
+  - pytorch-cuda=11.8=h7e8668a_5
+  - pytorch-mutex=1.0=cuda
+  - pywavelets=1.4.1=py38h5eee18b_0
+  - pyyaml=6.0.1=py38h5eee18b_0
+  - qt=5.9.7=h5867ecd_1
+  - readline=7.0=h7b6447c_5
+  - requests=2.31.0=py38h06a4308_0
+  - setuptools=68.0.0=py38h06a4308_0
+  - sip=4.19.13=py38h295c915_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - snappy=1.1.9=h295c915_0
+  - sqlite=3.33.0=h62c20be_0
+  - sympy=1.11.1=py38h06a4308_0
+  - tifffile=2023.4.12=py38h06a4308_0
+  - tk=8.6.12=h1ccaba5_0
+  - toolz=0.12.0=py38h06a4308_0
+  - torchaudio=2.1.1=py38_cu118
+  - torchtriton=2.1.0=py38
+  - torchvision=0.16.1=py38_cu118
+  - tornado=6.3.3=py38h5eee18b_0
+  - tqdm=4.65.0=py38hb070fc8_0
+  - urllib3=1.26.18=py38h06a4308_0
+  - wheel=0.41.2=py38h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - yaml=0.2.5=h7b6447c_0
+  - zfp=1.0.0=h6a678d5_0
+  - zipp=3.11.0=py38h06a4308_0
+  - zlib=1.2.13=h5eee18b_0
+  - zstd=1.5.5=hc292b87_0
+  - pip:
+      - albucore==0.0.12
+      - albumentations==1.4.11
+      - annotated-types==0.7.0
+      - appdirs==1.4.4
+      - conda-pack==0.7.1
+      - docker-pycreds==0.4.0
+      - eval-type-backport==0.2.0
+      - gitdb==4.0.11
+      - gitpython==3.1.40
+      - joblib==1.4.2
+      - numpy==1.24.4
+      - opencv-python-headless==4.10.0.84
+      - protobuf==4.25.1
+      - psutil==5.9.6
+      - pydantic==2.8.2
+      - pydantic-core==2.20.1
+      - scikit-image==0.21.0
+      - scikit-learn==1.3.2
+      - scipy==1.10.1
+      - sentry-sdk==1.35.0
+      - setproctitle==1.3.3
+      - smmap==5.0.1
+      - threadpoolctl==3.5.0
+      - tomli==2.0.1
+      - typing-extensions==4.12.2
+      - wandb==0.16.0
+prefix: /home/macher/miniconda3/envs/ormbg

examples/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

examples/image/example01.jpeg ADDED Viewed

Git LFS Details

SHA256: 436f546cc1d7b2fd7021180299b028c0d379e48a9e9f05214a694b9c4eb8a7e3
Pointer size: 132 Bytes
Size of remote file: 7.63 MB

examples/image/example02.jpeg ADDED Viewed

Git LFS Details

SHA256: 1dad92b56723fd8ac1c3832844873ad297300d0e85f6e14764334687a70c8abc
Pointer size: 132 Bytes
Size of remote file: 4.32 MB

examples/image/example03.jpeg ADDED Viewed

Git LFS Details

SHA256: f392dc4716469f5367ce0e2ac788f284d1b8d70c39be109db7038c3306a1da16
Pointer size: 132 Bytes
Size of remote file: 1.09 MB

examples/image/image01.png ADDED Viewed

Git LFS Details

SHA256: 1c6d54789fc0d8816231ca9f061b19af50bdbfb59a4fed7fa6c7bd3168591b0e
Pointer size: 133 Bytes
Size of remote file: 16.7 MB

examples/image/image01_no_background.png ADDED Viewed

Git LFS Details

SHA256: 9290ced416914386458bded92614b3b620bf82fc9dc7b06b4015fc6791d34cc3
Pointer size: 133 Bytes
Size of remote file: 21.4 MB

examples/loss/gt.png ADDED Viewed

examples/loss/loss01.png ADDED Viewed

examples/loss/loss02.png ADDED Viewed

examples/loss/loss03.png ADDED Viewed

examples/loss/loss04.png ADDED Viewed

examples/loss/loss05.png ADDED Viewed

examples/loss/orginal.jpg ADDED Viewed

ormbg/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

ormbg/basics.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import os
+# os.environ['CUDA_VISIBLE_DEVICES'] = '2'
+from skimage import io, transform
+import torch
+import torchvision
+from torch.autograd import Variable
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+from torchvision import transforms, utils
+import torch.optim as optim
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+import glob
+def mae_torch(pred, gt):
+    h, w = gt.shape[0:2]
+    sumError = torch.sum(torch.absolute(torch.sub(pred.float(), gt.float())))
+    maeError = torch.divide(sumError, float(h) * float(w) * 255.0 + 1e-4)
+    return maeError
+def f1score_torch(pd, gt):
+    # print(gt.shape)
+    gtNum = torch.sum((gt > 128).float() * 1)  ## number of ground truth pixels
+    pp = pd[gt > 128]
+    nn = pd[gt <= 128]
+    pp_hist = torch.histc(pp, bins=255, min=0, max=255)
+    nn_hist = torch.histc(nn, bins=255, min=0, max=255)
+    pp_hist_flip = torch.flipud(pp_hist)
+    nn_hist_flip = torch.flipud(nn_hist)
+    pp_hist_flip_cum = torch.cumsum(pp_hist_flip, dim=0)
+    nn_hist_flip_cum = torch.cumsum(nn_hist_flip, dim=0)
+    precision = (pp_hist_flip_cum) / (
+        pp_hist_flip_cum + nn_hist_flip_cum + 1e-4
+    )  # torch.divide(pp_hist_flip_cum,torch.sum(torch.sum(pp_hist_flip_cum, nn_hist_flip_cum), 1e-4))
+    recall = (pp_hist_flip_cum) / (gtNum + 1e-4)
+    f1 = (1 + 0.3) * precision * recall / (0.3 * precision + recall + 1e-4)
+    return (
+        torch.reshape(precision, (1, precision.shape[0])),
+        torch.reshape(recall, (1, recall.shape[0])),
+        torch.reshape(f1, (1, f1.shape[0])),
+    )
+def f1_mae_torch(pred, gt, valid_dataset, idx, mybins, hypar):
+    import time
+    tic = time.time()
+    if len(gt.shape) > 2:
+        gt = gt[:, :, 0]
+    pre, rec, f1 = f1score_torch(pred, gt)
+    mae = mae_torch(pred, gt)
+    print(valid_dataset.dataset["im_name"][idx] + ".png")
+    print("time for evaluation : ", time.time() - tic)
+    return (
+        pre.cpu().data.numpy(),
+        rec.cpu().data.numpy(),
+        f1.cpu().data.numpy(),
+        mae.cpu().data.numpy(),
+    )

ormbg/data_loader_cache.py ADDED Viewed

	@@ -0,0 +1,489 @@

+## data loader
+## Ackownledgement:
+## We would like to thank Dr. Ibrahim Almakky (https://scholar.google.co.uk/citations?user=T9MTcK0AAAAJ&hl=en)
+## for his helps in implementing cache machanism of our DIS dataloader.
+from __future__ import print_function, division
+import albumentations as A
+import numpy as np
+import random
+from copy import deepcopy
+import json
+from tqdm import tqdm
+from skimage import io
+import os
+from glob import glob
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torchvision import transforms
+from torchvision.transforms.functional import normalize
+import torch.nn.functional as F
+#### --------------------- DIS dataloader cache ---------------------####
+def get_im_gt_name_dict(datasets, flag="valid"):
+    print("------------------------------", flag, "--------------------------------")
+    name_im_gt_list = []
+    for i in range(len(datasets)):
+        print(
+            "--->>>",
+            flag,
+            " dataset ",
+            i,
+            "/",
+            len(datasets),
+            " ",
+            datasets[i]["name"],
+            "<<<---",
+        )
+        tmp_im_list, tmp_gt_list = [], []
+        im_dir = datasets[i]["im_dir"]
+        gt_dir = datasets[i]["gt_dir"]
+        tmp_im_list = glob(os.path.join(im_dir, "*" + "*.[jp][pn]g"))
+        tmp_gt_list = glob(os.path.join(gt_dir, "*" + "*.[jp][pn]g"))
+        print(
+            "-im-", datasets[i]["name"], datasets[i]["im_dir"], ": ", len(tmp_im_list)
+        )
+        print(
+            "-gt-",
+            datasets[i]["name"],
+            datasets[i]["gt_dir"],
+            ": ",
+            len(tmp_gt_list),
+        )
+        if flag == "train":  ## combine multiple training sets into one dataset
+            if len(name_im_gt_list) == 0:
+                name_im_gt_list.append(
+                    {
+                        "dataset_name": datasets[i]["name"],
+                        "im_path": tmp_im_list,
+                        "gt_path": tmp_gt_list,
+                        "im_ext": datasets[i]["im_ext"],
+                        "gt_ext": datasets[i]["gt_ext"],
+                        "cache_dir": datasets[i]["cache_dir"],
+                    }
+                )
+            else:
+                name_im_gt_list[0]["dataset_name"] = (
+                    name_im_gt_list[0]["dataset_name"] + "_" + datasets[i]["name"]
+                )
+                name_im_gt_list[0]["im_path"] = (
+                    name_im_gt_list[0]["im_path"] + tmp_im_list
+                )
+                name_im_gt_list[0]["gt_path"] = (
+                    name_im_gt_list[0]["gt_path"] + tmp_gt_list
+                )
+                if datasets[i]["im_ext"] != ".jpg" or datasets[i]["gt_ext"] != ".png":
+                    print(
+                        "Error: Please make sure all you images and ground truth masks are in jpg and png format respectively !!!"
+                    )
+                    exit()
+                name_im_gt_list[0]["im_ext"] = ".jpg"
+                name_im_gt_list[0]["gt_ext"] = ".png"
+                name_im_gt_list[0]["cache_dir"] = (
+                    os.sep.join(datasets[i]["cache_dir"].split(os.sep)[0:-1])
+                    + os.sep
+                    + name_im_gt_list[0]["dataset_name"]
+                )
+        else:  ## keep different validation or inference datasets as separate ones
+            name_im_gt_list.append(
+                {
+                    "dataset_name": datasets[i]["name"],
+                    "im_path": tmp_im_list,
+                    "gt_path": tmp_gt_list,
+                    "im_ext": datasets[i]["im_ext"],
+                    "gt_ext": datasets[i]["gt_ext"],
+                    "cache_dir": datasets[i]["cache_dir"],
+                }
+            )
+    return name_im_gt_list
+def create_dataloaders(
+    name_im_gt_list,
+    cache_size=[],
+    cache_boost=True,
+    my_transforms=[],
+    batch_size=1,
+    shuffle=False,
+):
+    ## model="train": return one dataloader for training
+    ## model="valid": return a list of dataloaders for validation or testing
+    gos_dataloaders = []
+    gos_datasets = []
+    if len(name_im_gt_list) == 0:
+        return gos_dataloaders, gos_datasets
+    num_workers_ = 1
+    if batch_size > 1:
+        num_workers_ = 2
+    if batch_size > 4:
+        num_workers_ = 4
+    if batch_size > 8:
+        num_workers_ = 8
+    for i in range(0, len(name_im_gt_list)):
+        gos_dataset = GOSDatasetCache(
+            [name_im_gt_list[i]],
+            cache_size=cache_size,
+            cache_path=name_im_gt_list[i]["cache_dir"],
+            cache_boost=cache_boost,
+            transform=transforms.Compose(my_transforms),
+        )
+        gos_dataloaders.append(
+            DataLoader(
+                gos_dataset,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                num_workers=num_workers_,
+            )
+        )
+        gos_datasets.append(gos_dataset)
+    return gos_dataloaders, gos_datasets
+def im_reader(im_path):
+    """Read the image at ``im_path`` with ``skimage.io.imread`` and return it."""
+    return io.imread(im_path)
+def im_preprocess(im, size):
+    if len(im.shape) < 3:
+        im = im[:, :, np.newaxis]
+    if im.shape[2] == 1:
+        im = np.repeat(im, 3, axis=2)
+    im_tensor = torch.tensor(im.copy(), dtype=torch.float32)
+    im_tensor = torch.transpose(torch.transpose(im_tensor, 1, 2), 0, 1)
+    if len(size) < 2:
+        return im_tensor, im.shape[0:2]
+    else:
+        im_tensor = torch.unsqueeze(im_tensor, 0)
+        im_tensor = F.upsample(im_tensor, size, mode="bilinear")
+        im_tensor = torch.squeeze(im_tensor, 0)
+    return im_tensor.type(torch.uint8), im.shape[0:2]
+def gt_preprocess(gt, size):
+    if len(gt.shape) > 2:
+        gt = gt[:, :, 0]
+    gt_tensor = torch.unsqueeze(torch.tensor(gt, dtype=torch.uint8), 0)
+    if len(size) < 2:
+        return gt_tensor.type(torch.uint8), gt.shape[0:2]
+    else:
+        gt_tensor = torch.unsqueeze(torch.tensor(gt_tensor, dtype=torch.float32), 0)
+        gt_tensor = F.upsample(gt_tensor, size, mode="bilinear")
+        gt_tensor = torch.squeeze(gt_tensor, 0)
+    return gt_tensor.type(torch.uint8), gt.shape[0:2]
+    # return gt_tensor, gt.shape[0:2]
+class GOSGridDropout(object):
+    def __init__(
+        self,
+        ratio=0.5,
+        unit_size_min=100,
+        unit_size_max=100,
+        holes_number_x=None,
+        holes_number_y=None,
+        shift_x=0,
+        shift_y=0,
+        random_offset=True,
+        fill_value=0,
+        mask_fill_value=None,
+        always_apply=None,
+        p=1.0,
+    ):
+        self.transform = A.GridDropout(
+            ratio=ratio,
+            unit_size_min=unit_size_min,
+            unit_size_max=unit_size_max,
+            holes_number_x=holes_number_x,
+            holes_number_y=holes_number_y,
+            shift_x=shift_x,
+            shift_y=shift_y,
+            random_offset=random_offset,
+            fill_value=fill_value,
+            mask_fill_value=mask_fill_value,
+            always_apply=always_apply,
+            p=p,
+        )
+    def __call__(self, sample):
+        imidx, image, label, shape = (
+            sample["imidx"],
+            sample["image"],
+            sample["label"],
+            sample["shape"],
+        )
+        # Convert the torch tensors to numpy arrays
+        image_np = image.permute(1, 2, 0).numpy()
+        augmented = self.transform(image=image_np)
+        # Convert the numpy arrays back to torch tensors
+        image = torch.tensor(augmented["image"]).permute(2, 0, 1)
+        return {"imidx": imidx, "image": image, "label": label, "shape": shape}
+class GOSRandomHFlip(object):
+    def __init__(self, prob=0.5):
+        self.prob = prob
+    def __call__(self, sample):
+        imidx, image, label, shape = (
+            sample["imidx"],
+            sample["image"],
+            sample["label"],
+            sample["shape"],
+        )
+        # random horizontal flip
+        if random.random() >= self.prob:
+            image = torch.flip(image, dims=[2])
+            label = torch.flip(label, dims=[2])
+        return {"imidx": imidx, "image": image, "label": label, "shape": shape}
+class GOSDatasetCache(Dataset):
+    def __init__(
+        self,
+        name_im_gt_list,
+        cache_size=[],
+        cache_path="./cache",
+        cache_file_name="dataset.json",
+        cache_boost=False,
+        transform=None,
+    ):
+        self.cache_size = cache_size
+        self.cache_path = cache_path
+        self.cache_file_name = cache_file_name
+        self.cache_boost_name = ""
+        self.cache_boost = cache_boost
+        # self.ims_npy = None
+        # self.gts_npy = None
+        ## cache all the images and ground truth into a single pytorch tensor
+        self.ims_pt = None
+        self.gts_pt = None
+        ## we will cache the npy as well regardless of the cache_boost
+        # if(self.cache_boost):
+        self.cache_boost_name = cache_file_name.split(".json")[0]
+        self.transform = transform
+        self.dataset = {}
+        ## combine different datasets into one
+        dataset_names = []
+        dt_name_list = []  # dataset name per image
+        im_name_list = []  # image name
+        im_path_list = []  # im path
+        gt_path_list = []  # gt path
+        im_ext_list = []  # im ext
+        gt_ext_list = []  # gt ext
+        for i in range(0, len(name_im_gt_list)):
+            dataset_names.append(name_im_gt_list[i]["dataset_name"])
+            # dataset name repeated based on the number of images in this dataset
+            dt_name_list.extend(
+                [
+                    name_im_gt_list[i]["dataset_name"]
+                    for x in name_im_gt_list[i]["im_path"]
+                ]
+            )
+            im_name_list.extend(
+                [
+                    x.split(os.sep)[-1].split(name_im_gt_list[i]["im_ext"])[0]
+                    for x in name_im_gt_list[i]["im_path"]
+                ]
+            )
+            im_path_list.extend(name_im_gt_list[i]["im_path"])
+            gt_path_list.extend(name_im_gt_list[i]["gt_path"])
+            im_ext_list.extend(
+                [name_im_gt_list[i]["im_ext"] for x in name_im_gt_list[i]["im_path"]]
+            )
+            gt_ext_list.extend(
+                [name_im_gt_list[i]["gt_ext"] for x in name_im_gt_list[i]["gt_path"]]
+            )
+        self.dataset["data_name"] = dt_name_list
+        self.dataset["im_name"] = im_name_list
+        self.dataset["im_path"] = im_path_list
+        self.dataset["ori_im_path"] = deepcopy(im_path_list)
+        self.dataset["gt_path"] = gt_path_list
+        self.dataset["ori_gt_path"] = deepcopy(gt_path_list)
+        self.dataset["im_shp"] = []
+        self.dataset["gt_shp"] = []
+        self.dataset["im_ext"] = im_ext_list
+        self.dataset["gt_ext"] = gt_ext_list
+        self.dataset["ims_pt_dir"] = ""
+        self.dataset["gts_pt_dir"] = ""
+        self.dataset = self.manage_cache(dataset_names)
+    def manage_cache(self, dataset_names):
+        if not os.path.exists(self.cache_path):  # create the folder for cache
+            os.makedirs(self.cache_path)
+        cache_folder = os.path.join(
+            self.cache_path,
+            "_".join(dataset_names) + "_" + "x".join([str(x) for x in self.cache_size]),
+        )
+        if not os.path.exists(
+            cache_folder
+        ):  # check if the cache files are there, if not then cache
+            return self.cache(cache_folder)
+        return self.load_cache(cache_folder)
+    def cache(self, cache_folder):
+        os.mkdir(cache_folder)
+        cached_dataset = deepcopy(self.dataset)
+        # ims_list = []
+        # gts_list = []
+        ims_pt_list = []
+        gts_pt_list = []
+        for i, im_path in tqdm(
+            enumerate(self.dataset["im_path"]), total=len(self.dataset["im_path"])
+        ):
+            im_id = cached_dataset["im_name"][i]
+            print("im_path: ", im_path)
+            im = im_reader(im_path)
+            im, im_shp = im_preprocess(im, self.cache_size)
+            im_cache_file = os.path.join(
+                cache_folder, self.dataset["data_name"][i] + "_" + im_id + "_im.pt"
+            )
+            torch.save(im, im_cache_file)
+            cached_dataset["im_path"][i] = im_cache_file
+            if self.cache_boost:
+                ims_pt_list.append(torch.unsqueeze(im, 0))
+            # ims_list.append(im.cpu().data.numpy().astype(np.uint8))
+            gt = np.zeros(im.shape[0:2])
+            if len(self.dataset["gt_path"]) != 0:
+                gt = im_reader(self.dataset["gt_path"][i])
+            gt, gt_shp = gt_preprocess(gt, self.cache_size)
+            gt_cache_file = os.path.join(
+                cache_folder, self.dataset["data_name"][i] + "_" + im_id + "_gt.pt"
+            )
+            torch.save(gt, gt_cache_file)
+            if len(self.dataset["gt_path"]) > 0:
+                cached_dataset["gt_path"][i] = gt_cache_file
+            else:
+                cached_dataset["gt_path"].append(gt_cache_file)
+            if self.cache_boost:
+                gts_pt_list.append(torch.unsqueeze(gt, 0))
+            # gts_list.append(gt.cpu().data.numpy().astype(np.uint8))
+            # im_shp_cache_file = os.path.join(cache_folder,im_id + "_im_shp.pt")
+            # torch.save(gt_shp, shp_cache_file)
+            cached_dataset["im_shp"].append(im_shp)
+            # self.dataset["im_shp"].append(im_shp)
+            # shp_cache_file = os.path.join(cache_folder,im_id + "_gt_shp.pt")
+            # torch.save(gt_shp, shp_cache_file)
+            cached_dataset["gt_shp"].append(gt_shp)
+            # self.dataset["gt_shp"].append(gt_shp)
+        if self.cache_boost:
+            cached_dataset["ims_pt_dir"] = os.path.join(
+                cache_folder, self.cache_boost_name + "_ims.pt"
+            )
+            cached_dataset["gts_pt_dir"] = os.path.join(
+                cache_folder, self.cache_boost_name + "_gts.pt"
+            )
+            self.ims_pt = torch.cat(ims_pt_list, dim=0)
+            self.gts_pt = torch.cat(gts_pt_list, dim=0)
+            torch.save(torch.cat(ims_pt_list, dim=0), cached_dataset["ims_pt_dir"])
+            torch.save(torch.cat(gts_pt_list, dim=0), cached_dataset["gts_pt_dir"])
+        try:
+            json_file = open(os.path.join(cache_folder, self.cache_file_name), "w")
+            json.dump(cached_dataset, json_file)
+            json_file.close()
+        except Exception:
+            raise FileNotFoundError("Cannot create JSON")
+        return cached_dataset
+    def load_cache(self, cache_folder):
+        json_file = open(os.path.join(cache_folder, self.cache_file_name), "r")
+        dataset = json.load(json_file)
+        json_file.close()
+        ## if cache_boost is true, we will load the image npy and ground truth npy into the RAM
+        ## otherwise the pytorch tensor will be loaded
+        if self.cache_boost:
+            # self.ims_npy = np.load(dataset["ims_npy_dir"])
+            # self.gts_npy = np.load(dataset["gts_npy_dir"])
+            self.ims_pt = torch.load(dataset["ims_pt_dir"], map_location="cpu")
+            self.gts_pt = torch.load(dataset["gts_pt_dir"], map_location="cpu")
+        return dataset
+    def __len__(self):
+        return len(self.dataset["im_path"])
+    def __getitem__(self, idx):
+        im = None
+        gt = None
+        if self.cache_boost and self.ims_pt is not None:
+            # start = time.time()
+            im = self.ims_pt[idx]  # .type(torch.float32)
+            gt = self.gts_pt[idx]  # .type(torch.float32)
+            # print(idx, 'time for pt loading: ', time.time()-start)
+        else:
+            # import time
+            # start = time.time()
+            # print("tensor***")
+            im_pt_path = os.path.join(
+                self.cache_path,
+                os.sep.join(self.dataset["im_path"][idx].split(os.sep)[-2:]),
+            )
+            im = torch.load(im_pt_path)  # (self.dataset["im_path"][idx])
+            gt_pt_path = os.path.join(
+                self.cache_path,
+                os.sep.join(self.dataset["gt_path"][idx].split(os.sep)[-2:]),
+            )
+            gt = torch.load(gt_pt_path)  # (self.dataset["gt_path"][idx])
+            # print(idx,'time for tensor loading: ', time.time()-start)
+        im_shp = self.dataset["im_shp"][idx]
+        # print("time for loading im and gt: ", time.time()-start)
+        # start_time = time.time()
+        im = torch.divide(im, 255.0)
+        gt = torch.divide(gt, 255.0)
+        # print(idx, 'time for normalize torch divide: ', time.time()-start_time)
+        sample = {
+            "imidx": torch.from_numpy(np.array(idx)),
+            "image": im,
+            "label": gt,
+            "shape": torch.from_numpy(np.array(im_shp)),
+        }
+        if self.transform:
+            sample = self.transform(sample)
+        return sample

ormbg/inference.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import os
+import torch
+import argparse
+import numpy as np
+from PIL import Image
+from skimage import io
+from models.ormbg import ORMBG
+import torch.nn.functional as F
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Remove background from images using ORMBG model."
+    )
+    parser.add_argument(
+        "--image",
+        type=str,
+        default=os.path.join("examples", "image", "example01.jpeg"),
+        help="Path to the input image file.",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default=os.path.join("example01_no_background.png"),
+        help="Path to the output image file.",
+    )
+    parser.add_argument(
+        "--model-path",
+        type=str,
+        default=os.path.join("models", "ormbg.pth"),
+        help="Path to the model file.",
+    )
+    parser.add_argument(
+        "--compare",
+        action="store_false",
+        help="Flag to save the original and processed images side by side.",
+    )
+    return parser.parse_args()
+def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
+    if len(im.shape) < 3:
+        im = im[:, :, np.newaxis]
+    im_tensor = torch.tensor(im, dtype=torch.float32).permute(2, 0, 1)
+    im_tensor = F.interpolate(
+        torch.unsqueeze(im_tensor, 0), size=model_input_size, mode="bilinear"
+    ).type(torch.uint8)
+    image = torch.divide(im_tensor, 255.0)
+    return image
+def postprocess_image(result: torch.Tensor, im_size: list) -> np.ndarray:
+    result = torch.squeeze(F.interpolate(result, size=im_size, mode="bilinear"), 0)
+    ma = torch.max(result)
+    mi = torch.min(result)
+    result = (result - mi) / (ma - mi)
+    im_array = (result * 255).permute(1, 2, 0).cpu().data.numpy().astype(np.uint8)
+    im_array = np.squeeze(im_array)
+    return im_array
+def inference(args):
+    image_path = args.image
+    result_name = args.output
+    model_path = args.model_path
+    compare = args.compare
+    net = ORMBG()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if torch.cuda.is_available():
+        net.load_state_dict(torch.load(model_path))
+        net = net.cuda()
+    else:
+        net.load_state_dict(torch.load(model_path, map_location="cpu"))
+    net.eval()
+    model_input_size = [1024, 1024]
+    orig_im = io.imread(image_path)
+    orig_im_size = orig_im.shape[0:2]
+    image = preprocess_image(orig_im, model_input_size).to(device)
+    result = net(image)
+    # post process
+    result_image = postprocess_image(result[0][0], orig_im_size)
+    # save result
+    pil_im = Image.fromarray(result_image)
+    if pil_im.mode == "RGBA":
+        pil_im = pil_im.convert("RGB")
+    no_bg_image = Image.new("RGBA", pil_im.size, (0, 0, 0, 0))
+    orig_image = Image.open(image_path)
+    no_bg_image.paste(orig_image, mask=pil_im)
+    if compare:
+        combined_width = orig_image.width + no_bg_image.width
+        combined_image = Image.new("RGBA", (combined_width, orig_image.height))
+        combined_image.paste(orig_image, (0, 0))
+        combined_image.paste(no_bg_image, (orig_image.width, 0))
+        stacked_output_path = os.path.splitext(result_name)[0] + ".png"
+        combined_image.save(stacked_output_path)
+    else:
+        no_bg_image.save(result_name)
+# Script entry point: parse the command line and run inference once.
+if __name__ == "__main__":
+    inference(parse_args())

ormbg/models/ormbg.py ADDED Viewed

	@@ -0,0 +1,484 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# https://github.com/xuebinqin/DIS/blob/main/IS-Net/models/isnet.py
+class REBNCONV(nn.Module):
+    def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
+        super(REBNCONV, self).__init__()
+        self.conv_s1 = nn.Conv2d(
+            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
+        )
+        self.bn_s1 = nn.BatchNorm2d(out_ch)
+        self.relu_s1 = nn.ReLU(inplace=True)
+    def forward(self, x):
+        hx = x
+        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
+        return xout
+## upsample tensor 'src' to have the same spatial size with tensor 'tar'
+def _upsample_like(src, tar):
+    src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
+    return src
+### RSU-7 ###
+class RSU7(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
+        super(RSU7, self).__init__()
+        self.in_ch = in_ch
+        self.mid_ch = mid_ch
+        self.out_ch = out_ch
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)  ## 1 -> 1/2
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        b, c, h, w = x.shape
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+        hx5 = self.rebnconv5(hx)
+        hx = self.pool5(hx5)
+        hx6 = self.rebnconv6(hx)
+        hx7 = self.rebnconv7(hx6)
+        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
+        hx6dup = _upsample_like(hx6d, hx5)
+        hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-6 ###
+class RSU6(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU6, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+        hx5 = self.rebnconv5(hx)
+        hx6 = self.rebnconv6(hx5)
+        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-5 ###
+class RSU5(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU5, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx5 = self.rebnconv5(hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-4 ###
+class RSU4(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx4 = self.rebnconv4(hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-4F ###
+class RSU4F(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4F, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx2 = self.rebnconv2(hx1)
+        hx3 = self.rebnconv3(hx2)
+        hx4 = self.rebnconv4(hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
+        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
+        return hx1d + hxin
+class myrebnconv(nn.Module):
+    def __init__(
+        self,
+        in_ch=3,
+        out_ch=1,
+        kernel_size=3,
+        stride=1,
+        padding=1,
+        dilation=1,
+        groups=1,
+    ):
+        super(myrebnconv, self).__init__()
+        self.conv = nn.Conv2d(
+            in_ch,
+            out_ch,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_ch)
+        self.rl = nn.ReLU(inplace=True)
+    def forward(self, x):
+        return self.rl(self.bn(self.conv(x)))
+bce_loss = nn.BCELoss(size_average=True)
+class ORMBG(nn.Module):
+    def __init__(self, in_ch=3, out_ch=1):
+        super(ORMBG, self).__init__()
+        self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
+        self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage1 = RSU7(64, 32, 64)
+        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage2 = RSU6(64, 32, 128)
+        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage3 = RSU5(128, 64, 256)
+        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage4 = RSU4(256, 128, 512)
+        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage5 = RSU4F(512, 256, 512)
+        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage6 = RSU4F(512, 256, 512)
+        # decoder
+        self.stage5d = RSU4F(1024, 256, 512)
+        self.stage4d = RSU4(1024, 128, 256)
+        self.stage3d = RSU5(512, 64, 128)
+        self.stage2d = RSU6(256, 32, 64)
+        self.stage1d = RSU7(128, 16, 64)
+        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+        # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
+    def compute_loss(self, predictions, ground_truth):
+        loss0, loss = 0.0, 0.0
+        for i in range(0, len(predictions)):
+            loss = loss + bce_loss(predictions[i], ground_truth)
+            if i == 0:
+                loss0 = loss
+        return loss0, loss
+    def forward(self, x):
+        hx = x
+        hxin = self.conv_in(hx)
+        # hx = self.pool_in(hxin)
+        # stage 1
+        hx1 = self.stage1(hxin)
+        hx = self.pool12(hx1)
+        # stage 2
+        hx2 = self.stage2(hx)
+        hx = self.pool23(hx2)
+        # stage 3
+        hx3 = self.stage3(hx)
+        hx = self.pool34(hx3)
+        # stage 4
+        hx4 = self.stage4(hx)
+        hx = self.pool45(hx4)
+        # stage 5
+        hx5 = self.stage5(hx)
+        hx = self.pool56(hx5)
+        # stage 6
+        hx6 = self.stage6(hx)
+        hx6up = _upsample_like(hx6, hx5)
+        # -------------------- decoder --------------------
+        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
+        # side output
+        d1 = self.side1(hx1d)
+        d1 = _upsample_like(d1, x)
+        d2 = self.side2(hx2d)
+        d2 = _upsample_like(d2, x)
+        d3 = self.side3(hx3d)
+        d3 = _upsample_like(d3, x)
+        d4 = self.side4(hx4d)
+        d4 = _upsample_like(d4, x)
+        d5 = self.side5(hx5d)
+        d5 = _upsample_like(d5, x)
+        d6 = self.side6(hx6)
+        d6 = _upsample_like(d6, x)
+        return [
+            F.sigmoid(d1),
+            F.sigmoid(d2),
+            F.sigmoid(d3),
+            F.sigmoid(d4),
+            F.sigmoid(d5),
+            F.sigmoid(d6),
+        ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]

ormbg/train_model.py ADDED Viewed

	@@ -0,0 +1,474 @@

+import os
+import time
+import torch, gc
+import torch.nn as nn
+import torch.optim as optim
+from torch.autograd import Variable
+import torch.nn.functional as F
+import numpy as np
+from pathlib import Path
+from models.ormbg import ORMBG
+from skimage import io
+from basics import f1_mae_torch
+from data_loader_cache import (
+    get_im_gt_name_dict,
+    create_dataloaders,
+    GOSGridDropout,
+    GOSRandomHFlip,
+)
+# Device string shared by this module: "cuda" when a GPU is available, else "cpu".
+device = "cuda" if torch.cuda.is_available() else "cpu"
+def valid(net, valid_dataloaders, valid_datasets, hypar, epoch=0):
+    net.eval()
+    print("Validating...")
+    epoch_num = hypar["max_epoch_num"]
+    val_loss = 0.0
+    tar_loss = 0.0
+    val_cnt = 0.0
+    tmp_f1 = []
+    tmp_mae = []
+    tmp_time = []
+    start_valid = time.time()
+    for k in range(len(valid_dataloaders)):
+        valid_dataloader = valid_dataloaders[k]
+        valid_dataset = valid_datasets[k]
+        val_num = valid_dataset.__len__()
+        mybins = np.arange(0, 256)
+        PRE = np.zeros((val_num, len(mybins) - 1))
+        REC = np.zeros((val_num, len(mybins) - 1))
+        F1 = np.zeros((val_num, len(mybins) - 1))
+        MAE = np.zeros((val_num))
+        for i_val, data_val in enumerate(valid_dataloader):
+            val_cnt = val_cnt + 1.0
+            imidx_val, inputs_val, labels_val, shapes_val = (
+                data_val["imidx"],
+                data_val["image"],
+                data_val["label"],
+                data_val["shape"],
+            )
+            if hypar["model_digit"] == "full":
+                inputs_val = inputs_val.type(torch.FloatTensor)
+                labels_val = labels_val.type(torch.FloatTensor)
+            else:
+                inputs_val = inputs_val.type(torch.HalfTensor)
+                labels_val = labels_val.type(torch.HalfTensor)
+            # wrap them in Variable
+            if torch.cuda.is_available():
+                inputs_val_v, labels_val_v = Variable(
+                    inputs_val.cuda(), requires_grad=False
+                ), Variable(labels_val.cuda(), requires_grad=False)
+            else:
+                inputs_val_v, labels_val_v = Variable(
+                    inputs_val, requires_grad=False
+                ), Variable(labels_val, requires_grad=False)
+            t_start = time.time()
+            ds_val = net(inputs_val_v)[0]
+            t_end = time.time() - t_start
+            tmp_time.append(t_end)
+            # loss2_val, loss_val = muti_loss_fusion(ds_val, labels_val_v)
+            loss2_val, loss_val = net.compute_loss(ds_val, labels_val_v)
+            # compute F measure
+            for t in range(hypar["batch_size_valid"]):
+                i_test = imidx_val[t].data.numpy()
+                pred_val = ds_val[0][t, :, :, :]  # B x 1 x H x W
+                ## recover the prediction spatial size to the orignal image size
+                pred_val = torch.squeeze(
+                    F.upsample(
+                        torch.unsqueeze(pred_val, 0),
+                        (shapes_val[t][0], shapes_val[t][1]),
+                        mode="bilinear",
+                    )
+                )
+                # pred_val = normPRED(pred_val)
+                ma = torch.max(pred_val)
+                mi = torch.min(pred_val)
+                pred_val = (pred_val - mi) / (ma - mi)  # max = 1
+                if len(valid_dataset.dataset["ori_gt_path"]) != 0:
+                    gt = np.squeeze(
+                        io.imread(valid_dataset.dataset["ori_gt_path"][i_test])
+                    )  # max = 255
+                    if gt.max() == 1:
+                        gt = gt * 255
+                else:
+                    gt = np.zeros((shapes_val[t][0], shapes_val[t][1]))
+                with torch.no_grad():
+                    gt = torch.tensor(gt).to(device)
+                pre, rec, f1, mae = f1_mae_torch(
+                    pred_val * 255, gt, valid_dataset, i_test, mybins, hypar
+                )
+                PRE[i_test, :] = pre
+                REC[i_test, :] = rec
+                F1[i_test, :] = f1
+                MAE[i_test] = mae
+                del ds_val, gt
+                gc.collect()
+                torch.cuda.empty_cache()
+            # if(loss_val.data[0]>1):
+            val_loss += loss_val.item()  # data[0]
+            tar_loss += loss2_val.item()  # data[0]
+            print(
+                "[validating: %5d/%5d] val_ls:%f, tar_ls: %f, f1: %f, mae: %f, time: %f"
+                % (
+                    i_val,
+                    val_num,
+                    val_loss / (i_val + 1),
+                    tar_loss / (i_val + 1),
+                    np.amax(F1[i_test, :]),
+                    MAE[i_test],
+                    t_end,
+                )
+            )
+            del loss2_val, loss_val
+        print("============================")
+        PRE_m = np.mean(PRE, 0)
+        REC_m = np.mean(REC, 0)
+        f1_m = (1 + 0.3) * PRE_m * REC_m / (0.3 * PRE_m + REC_m + 1e-8)
+        tmp_f1.append(np.amax(f1_m))
+        tmp_mae.append(np.mean(MAE))
+    return tmp_f1, tmp_mae, val_loss, tar_loss, i_val, tmp_time
+def train(
+    net,
+    optimizer,
+    train_dataloaders,
+    train_datasets,
+    valid_dataloaders,
+    valid_datasets,
+    hypar,
+):
+    """Train ``net`` on the first training dataloader with periodic validation.
+    Every ``hypar["model_save_fre"]`` iterations ``valid`` is run; when the
+    maximum F-measure of at least one validation set improves, the state dict
+    is saved under ``hypar["model_path"]`` with the metrics encoded in the file
+    name. The process is terminated via ``exit()`` once ``hypar["max_ite"]``
+    iterations are reached or after ``hypar["early_stop"]`` consecutive
+    validations without improvement.
+    """
+    model_path = hypar["model_path"]
+    model_save_fre = hypar["model_save_fre"]
+    max_ite = hypar["max_ite"]
+    batch_size_train = hypar["batch_size_train"]
+    batch_size_valid = hypar["batch_size_valid"]
+    if not os.path.exists(model_path):
+        os.mkdir(model_path)
+    ite_num = hypar["start_ite"]  # count the total iteration number
+    ite_num4val = 0  # iterations since the last validation (averaging window)
+    running_loss = 0.0  # sum of the total loss over the current window
+    running_tar_loss = 0.0  # sum of the target (first output) loss over the window
+    last_f1 = [0 for x in range(len(valid_dataloaders))]
+    train_num = train_datasets[0].__len__()
+    net.train()
+    start_last = time.time()
+    # Only the first training dataloader is used.
+    gos_dataloader = train_dataloaders[0]
+    epoch_num = hypar["max_epoch_num"]
+    notgood_cnt = 0  # validations in a row without an F-measure improvement
+    for epoch in range(epoch_num):
+        for i, data in enumerate(gos_dataloader):
+            if ite_num >= max_ite:
+                print("Training Reached the Maximal Iteration Number ", max_ite)
+                exit()
+            ite_num = ite_num + 1
+            ite_num4val = ite_num4val + 1
+            # get the inputs
+            inputs, labels = data["image"], data["label"]
+            if hypar["model_digit"] == "full":
+                inputs = inputs.type(torch.FloatTensor)
+                labels = labels.type(torch.FloatTensor)
+            else:
+                inputs = inputs.type(torch.HalfTensor)
+                labels = labels.type(torch.HalfTensor)
+            # wrap them in Variable (moved to the GPU when one is available)
+            if torch.cuda.is_available():
+                inputs_v, labels_v = Variable(
+                    inputs.cuda(), requires_grad=False
+                ), Variable(labels.cuda(), requires_grad=False)
+            else:
+                inputs_v, labels_v = Variable(inputs, requires_grad=False), Variable(
+                    labels, requires_grad=False
+                )
+            # zero the parameter gradients, then forward / backward / step
+            start_inf_loss_back = time.time()
+            optimizer.zero_grad()
+            ds, _ = net(inputs_v)
+            loss2, loss = net.compute_loss(ds, labels_v)
+            loss.backward()
+            optimizer.step()
+            # accumulate statistics for the progress line
+            running_loss += loss.item()
+            running_tar_loss += loss2.item()
+            # drop the references so the tensors can be freed
+            del ds, loss2, loss
+            end_inf_loss_back = time.time() - start_inf_loss_back
+            print(
+                ">>>"
+                + model_path.split("/")[-1]
+                + " - [epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f, time-per-iter: %3f s, time_read: %3f"
+                % (
+                    epoch + 1,
+                    epoch_num,
+                    (i + 1) * batch_size_train,
+                    train_num,
+                    ite_num,
+                    running_loss / ite_num4val,
+                    running_tar_loss / ite_num4val,
+                    time.time() - start_last,
+                    time.time() - start_last - end_inf_loss_back,
+                )
+            )
+            start_last = time.time()
+            if ite_num % model_save_fre == 0:  # validate every model_save_fre iterations
+                notgood_cnt += 1
+                net.eval()
+                tmp_f1, tmp_mae, val_loss, tar_loss, i_val, tmp_time = valid(
+                    net, valid_dataloaders, valid_datasets, hypar, epoch
+                )
+                net.train()  # resume train
+                # tmp_out becomes 1 when any validation set improved its F-measure
+                tmp_out = 0
+                print("last_f1:", last_f1)
+                print("tmp_f1:", tmp_f1)
+                for fi in range(len(last_f1)):
+                    if tmp_f1[fi] > last_f1[fi]:
+                        tmp_out = 1
+                print("tmp_out:", tmp_out)
+                if tmp_out:
+                    notgood_cnt = 0
+                    last_f1 = tmp_f1
+                    tmp_f1_str = [str(round(f1x, 4)) for f1x in tmp_f1]
+                    tmp_mae_str = [str(round(mx, 4)) for mx in tmp_mae]
+                    maxf1 = "_".join(tmp_f1_str)
+                    meanM = "_".join(tmp_mae_str)
+                    # checkpoint name encodes iteration, losses, metrics and timing
+                    model_name = (
+                        "/gpu_itr_"
+                        + str(ite_num)
+                        + "_traLoss_"
+                        + str(np.round(running_loss / ite_num4val, 4))
+                        + "_traTarLoss_"
+                        + str(np.round(running_tar_loss / ite_num4val, 4))
+                        + "_valLoss_"
+                        + str(np.round(val_loss / (i_val + 1), 4))
+                        + "_valTarLoss_"
+                        + str(np.round(tar_loss / (i_val + 1), 4))
+                        + "_maxF1_"
+                        + maxf1
+                        + "_mae_"
+                        + meanM
+                        + "_time_"
+                        + str(
+                            np.round(np.mean(np.array(tmp_time)) / batch_size_valid, 6)
+                        )
+                        + ".pth"
+                    )
+                    torch.save(net.state_dict(), model_path + model_name)
+                # start a fresh averaging window after every validation
+                running_loss = 0.0
+                running_tar_loss = 0.0
+                ite_num4val = 0
+                if notgood_cnt >= hypar["early_stop"]:
+                    print(
+                        "No improvements in the last "
+                        + str(notgood_cnt)
+                        + " validation periods, so training stopped !"
+                    )
+                    exit()
+    print("Training Reaches The Maximum Epoch Number")
+def main(train_datasets, valid_datasets, hypar):
+    print("--- create training dataloader ---")
+    train_nm_im_gt_list = get_im_gt_name_dict(train_datasets, flag="train")
+    ## build dataloader for training datasets
+    train_dataloaders, train_datasets = create_dataloaders(
+        train_nm_im_gt_list,
+        cache_size=hypar["cache_size"],
+        cache_boost=hypar["cache_boost_train"],
+        my_transforms=[GOSGridDropout(), GOSRandomHFlip()],
+        batch_size=hypar["batch_size_train"],
+        shuffle=True,
+    )
+    valid_nm_im_gt_list = get_im_gt_name_dict(valid_datasets, flag="valid")
+    valid_dataloaders, valid_datasets = create_dataloaders(
+        valid_nm_im_gt_list,
+        cache_size=hypar["cache_size"],
+        cache_boost=hypar["cache_boost_valid"],
+        my_transforms=[],
+        batch_size=hypar["batch_size_valid"],
+        shuffle=False,
+    )
+    net = hypar["model"]
+    if hypar["model_digit"] == "half":
+        net.half()
+        for layer in net.modules():
+            if isinstance(layer, nn.BatchNorm2d):
+                layer.float()
+    if torch.cuda.is_available():
+        net.cuda()
+    if hypar["restore_model"] != "":
+        print("restore model from:")
+        print(hypar["model_path"] + "/" + hypar["restore_model"])
+        if torch.cuda.is_available():
+            net.load_state_dict(
+                torch.load(hypar["model_path"] + "/" + hypar["restore_model"])
+            )
+        else:
+            net.load_state_dict(
+                torch.load(
+                    hypar["model_path"] + "/" + hypar["restore_model"],
+                    map_location="cpu",
+                )
+            )
+    optimizer = optim.Adam(
+        net.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0
+    )
+    train(
+        net,
+        optimizer,
+        train_dataloaders,
+        train_datasets,
+        valid_dataloaders,
+        valid_datasets,
+        hypar,
+    )
+if __name__ == "__main__":
+    output_model_folder = "saved_models"
+    Path(output_model_folder).mkdir(parents=True, exist_ok=True)
+    train_datasets, valid_datasets = [], []
+    dataset_1, dataset_1 = {}, {}
+    dataset_training = {
+        "name": "ormbg-training",
+        "im_dir": str(Path("dataset", "training", "im")),
+        "gt_dir": str(Path("dataset", "training", "gt")),
+        "im_ext": ".png",
+        "gt_ext": ".png",
+        "cache_dir": str(Path("cache", "teacher", "training")),
+    }
+    dataset_validation = {
+        "name": "ormbg-training",
+        "im_dir": str(Path("dataset", "validation", "im")),
+        "gt_dir": str(Path("dataset", "validation", "gt")),
+        "im_ext": ".png",
+        "gt_ext": ".png",
+        "cache_dir": str(Path("cache", "teacher", "validation")),
+    }
+    train_datasets = [dataset_training]
+    valid_datasets = [dataset_validation]
+    ### --------------- STEP 2: Configuring the hyperparamters for Training, validation and inferencing ---------------
+    hypar = {}
+    hypar["model"] = ORMBG()
+    hypar["seed"] = 0
+    ## model weights path
+    hypar["model_path"] = "saved_models"
+    ## name of the segmentation model weights .pth for resume training process from last stop or for the inferencing
+    hypar["restore_model"] = ""
+    ## start iteration for the training, can be changed to match the restored training process
+    hypar["start_ite"] = 0
+    ## indicates "half" or "full" accuracy of float number
+    hypar["model_digit"] = "full"
+    ## To handle large size input images, which take a lot of time for loading in training,
+    #  we introduce the cache mechanism for pre-convering and resizing the jpg and png images into .pt file
+    hypar["cache_size"] = [
+        1024,
+        1024,
+    ]
+    ## cached input spatial resolution, can be configured into different size
+    ## "True" or "False", indicates wheather to load all the training datasets into RAM, True will greatly speed the training process while requires more RAM
+    hypar["cache_boost_train"] = False
+    ## "True" or "False", indicates wheather to load all the validation datasets into RAM, True will greatly speed the training process while requires more RAM
+    hypar["cache_boost_valid"] = False
+    ## stop the training when no improvement in the past 20 validation periods, smaller numbers can be used here e.g., 5 or 10.
+    hypar["early_stop"] = 20
+    ## valid and save model weights every 2000 iterations
+    hypar["model_save_fre"] = 2000
+    ## batch size for training
+    hypar["batch_size_train"] = 8
+    ## batch size for validation and inferencing
+    hypar["batch_size_valid"] = 1
+    ## if early stop couldn't stop the training process, stop it by the max_ite_num
+    hypar["max_ite"] = 10000000
+    ## if early stop and max_ite couldn't stop the training process, stop it by the max_epoch_num
+    hypar["max_epoch_num"] = 1000000
+    main(train_datasets, valid_datasets, hypar=hypar)

utils/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

utils/architecture.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from ormbg.models.ormbg import ORMBG
+if __name__ == "__main__":
+    print(ORMBG())

utils/loss_example.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import os
+import torch
+import argparse
+import numpy as np
+from skimage import io
+from ormbg.models.ormbg import ORMBG
+import torch.nn.functional as F
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Remove background from images using ORMBG model."
+    )
+    parser.add_argument(
+        "--prediction",
+        type=list,
+        default=[
+            os.path.join("examples", "loss", "loss01.png"),
+            os.path.join("examples", "loss", "loss02.png"),
+            os.path.join("examples", "loss", "loss03.png"),
+            os.path.join("examples", "loss", "loss04.png"),
+            os.path.join("examples", "loss", "loss05.png"),
+        ],
+        help="Path to the input image file.",
+    )
+    parser.add_argument(
+        "--gt",
+        type=str,
+        default=os.path.join("examples", "loss", "gt.png"),
+        help="Ground truth mask",
+    )
+    return parser.parse_args()
+def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
+    if len(im.shape) < 3:
+        im = im[:, :, np.newaxis]
+    im_tensor = torch.tensor(im, dtype=torch.float32).permute(2, 0, 1)
+    im_tensor = F.interpolate(
+        torch.unsqueeze(im_tensor, 0), size=model_input_size, mode="bilinear"
+    ).type(torch.uint8)
+    image = torch.divide(im_tensor, 255.0)
+    return image
+def inference(args):
+    prediction_paths = args.prediction
+    gt_path = args.gt
+    net = ORMBG()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    for pred_path in prediction_paths:
+        model_input_size = [1024, 1024]
+        loss = io.imread(pred_path)
+        prediction = preprocess_image(loss, model_input_size).to(device)
+        model_input_size = [1024, 1024]
+        gt = io.imread(gt_path)
+        ground_truth = preprocess_image(gt, model_input_size).to(device)
+        _, loss = net.compute_loss([prediction], ground_truth)
+        print(f"Loss: {pred_path} {loss}")
+if __name__ == "__main__":
+    inference(parse_args())

utils/pth_to_onnx.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import torch
+import argparse
+from ormbg.models.ormbg import ORMBG
+def export_to_onnx(model_path, onnx_path):
+    net = ORMBG()
+    if torch.cuda.is_available():
+        net.load_state_dict(torch.load(model_path))
+        net = net.cuda()
+    else:
+        net.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+    net.eval()
+    # Create a dummy input tensor. The size should match the model's input size.
+    # Adjust the dimensions as necessary; here it is assumed the input is a 3-channel image.
+    dummy_input = torch.randn(
+        1,
+        3,
+        1024,
+        1024,
+        device="cuda" if torch.cuda.is_available() else "cpu",
+    )
+    torch.onnx.export(
+        net,
+        dummy_input,
+        onnx_path,
+        export_params=True,
+        opset_version=11,
+        do_constant_folding=True,
+        input_names=["input"],
+        output_names=["output"],
+    )
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Export a trained model to ONNX format."
+    )
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        default="models/ormbg.pth",
+        help="The path to the trained model file.",
+    )
+    parser.add_argument(
+        "--onnx_path",
+        type=str,
+        default="models/ormbg.pth",
+        help="The path where the ONNX model will be saved.",
+    )
+    args = parser.parse_args()
+    export_to_onnx(args.model_path, args.onnx_path)