Remove-Image-Background

Sleeping

App Files Community

schirrmacher committed on Jul 6, 2024

Commit

9a96838

verified ·

1 Parent(s): 29275af

Upload folder using huggingface_hub

Browse files

Files changed (49) hide show

.gitignore +0 -157
IMG_1051.png +0 -3
dataset/training/gt/p_00a4eda7.png +0 -0
dataset/training/gt/p_00a5b702.png +0 -0
dataset/training/im/p_00a4eda7.png +0 -3
dataset/training/im/p_00a5b702.png +0 -3
dataset/validation/gt/p_00a7a27c.png +0 -0
dataset/validation/im/p_00a7a27c.png +0 -3
environment.yaml +0 -199
example 2.png +0 -3
example1.jpeg +0 -3
example1.png +0 -3
example2.jpeg +0 -3
example2.png +0 -3
example3.jpeg +0 -3
example3.png +0 -3
examples.jpg +0 -3
examples/.DS_Store +0 -0
examples/image/example01.jpeg +0 -3
examples/image/example02.jpeg +0 -3
examples/image/example03.jpeg +0 -3
examples/image/image01.png +0 -3
examples/image/image01_no_background.png +0 -3
examples/loss/gt.png +0 -0
examples/loss/loss01.png +0 -0
examples/loss/loss02.png +0 -0
examples/loss/loss03.png +0 -0
examples/loss/loss04.png +0 -0
examples/loss/loss05.png +0 -0
examples/loss/orginal.jpg +0 -0
explanation.jpg +0 -0
hf_space.py +0 -88
hf_space/app.py +0 -90
hf_space/example01.jpeg +0 -3
hf_space/example02.jpeg +0 -3
hf_space/example03.jpeg +0 -3
hf_space/ormbg.py +0 -484
input.png +0 -3
ormbg/.DS_Store +0 -0
ormbg/basics.py +0 -79
ormbg/data_loader_cache.py +0 -489
ormbg/inference.py +0 -110
ormbg/models/ormbg.py +0 -484
ormbg/train_model.py +0 -474
stack.py +0 -37
utils/.DS_Store +0 -0
utils/architecture.py +0 -4
utils/loss_example.py +0 -69
utils/pth_to_onnx.py +0 -59

.gitignore DELETED Viewed

@@ -1,157 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-# C extensions
-*.so
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-# Translations
-*.mo
-*.pot
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-# Flask stuff:
-instance/
-.webassets-cache
-# Scrapy stuff:
-.scrapy
-# Sphinx documentation
-docs/_build/
-# PyBuilder
-.pybuilder/
-target/
-# Jupyter Notebook
-.ipynb_checkpoints
-# IPython
-profile_default/
-ipython_config.py
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-# SageMath parsed files
-*.sage.py
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-# Spyder project settings
-.spyderproject
-.spyproject
-# Rope project settings
-.ropeproject
-# mkdocs documentation
-/site
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-# Pyre type checker
-.pyre/
-# pytype static type analyzer
-.pytype/
-# Cython debug symbols
-cython_debug/
-models/*

IMG_1051.png DELETED Viewed

Git LFS Details

SHA256: fe4c6f3e70dfce78cbb26f24d83c1eab96b791972f8f1bffe6126eddc8edb78c
Pointer size: 132 Bytes
Size of remote file: 4.62 MB

dataset/training/gt/p_00a4eda7.png DELETED Viewed

Binary file (38.7 kB)

dataset/training/gt/p_00a5b702.png DELETED Viewed

Binary file (96.9 kB)

dataset/training/im/p_00a4eda7.png DELETED Viewed

Git LFS Details

SHA256: e226a687b5d755056076e12d7f2c24704d101ad90918554c43028e8c1e53638f
Pointer size: 132 Bytes
Size of remote file: 1.74 MB

dataset/training/im/p_00a5b702.png DELETED Viewed

Git LFS Details

SHA256: 184b2d97ffdbffc9d0a5d3c3b84a848938df636855d59b81f3d109445a92b0ef
Pointer size: 132 Bytes
Size of remote file: 3.46 MB

dataset/validation/gt/p_00a7a27c.png DELETED Viewed

Binary file (81.8 kB)

dataset/validation/im/p_00a7a27c.png DELETED Viewed

Git LFS Details

SHA256: b87d59e4598ddc1078ebdc856e7101d92582315ecff2aecdadc17802e82bc8c1
Pointer size: 132 Bytes
Size of remote file: 1 MB

environment.yaml DELETED Viewed

@@ -1,199 +0,0 @@
-name: ormbg
-channels:
-  - pytorch
-  - nvidia
-  - anaconda
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - aom=3.6.0=h6a678d5_0
-  - blas=1.0=mkl
-  - blosc=1.21.3=h6a678d5_0
-  - brotli=1.0.9=h5eee18b_7
-  - brotli-bin=1.0.9=h5eee18b_7
-  - brotli-python=1.0.9=py38h6a678d5_7
-  - brunsli=0.1=h2531618_0
-  - bzip2=1.0.8=h7b6447c_0
-  - c-ares=1.19.1=h5eee18b_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - certifi=2023.7.22=py38h06a4308_0
-  - cffi=1.15.0=py38h7f8727e_0
-  - cfitsio=3.470=h5893167_7
-  - charls=2.2.0=h2531618_0
-  - charset-normalizer=2.0.4=pyhd3eb1b0_0
-  - click=8.1.7=py38h06a4308_0
-  - cloudpickle=2.2.1=py38h06a4308_0
-  - contourpy=1.0.5=py38hdb19cb5_0
-  - cryptography=41.0.3=py38h130f0dd_0
-  - cuda-cudart=11.8.89=0
-  - cuda-cupti=11.8.87=0
-  - cuda-libraries=11.8.0=0
-  - cuda-nvrtc=11.8.89=0
-  - cuda-nvtx=11.8.86=0
-  - cuda-runtime=11.8.0=0
-  - cudatoolkit=11.8.0=h6a678d5_0
-  - cycler=0.11.0=pyhd3eb1b0_0
-  - cytoolz=0.12.0=py38h5eee18b_0
-  - dask-core=2023.4.1=py38h06a4308_0
-  - dav1d=1.2.1=h5eee18b_0
-  - dbus=1.13.18=hb2f20db_0
-  - expat=2.5.0=h6a678d5_0
-  - ffmpeg=4.3=hf484d3e_0
-  - fftw=3.3.9=h27cfd23_1
-  - filelock=3.9.0=py38h06a4308_0
-  - fontconfig=2.14.1=h52c9d5c_1
-  - fonttools=4.25.0=pyhd3eb1b0_0
-  - freetype=2.12.1=h4a9f257_0
-  - fsspec=2023.9.2=py38h06a4308_0
-  - giflib=5.2.1=h5eee18b_3
-  - glib=2.63.1=h5a9c865_0
-  - gmp=6.2.1=h295c915_3
-  - gmpy2=2.1.2=py38heeb90bb_0
-  - gnutls=3.6.15=he1e5248_0
-  - gst-plugins-base=1.14.0=hbbd80ab_1
-  - gstreamer=1.14.0=hb453b48_1
-  - icu=58.2=he6710b0_3
-  - idna=3.4=py38h06a4308_0
-  - imagecodecs=2023.1.23=py38hc4b7b5f_0
-  - imageio=2.31.4=py38h06a4308_0
-  - importlib-metadata=6.0.0=py38h06a4308_0
-  - importlib_resources=6.1.0=py38h06a4308_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jinja2=3.1.2=py38h06a4308_0
-  - jpeg=9e=h5eee18b_1
-  - jxrlib=1.1=h7b6447c_2
-  - kiwisolver=1.4.4=py38h6a678d5_0
-  - krb5=1.20.1=h568e23c_1
-  - lame=3.100=h7b6447c_0
-  - lazy_loader=0.3=py38h06a4308_0
-  - lcms2=2.12=h3be6417_0
-  - lerc=3.0=h295c915_0
-  - libaec=1.0.4=he6710b0_1
-  - libavif=0.11.1=h5eee18b_0
-  - libbrotlicommon=1.0.9=h5eee18b_7
-  - libbrotlidec=1.0.9=h5eee18b_7
-  - libbrotlienc=1.0.9=h5eee18b_7
-  - libcublas=11.11.3.6=0
-  - libcufft=10.9.0.58=0
-  - libcufile=1.8.1.2=0
-  - libcurand=10.3.4.101=0
-  - libcurl=7.88.1=h91b91d3_2
-  - libcusolver=11.4.1.48=0
-  - libcusparse=11.7.5.86=0
-  - libdeflate=1.17=h5eee18b_1
-  - libedit=3.1.20221030=h5eee18b_0
-  - libev=4.33=h7f8727e_1
-  - libffi=3.2.1=hf484d3e_1007
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgfortran-ng=11.2.0=h00389a5_1
-  - libgfortran5=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libiconv=1.16=h7f8727e_2
-  - libidn2=2.3.4=h5eee18b_0
-  - libjpeg-turbo=2.0.0=h9bf148f_0
-  - libnghttp2=1.52.0=ha637b67_1
-  - libnpp=11.8.0.86=0
-  - libnvjpeg=11.9.0.86=0
-  - libpng=1.6.39=h5eee18b_0
-  - libssh2=1.10.0=h37d81fd_2
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libtasn1=4.19.0=h5eee18b_0
-  - libtiff=4.5.1=h6a678d5_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuuid=1.41.5=h5eee18b_0
-  - libwebp=1.3.2=h11a3e52_0
-  - libwebp-base=1.3.2=h5eee18b_0
-  - libxcb=1.15=h7f8727e_0
-  - libxml2=2.9.14=h74e7548_0
-  - libzopfli=1.0.3=he6710b0_0
-  - llvm-openmp=14.0.6=h9e868ea_0
-  - locket=1.0.0=py38h06a4308_0
-  - lz4-c=1.9.4=h6a678d5_0
-  - markupsafe=2.1.1=py38h7f8727e_0
-  - matplotlib=3.7.2=py38h06a4308_0
-  - matplotlib-base=3.7.2=py38h1128e8f_0
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py38h7f8727e_0
-  - mkl_fft=1.3.1=py38hd3c417c_0
-  - mkl_random=1.2.2=py38h51133e4_0
-  - mpc=1.1.0=h10f8cd9_1
-  - mpfr=4.0.2=hb69a4c5_1
-  - mpmath=1.3.0=py38h06a4308_0
-  - munkres=1.1.4=py_0
-  - ncurses=6.4=h6a678d5_0
-  - nettle=3.7.3=hbbd107a_1
-  - networkx=3.1=py38h06a4308_0
-  - openh264=2.1.1=h4ff587b_0
-  - openjpeg=2.4.0=h3ad879b_0
-  - openssl=1.1.1w=h7f8727e_0
-  - packaging=23.1=py38h06a4308_0
-  - partd=1.4.1=py38h06a4308_0
-  - pcre=8.45=h295c915_0
-  - pillow=10.0.1=py38ha6cbd5a_0
-  - pip=23.3=py38h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=23.2.0=py38h06a4308_0
-  - pyparsing=3.0.9=py38h06a4308_0
-  - pyqt=5.9.2=py38h05f1152_4
-  - pysocks=1.7.1=py38h06a4308_0
-  - python=3.8.0=h0371630_2
-  - python-dateutil=2.8.2=pyhd3eb1b0_0
-  - pytorch=2.1.1=py3.8_cuda11.8_cudnn8.7.0_0
-  - pytorch-cuda=11.8=h7e8668a_5
-  - pytorch-mutex=1.0=cuda
-  - pywavelets=1.4.1=py38h5eee18b_0
-  - pyyaml=6.0.1=py38h5eee18b_0
-  - qt=5.9.7=h5867ecd_1
-  - readline=7.0=h7b6447c_5
-  - requests=2.31.0=py38h06a4308_0
-  - setuptools=68.0.0=py38h06a4308_0
-  - sip=4.19.13=py38h295c915_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - snappy=1.1.9=h295c915_0
-  - sqlite=3.33.0=h62c20be_0
-  - sympy=1.11.1=py38h06a4308_0
-  - tifffile=2023.4.12=py38h06a4308_0
-  - tk=8.6.12=h1ccaba5_0
-  - toolz=0.12.0=py38h06a4308_0
-  - torchaudio=2.1.1=py38_cu118
-  - torchtriton=2.1.0=py38
-  - torchvision=0.16.1=py38_cu118
-  - tornado=6.3.3=py38h5eee18b_0
-  - tqdm=4.65.0=py38hb070fc8_0
-  - urllib3=1.26.18=py38h06a4308_0
-  - wheel=0.41.2=py38h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - yaml=0.2.5=h7b6447c_0
-  - zfp=1.0.0=h6a678d5_0
-  - zipp=3.11.0=py38h06a4308_0
-  - zlib=1.2.13=h5eee18b_0
-  - zstd=1.5.5=hc292b87_0
-  - pip:
-      - albucore==0.0.12
-      - albumentations==1.4.11
-      - annotated-types==0.7.0
-      - appdirs==1.4.4
-      - conda-pack==0.7.1
-      - docker-pycreds==0.4.0
-      - eval-type-backport==0.2.0
-      - gitdb==4.0.11
-      - gitpython==3.1.40
-      - joblib==1.4.2
-      - numpy==1.24.4
-      - opencv-python-headless==4.10.0.84
-      - protobuf==4.25.1
-      - psutil==5.9.6
-      - pydantic==2.8.2
-      - pydantic-core==2.20.1
-      - scikit-image==0.21.0
-      - scikit-learn==1.3.2
-      - scipy==1.10.1
-      - sentry-sdk==1.35.0
-      - setproctitle==1.3.3
-      - smmap==5.0.1
-      - threadpoolctl==3.5.0
-      - tomli==2.0.1
-      - typing-extensions==4.12.2
-      - wandb==0.16.0
-prefix: /home/macher/miniconda3/envs/ormbg

example 2.png DELETED Viewed

Git LFS Details

SHA256: 5c7d7c861bd738768dcbefb98076c8423978d3108e6f0bcd482bb180a775a8af
Pointer size: 132 Bytes
Size of remote file: 4.77 MB

example1.jpeg DELETED Viewed

Git LFS Details

SHA256: 436f546cc1d7b2fd7021180299b028c0d379e48a9e9f05214a694b9c4eb8a7e3
Pointer size: 132 Bytes
Size of remote file: 7.63 MB

example1.png DELETED Viewed

Git LFS Details

SHA256: 42c8627c1ada7b69ef8561fcb5611cd8aa08af5eed211379a2619960524639c5
Pointer size: 132 Bytes
Size of remote file: 4.83 MB

example2.jpeg DELETED Viewed

Git LFS Details

SHA256: 1dad92b56723fd8ac1c3832844873ad297300d0e85f6e14764334687a70c8abc
Pointer size: 132 Bytes
Size of remote file: 4.32 MB

example2.png DELETED Viewed

Git LFS Details

SHA256: 5c7d7c861bd738768dcbefb98076c8423978d3108e6f0bcd482bb180a775a8af
Pointer size: 132 Bytes
Size of remote file: 4.77 MB

example3.jpeg DELETED Viewed

Git LFS Details

SHA256: f392dc4716469f5367ce0e2ac788f284d1b8d70c39be109db7038c3306a1da16
Pointer size: 132 Bytes
Size of remote file: 1.09 MB

example3.png DELETED Viewed

Git LFS Details

SHA256: e024065f9c0f6c981c107c5a403b1cc8fd3dfd20ac37fa212b43e0e69ec1b8ae
Pointer size: 132 Bytes
Size of remote file: 4.81 MB

examples.jpg DELETED Viewed

Git LFS Details

SHA256: ca9ab255b054e237cb51072bf687bf5f044a902d494ab7aa14b931e450519358
Pointer size: 132 Bytes
Size of remote file: 1.39 MB

examples/.DS_Store DELETED Viewed

Binary file (6.15 kB)

examples/image/example01.jpeg DELETED Viewed

Git LFS Details

SHA256: 436f546cc1d7b2fd7021180299b028c0d379e48a9e9f05214a694b9c4eb8a7e3
Pointer size: 132 Bytes
Size of remote file: 7.63 MB

examples/image/example02.jpeg DELETED Viewed

Git LFS Details

SHA256: 1dad92b56723fd8ac1c3832844873ad297300d0e85f6e14764334687a70c8abc
Pointer size: 132 Bytes
Size of remote file: 4.32 MB

examples/image/example03.jpeg DELETED Viewed

Git LFS Details

SHA256: f392dc4716469f5367ce0e2ac788f284d1b8d70c39be109db7038c3306a1da16
Pointer size: 132 Bytes
Size of remote file: 1.09 MB

examples/image/image01.png DELETED Viewed

Git LFS Details

SHA256: 1c6d54789fc0d8816231ca9f061b19af50bdbfb59a4fed7fa6c7bd3168591b0e
Pointer size: 133 Bytes
Size of remote file: 16.7 MB

examples/image/image01_no_background.png DELETED Viewed

Git LFS Details

SHA256: 9290ced416914386458bded92614b3b620bf82fc9dc7b06b4015fc6791d34cc3
Pointer size: 133 Bytes
Size of remote file: 21.4 MB

examples/loss/gt.png DELETED Viewed

Binary file (258 kB)

examples/loss/loss01.png DELETED Viewed

Binary file (291 kB)

examples/loss/loss02.png DELETED Viewed

Binary file (417 kB)

examples/loss/loss03.png DELETED Viewed

Binary file (645 kB)

examples/loss/loss04.png DELETED Viewed

Binary file (794 kB)

examples/loss/loss05.png DELETED Viewed

Binary file (983 kB)

examples/loss/orginal.jpg DELETED Viewed

Binary file (366 kB)

explanation.jpg DELETED Viewed

Binary file (713 kB)

hf_space.py DELETED Viewed

@@ -1,88 +0,0 @@
-import spaces
-import numpy as np
-import torch
-import torch.nn.functional as F
-import gradio as gr
-from ormbg.models.ormbg import ORMBG
-from PIL import Image
-model_path = "models/ormbg.pth"
-# Load the model globally but don't send to device yet
-net = ORMBG()
-net.load_state_dict(torch.load(model_path, map_location="cpu"))
-net.eval()
-def resize_image(image):
-    image = image.convert("RGB")
-    model_input_size = (1024, 1024)
-    image = image.resize(model_input_size, Image.BILINEAR)
-    return image
-@spaces.GPU
-@torch.inference_mode()
-def inference(image):
-    # Check for CUDA and set the device inside inference
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    net.to(device)
-    # Prepare input
-    orig_image = Image.fromarray(image)
-    w, h = orig_image.size
-    image = resize_image(orig_image)
-    im_np = np.array(image)
-    im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1)
-    im_tensor = torch.unsqueeze(im_tensor, 0)
-    im_tensor = torch.divide(im_tensor, 255.0)
-    if torch.cuda.is_available():
-        im_tensor = im_tensor.to(device)
-    # Inference
-    result = net(im_tensor)
-    # Post process
-    result = torch.squeeze(F.interpolate(result[0][0], size=(h, w), mode="bilinear"), 0)
-    ma = torch.max(result)
-    mi = torch.min(result)
-    result = (result - mi) / (ma - mi)
-    # Image to PIL
-    im_array = (result * 255).cpu().data.numpy().astype(np.uint8)
-    pil_im = Image.fromarray(np.squeeze(im_array))
-    # Paste the mask on the original image
-    new_im = Image.new("RGBA", pil_im.size, (0, 0, 0, 0))
-    new_im.paste(orig_image, mask=pil_im)
-    return new_im
-# Gradio interface setup
-title = "Open Remove Background Model (ormbg)"
-description = r"""
-This model is a <strong>fully open-source background remover</strong> optimized for images with humans. It is based on [Highly Accurate Dichotomous Image Segmentation research](https://github.com/xuebinqin/DIS). The model was trained with the synthetic <a href="https://huggingface.co/datasets/schirrmacher/humans">Human Segmentation Dataset</a>, <a href="https://paperswithcode.com/dataset/p3m-10k">P3M-10k</a> and <a href="https://paperswithcode.com/dataset/aim-500">AIM-500</a>.
-If you identify cases where the model fails, <a href='https://huggingface.co/schirrmacher/ormbg/discussions' target='_blank'>upload your examples</a>!
-- <a href='https://huggingface.co/schirrmacher/ormbg' target='_blank'>Model card</a>: find inference code, training information, tutorials
-- <a href='https://huggingface.co/schirrmacher/ormbg' target='_blank'>Dataset</a>: see training images, segmentation data, backgrounds
-- <a href='https://huggingface.co/schirrmacher/ormbg\#research' target='_blank'>Research</a>: see current approach for improvements
-"""
-examples = [
-    "./examples/image/example1.jpeg",
-    "./examples/image/example2.jpeg",
-    "./examples/image/example3.jpeg",
-]
-demo = gr.Interface(
-    fn=inference,
-    inputs="image",
-    outputs="image",
-    examples=examples,
-    title=title,
-    description=description,
-)
-if __name__ == "__main__":
-    demo.launch(share=False, allowed_paths=["ormbg", "models", "examples"])

hf_space/app.py DELETED Viewed

@@ -1,90 +0,0 @@
-import spaces
-import numpy as np
-import torch
-import torch.nn.functional as F
-import gradio as gr
-from ormbg import ORMBG
-from PIL import Image
-model_path = "../models/ormbg.pth"
-# Load the model globally but don't send to device yet
-net = ORMBG()
-net.load_state_dict(torch.load(model_path, map_location="cpu"))
-net.eval()
-def resize_image(image):
-    image = image.convert("RGB")
-    model_input_size = (1024, 1024)
-    image = image.resize(model_input_size, Image.BILINEAR)
-    return image
-@spaces.GPU
-@torch.inference_mode()
-def inference(image):
-    # Check for CUDA and set the device inside inference
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    net.to(device)
-    # Prepare input
-    orig_image = Image.fromarray(image)
-    w, h = orig_image.size
-    image = resize_image(orig_image)
-    im_np = np.array(image)
-    im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1)
-    im_tensor = torch.unsqueeze(im_tensor, 0)
-    im_tensor = torch.divide(im_tensor, 255.0)
-    if torch.cuda.is_available():
-        im_tensor = im_tensor.to(device)
-    # Inference
-    result = net(im_tensor)
-    # Post process
-    result = torch.squeeze(F.interpolate(result[0][0], size=(h, w), mode="bilinear"), 0)
-    ma = torch.max(result)
-    mi = torch.min(result)
-    result = (result - mi) / (ma - mi)
-    # Image to PIL
-    im_array = (result * 255).cpu().data.numpy().astype(np.uint8)
-    pil_im = Image.fromarray(np.squeeze(im_array))
-    # Paste the mask on the original image
-    new_im = Image.new("RGBA", pil_im.size, (0, 0, 0, 0))
-    new_im.paste(orig_image, mask=pil_im)
-    return new_im
-# Gradio interface setup
-title = "Open Remove Background Model (ormbg)"
-description = r"""
-This model is a <strong>fully open-source background remover</strong> optimized for images with humans. It is based on [Highly Accurate Dichotomous Image Segmentation research](https://github.com/xuebinqin/DIS). The model was trained with the synthetic <a href="https://huggingface.co/datasets/schirrmacher/humans">Human Segmentation Dataset</a>, <a href="https://paperswithcode.com/dataset/p3m-10k">P3M-10k</a> and <a href="https://paperswithcode.com/dataset/aim-500">AIM-500</a>.
-If you identify cases where the model fails, <a href='https://huggingface.co/schirrmacher/ormbg/discussions' target='_blank'>upload your examples</a>!
-- <a href='https://huggingface.co/schirrmacher/ormbg' target='_blank'>Model card</a>: find inference code, training information, tutorials
-- <a href='https://huggingface.co/schirrmacher/ormbg' target='_blank'>Dataset</a>: see training images, segmentation data, backgrounds
-- <a href='https://huggingface.co/schirrmacher/ormbg\#research' target='_blank'>Research</a>: see current approach for improvements
-"""
-examples = [
-    "example1.jpeg",
-    "example2.jpeg",
-    "example3.jpeg",
-]
-demo = gr.Interface(
-    fn=inference,
-    inputs="image",
-    outputs="image",
-    examples=examples,
-    title=title,
-    description=description,
-)
-if __name__ == "__main__":
-    demo.launch(
-        share=False, root_path="../", allowed_paths=["../hf_space", "../models"]
-    )

hf_space/example01.jpeg DELETED Viewed

Git LFS Details

SHA256: 436f546cc1d7b2fd7021180299b028c0d379e48a9e9f05214a694b9c4eb8a7e3
Pointer size: 132 Bytes
Size of remote file: 7.63 MB

hf_space/example02.jpeg DELETED Viewed

Git LFS Details

SHA256: 1dad92b56723fd8ac1c3832844873ad297300d0e85f6e14764334687a70c8abc
Pointer size: 132 Bytes
Size of remote file: 4.32 MB

hf_space/example03.jpeg DELETED Viewed

Git LFS Details

SHA256: f392dc4716469f5367ce0e2ac788f284d1b8d70c39be109db7038c3306a1da16
Pointer size: 132 Bytes
Size of remote file: 1.09 MB

hf_space/ormbg.py DELETED Viewed

@@ -1,484 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-# https://github.com/xuebinqin/DIS/blob/main/IS-Net/models/isnet.py
-class REBNCONV(nn.Module):
-    def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
-        super(REBNCONV, self).__init__()
-        self.conv_s1 = nn.Conv2d(
-            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
-        )
-        self.bn_s1 = nn.BatchNorm2d(out_ch)
-        self.relu_s1 = nn.ReLU(inplace=True)
-    def forward(self, x):
-        hx = x
-        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
-        return xout
-## upsample tensor 'src' to have the same spatial size with tensor 'tar'
-def _upsample_like(src, tar):
-    src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
-    return src
-### RSU-7 ###
-class RSU7(nn.Module):
-    def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
-        super(RSU7, self).__init__()
-        self.in_ch = in_ch
-        self.mid_ch = mid_ch
-        self.out_ch = out_ch
-        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)  ## 1 -> 1/2
-        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
-        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
-        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
-    def forward(self, x):
-        b, c, h, w = x.shape
-        hx = x
-        hxin = self.rebnconvin(hx)
-        hx1 = self.rebnconv1(hxin)
-        hx = self.pool1(hx1)
-        hx2 = self.rebnconv2(hx)
-        hx = self.pool2(hx2)
-        hx3 = self.rebnconv3(hx)
-        hx = self.pool3(hx3)
-        hx4 = self.rebnconv4(hx)
-        hx = self.pool4(hx4)
-        hx5 = self.rebnconv5(hx)
-        hx = self.pool5(hx5)
-        hx6 = self.rebnconv6(hx)
-        hx7 = self.rebnconv7(hx6)
-        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
-        hx6dup = _upsample_like(hx6d, hx5)
-        hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
-        hx5dup = _upsample_like(hx5d, hx4)
-        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
-        hx4dup = _upsample_like(hx4d, hx3)
-        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
-        hx3dup = _upsample_like(hx3d, hx2)
-        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
-        hx2dup = _upsample_like(hx2d, hx1)
-        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
-        return hx1d + hxin
-### RSU-6 ###
-class RSU6(nn.Module):
-    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
-        super(RSU6, self).__init__()
-        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
-        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
-        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
-        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
-    def forward(self, x):
-        hx = x
-        hxin = self.rebnconvin(hx)
-        hx1 = self.rebnconv1(hxin)
-        hx = self.pool1(hx1)
-        hx2 = self.rebnconv2(hx)
-        hx = self.pool2(hx2)
-        hx3 = self.rebnconv3(hx)
-        hx = self.pool3(hx3)
-        hx4 = self.rebnconv4(hx)
-        hx = self.pool4(hx4)
-        hx5 = self.rebnconv5(hx)
-        hx6 = self.rebnconv6(hx5)
-        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
-        hx5dup = _upsample_like(hx5d, hx4)
-        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
-        hx4dup = _upsample_like(hx4d, hx3)
-        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
-        hx3dup = _upsample_like(hx3d, hx2)
-        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
-        hx2dup = _upsample_like(hx2d, hx1)
-        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
-        return hx1d + hxin
-### RSU-5 ###
-class RSU5(nn.Module):
-    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
-        super(RSU5, self).__init__()
-        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
-        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
-        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
-        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
-    def forward(self, x):
-        hx = x
-        hxin = self.rebnconvin(hx)
-        hx1 = self.rebnconv1(hxin)
-        hx = self.pool1(hx1)
-        hx2 = self.rebnconv2(hx)
-        hx = self.pool2(hx2)
-        hx3 = self.rebnconv3(hx)
-        hx = self.pool3(hx3)
-        hx4 = self.rebnconv4(hx)
-        hx5 = self.rebnconv5(hx4)
-        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
-        hx4dup = _upsample_like(hx4d, hx3)
-        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
-        hx3dup = _upsample_like(hx3d, hx2)
-        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
-        hx2dup = _upsample_like(hx2d, hx1)
-        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
-        return hx1d + hxin
-### RSU-4 ###
-class RSU4(nn.Module):
-    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
-        super(RSU4, self).__init__()
-        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
-        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
-        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
-        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
-        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
-        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
-    def forward(self, x):
-        hx = x
-        hxin = self.rebnconvin(hx)
-        hx1 = self.rebnconv1(hxin)
-        hx = self.pool1(hx1)
-        hx2 = self.rebnconv2(hx)
-        hx = self.pool2(hx2)
-        hx3 = self.rebnconv3(hx)
-        hx4 = self.rebnconv4(hx3)
-        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
-        hx3dup = _upsample_like(hx3d, hx2)
-        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
-        hx2dup = _upsample_like(hx2d, hx1)
-        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
-        return hx1d + hxin
-### RSU-4F ###
-class RSU4F(nn.Module):
-    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
-        super(RSU4F, self).__init__()
-        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
-        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
-        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
-        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
-        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
-        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
-        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
-        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
-    def forward(self, x):
-        hx = x
-        hxin = self.rebnconvin(hx)
-        hx1 = self.rebnconv1(hxin)
-        hx2 = self.rebnconv2(hx1)
-        hx3 = self.rebnconv3(hx2)
-        hx4 = self.rebnconv4(hx3)
-        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
-        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
-        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
-        return hx1d + hxin
-class myrebnconv(nn.Module):
-    def __init__(
-        self,
-        in_ch=3,
-        out_ch=1,
-        kernel_size=3,
-        stride=1,
-        padding=1,
-        dilation=1,
-        groups=1,
-    ):
-        super(myrebnconv, self).__init__()
-        self.conv = nn.Conv2d(
-            in_ch,
-            out_ch,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            dilation=dilation,
-            groups=groups,
-        )
-        self.bn = nn.BatchNorm2d(out_ch)
-        self.rl = nn.ReLU(inplace=True)
-    def forward(self, x):
-        return self.rl(self.bn(self.conv(x)))
-bce_loss = nn.BCELoss(size_average=True)
-class ORMBG(nn.Module):
-    def __init__(self, in_ch=3, out_ch=1):
-        super(ORMBG, self).__init__()
-        self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
-        self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage1 = RSU7(64, 32, 64)
-        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage2 = RSU6(64, 32, 128)
-        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage3 = RSU5(128, 64, 256)
-        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage4 = RSU4(256, 128, 512)
-        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage5 = RSU4F(512, 256, 512)
-        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage6 = RSU4F(512, 256, 512)
-        # decoder
-        self.stage5d = RSU4F(1024, 256, 512)
-        self.stage4d = RSU4(1024, 128, 256)
-        self.stage3d = RSU5(512, 64, 128)
-        self.stage2d = RSU6(256, 32, 64)
-        self.stage1d = RSU7(128, 16, 64)
-        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
-        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
-        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
-        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
-        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
-        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
-        # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
-    def compute_loss(self, predictions, ground_truth):
-        loss0, loss = 0.0, 0.0
-        for i in range(0, len(predictions)):
-            loss = loss + bce_loss(predictions[i], ground_truth)
-            if i == 0:
-                loss0 = loss
-        return loss0, loss
-    def forward(self, x):
-        hx = x
-        hxin = self.conv_in(hx)
-        # hx = self.pool_in(hxin)
-        # stage 1
-        hx1 = self.stage1(hxin)
-        hx = self.pool12(hx1)
-        # stage 2
-        hx2 = self.stage2(hx)
-        hx = self.pool23(hx2)
-        # stage 3
-        hx3 = self.stage3(hx)
-        hx = self.pool34(hx3)
-        # stage 4
-        hx4 = self.stage4(hx)
-        hx = self.pool45(hx4)
-        # stage 5
-        hx5 = self.stage5(hx)
-        hx = self.pool56(hx5)
-        # stage 6
-        hx6 = self.stage6(hx)
-        hx6up = _upsample_like(hx6, hx5)
-        # -------------------- decoder --------------------
-        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
-        hx5dup = _upsample_like(hx5d, hx4)
-        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
-        hx4dup = _upsample_like(hx4d, hx3)
-        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
-        hx3dup = _upsample_like(hx3d, hx2)
-        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
-        hx2dup = _upsample_like(hx2d, hx1)
-        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
-        # side output
-        d1 = self.side1(hx1d)
-        d1 = _upsample_like(d1, x)
-        d2 = self.side2(hx2d)
-        d2 = _upsample_like(d2, x)
-        d3 = self.side3(hx3d)
-        d3 = _upsample_like(d3, x)
-        d4 = self.side4(hx4d)
-        d4 = _upsample_like(d4, x)
-        d5 = self.side5(hx5d)
-        d5 = _upsample_like(d5, x)
-        d6 = self.side6(hx6)
-        d6 = _upsample_like(d6, x)
-        return [
-            F.sigmoid(d1),
-            F.sigmoid(d2),
-            F.sigmoid(d3),
-            F.sigmoid(d4),
-            F.sigmoid(d5),
-            F.sigmoid(d6),
-        ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]

input.png DELETED Viewed

Git LFS Details

SHA256: 42c8627c1ada7b69ef8561fcb5611cd8aa08af5eed211379a2619960524639c5
Pointer size: 132 Bytes
Size of remote file: 4.83 MB

ormbg/.DS_Store DELETED Viewed

Binary file (6.15 kB)

ormbg/basics.py DELETED Viewed

@@ -1,79 +0,0 @@
-import os
-# os.environ['CUDA_VISIBLE_DEVICES'] = '2'
-from skimage import io, transform
-import torch
-import torchvision
-from torch.autograd import Variable
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.utils.data import Dataset, DataLoader
-from torchvision import transforms, utils
-import torch.optim as optim
-import matplotlib.pyplot as plt
-import numpy as np
-from PIL import Image
-import glob
def mae_torch(pred, gt):
    """Return the scaled mean absolute error between ``pred`` and ``gt``.

    Both inputs are cast to float, their absolute differences are summed, and
    the sum is divided by ``h * w * 255`` plus a small epsilon, where ``h`` and
    ``w`` are the first two dimensions of ``gt``.
    """
    height, width = gt.shape[0:2]
    abs_diff_total = (pred.float() - gt.float()).abs().sum()
    # presumably inputs are on a 0-255 scale; 1e-4 guards against a zero divisor
    denominator = float(height) * float(width) * 255.0 + 1e-4
    return abs_diff_total / denominator
def f1score_torch(pd, gt):
    """Compute precision, recall and F-measure curves for a prediction.

    Pixels with ``gt > 128`` are treated as positives. Prediction values at
    positive and negative pixels are histogrammed into 255 bins over
    ``[0, 255]``; cumulative sums taken from the highest bin downwards give,
    per threshold, the counts used for precision and recall. The F-measure
    uses a weight of 0.3 on precision.

    Returns:
        A tuple ``(precision, recall, f1)``; each is reshaped to ``(1, 255)``.
    """
    gt_positive_count = torch.sum((gt > 128).float() * 1)
    scores_on_positives = pd[gt > 128]
    scores_on_negatives = pd[gt <= 128]

    def _cumulative_from_top(values):
        # Histogram, then accumulate starting from the highest-valued bin.
        hist = torch.histc(values, bins=255, min=0, max=255)
        return torch.cumsum(torch.flipud(hist), dim=0)

    hits = _cumulative_from_top(scores_on_positives)
    false_alarms = _cumulative_from_top(scores_on_negatives)

    precision = hits / (hits + false_alarms + 1e-4)
    recall = hits / (gt_positive_count + 1e-4)
    f1 = (1 + 0.3) * precision * recall / (0.3 * precision + recall + 1e-4)

    return tuple(
        torch.reshape(curve, (1, curve.shape[0])) for curve in (precision, recall, f1)
    )
def f1_mae_torch(pred, gt, valid_dataset, idx, mybins, hypar):
    """Evaluate one prediction and return its metrics as numpy arrays.

    If ``gt`` has more than two dimensions only its first channel is used.
    The image name (looked up in ``valid_dataset.dataset["im_name"]``) and the
    elapsed evaluation time are printed. ``mybins`` and ``hypar`` are accepted
    but not used by this function.

    Returns:
        ``(precision, recall, f1, mae)`` converted to numpy on the CPU.
    """
    import time

    started = time.time()
    if len(gt.shape) > 2:
        gt = gt[:, :, 0]

    precision, recall, f1 = f1score_torch(pred, gt)
    mae = mae_torch(pred, gt)

    print(valid_dataset.dataset["im_name"][idx] + ".png")
    print("time for evaluation : ", time.time() - started)

    return tuple(
        metric.cpu().data.numpy() for metric in (precision, recall, f1, mae)
    )

ormbg/data_loader_cache.py DELETED Viewed

@@ -1,489 +0,0 @@
-## data loader
-## Acknowledgement:
-## We would like to thank Dr. Ibrahim Almakky (https://scholar.google.co.uk/citations?user=T9MTcK0AAAAJ&hl=en)
-## for his help in implementing the cache mechanism of our DIS dataloader.
-from __future__ import print_function, division
-import albumentations as A
-import numpy as np
-import random
-from copy import deepcopy
-import json
-from tqdm import tqdm
-from skimage import io
-import os
-from glob import glob
-import torch
-from torch.utils.data import Dataset, DataLoader
-from torchvision import transforms
-from torchvision.transforms.functional import normalize
-import torch.nn.functional as F
-#### --------------------- DIS dataloader cache ---------------------####
def get_im_gt_name_dict(datasets, flag="valid"):
    """Collect image and ground-truth file lists for each dataset description.

    Each entry of ``datasets`` is a dict read for the keys ``name``,
    ``im_dir``, ``gt_dir``, ``im_ext``, ``gt_ext`` and ``cache_dir``. Files are
    discovered with the glob pattern ``**.[jp][pn]g`` in both directories.

    With ``flag == "train"`` all datasets are merged into a single entry whose
    name is the ``_``-joined dataset names; merging requires ``.jpg`` images
    and ``.png`` masks, otherwise an error is printed and ``exit()`` is called.
    With any other flag one entry per dataset is returned.

    Returns:
        A list of dicts with the keys ``dataset_name``, ``im_path``,
        ``gt_path``, ``im_ext``, ``gt_ext`` and ``cache_dir``.
    """
    print("------------------------------", flag, "--------------------------------")
    total = len(datasets)
    collected = []
    for index, spec in enumerate(datasets):
        print(
            "--->>>", flag, " dataset ", index, "/", total, " ", spec["name"], "<<<---"
        )
        im_dir = spec["im_dir"]
        gt_dir = spec["gt_dir"]
        im_files = glob(os.path.join(im_dir, "*" + "*.[jp][pn]g"))
        gt_files = glob(os.path.join(gt_dir, "*" + "*.[jp][pn]g"))
        print("-im-", spec["name"], spec["im_dir"], ": ", len(im_files))
        print("-gt-", spec["name"], spec["gt_dir"], ": ", len(gt_files))

        # Validation/inference datasets stay separate; the first training
        # dataset seeds the single merged entry.
        if flag != "train" or len(collected) == 0:
            collected.append(
                {
                    "dataset_name": spec["name"],
                    "im_path": im_files,
                    "gt_path": gt_files,
                    "im_ext": spec["im_ext"],
                    "gt_ext": spec["gt_ext"],
                    "cache_dir": spec["cache_dir"],
                }
            )
            continue

        # Fold every further training dataset into the first entry.
        merged = collected[0]
        merged["dataset_name"] = merged["dataset_name"] + "_" + spec["name"]
        merged["im_path"] = merged["im_path"] + im_files
        merged["gt_path"] = merged["gt_path"] + gt_files
        if spec["im_ext"] != ".jpg" or spec["gt_ext"] != ".png":
            print(
                "Error: Please make sure all you images and ground truth masks are in jpg and png format respectively !!!"
            )
            exit()
        merged["im_ext"] = ".jpg"
        merged["gt_ext"] = ".png"
        # The merged cache lives next to the per-dataset cache directories.
        parent_dir = os.sep.join(spec["cache_dir"].split(os.sep)[0:-1])
        merged["cache_dir"] = parent_dir + os.sep + merged["dataset_name"]
    return collected
def create_dataloaders(
    name_im_gt_list,
    cache_size=[],
    cache_boost=True,
    my_transforms=[],
    batch_size=1,
    shuffle=False,
):
    """Build one ``DataLoader`` and one ``GOSDatasetCache`` per list entry.

    The number of loader workers is derived from ``batch_size``: 1 by default,
    2 above a batch size of 1, 4 above 4 and 8 above 8. Each dataset uses its
    entry's ``cache_dir`` as cache path and a ``transforms.Compose`` built from
    ``my_transforms``.

    Returns:
        ``(dataloaders, datasets)`` as two parallel lists; both are empty when
        ``name_im_gt_list`` is empty.
    """
    loaders, cached_sets = [], []
    if not len(name_im_gt_list):
        return loaders, cached_sets

    worker_count = 1
    for threshold, workers in ((1, 2), (4, 4), (8, 8)):
        if batch_size > threshold:
            worker_count = workers

    for entry in name_im_gt_list:
        cached_set = GOSDatasetCache(
            [entry],
            cache_size=cache_size,
            cache_path=entry["cache_dir"],
            cache_boost=cache_boost,
            transform=transforms.Compose(my_transforms),
        )
        loader = DataLoader(
            cached_set,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=worker_count,
        )
        loaders.append(loader)
        cached_sets.append(cached_set)
    return loaders, cached_sets
def im_reader(im_path):
    """Read the image stored at ``im_path`` via ``io.imread`` and return it."""
    image = io.imread(im_path)
    return image
def im_preprocess(im, size):
    """Convert an image array to a channel-first tensor, optionally resized.

    Args:
        im: Image array of shape ``(H, W)`` or ``(H, W, C)``. Two-dimensional
            and single-channel inputs are expanded to three channels.
        size: Target ``(height, width)``. When it has fewer than two entries
            the image is not resized.

    Returns:
        ``(tensor, (H, W))``. Without resizing the tensor is ``float32`` of
        shape ``(C, H, W)``; with resizing it is bilinearly interpolated to
        ``size`` and cast to ``uint8``. ``(H, W)`` is the input's height and
        width before any resizing.
    """
    if len(im.shape) < 3:
        im = im[:, :, np.newaxis]
    if im.shape[2] == 1:
        im = np.repeat(im, 3, axis=2)
    original_hw = im.shape[0:2]

    # (H, W, C) -> (C, H, W)
    im_tensor = torch.tensor(im.copy(), dtype=torch.float32).permute(2, 0, 1)
    if len(size) < 2:
        return im_tensor, original_hw

    # F.interpolate replaces the deprecated F.upsample; it needs a batch axis.
    resized = F.interpolate(im_tensor.unsqueeze(0), size, mode="bilinear")
    return resized.squeeze(0).type(torch.uint8), original_hw
def gt_preprocess(gt, size):
    """Convert a ground-truth mask to a ``uint8`` tensor, optionally resized.

    Args:
        gt: Mask array of shape ``(H, W)`` or ``(H, W, C)``; for the latter
            only the first channel is used.
        size: Target ``(height, width)``. When it has fewer than two entries
            the mask is not resized.

    Returns:
        ``(tensor, (H, W))`` where the tensor is ``uint8`` with a leading
        channel axis of size 1 and ``(H, W)`` is the mask's size before any
        resizing.
    """
    if len(gt.shape) > 2:
        gt = gt[:, :, 0]
    original_hw = gt.shape[0:2]

    gt_tensor = torch.unsqueeze(torch.tensor(gt, dtype=torch.uint8), 0)
    if len(size) < 2:
        return gt_tensor, original_hw

    # Cast with .to() rather than re-wrapping the tensor in torch.tensor(),
    # which copy-constructs and emits a UserWarning. F.interpolate replaces
    # the deprecated F.upsample and needs a batch axis.
    batched = gt_tensor.to(torch.float32).unsqueeze(0)
    resized = F.interpolate(batched, size, mode="bilinear")
    return resized.squeeze(0).type(torch.uint8), original_hw
class GOSGridDropout(object):
    """Sample transform that applies ``A.GridDropout`` to the image only.

    All constructor arguments are forwarded unchanged to ``A.GridDropout``.
    """

    def __init__(
        self,
        ratio=0.5,
        unit_size_min=100,
        unit_size_max=100,
        holes_number_x=None,
        holes_number_y=None,
        shift_x=0,
        shift_y=0,
        random_offset=True,
        fill_value=0,
        mask_fill_value=None,
        always_apply=None,
        p=1.0,
    ):
        grid_dropout_options = dict(
            ratio=ratio,
            unit_size_min=unit_size_min,
            unit_size_max=unit_size_max,
            holes_number_x=holes_number_x,
            holes_number_y=holes_number_y,
            shift_x=shift_x,
            shift_y=shift_y,
            random_offset=random_offset,
            fill_value=fill_value,
            mask_fill_value=mask_fill_value,
            always_apply=always_apply,
            p=p,
        )
        self.transform = A.GridDropout(**grid_dropout_options)

    def __call__(self, sample):
        """Return a copy of ``sample`` whose ``image`` went through the transform.

        ``imidx``, ``label`` and ``shape`` are passed through untouched.
        """
        imidx = sample["imidx"]
        chw_image = sample["image"]
        label = sample["label"]
        shape = sample["shape"]

        # The wrapped transform is fed a channel-last numpy array.
        hwc_array = chw_image.permute(1, 2, 0).numpy()
        dropped = self.transform(image=hwc_array)["image"]

        # Back to a channel-first torch tensor.
        restored = torch.tensor(dropped).permute(2, 0, 1)
        return {"imidx": imidx, "image": restored, "label": label, "shape": shape}
class GOSRandomHFlip(object):
    """Sample transform that mirrors image and label along their last axis.

    Args:
        prob: Probability with which a sample is flipped.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, sample):
        """Return ``sample`` with ``image`` and ``label`` flipped together.

        ``imidx`` and ``shape`` are passed through untouched.
        """
        imidx, image, label, shape = (
            sample["imidx"],
            sample["image"],
            sample["label"],
            sample["shape"],
        )
        # Flip when the draw falls below ``prob``. The previous ``>=`` test
        # flipped with probability ``1 - prob``, so ``prob=1.0`` never flipped
        # and ``prob=0.0`` always did.
        if random.random() < self.prob:
            image = torch.flip(image, dims=[2])
            label = torch.flip(label, dims=[2])
        return {"imidx": imidx, "image": image, "label": label, "shape": shape}
class GOSDatasetCache(Dataset):
    """Dataset that preprocesses images and masks once and caches them on disk.

    On construction the given dataset descriptions are flattened into
    ``self.dataset`` (a dict of parallel lists). A cache folder named after the
    dataset names and ``cache_size`` is then either created (every image and
    mask is preprocessed and saved as a ``.pt`` file, plus a JSON index) or,
    if the folder already exists, loaded from its JSON index.

    Args:
        name_im_gt_list: List of dicts read for the keys ``dataset_name``,
            ``im_path``, ``gt_path``, ``im_ext`` and ``gt_ext``.
        cache_size: Target size handed to the preprocessing functions; its
            entries also become part of the cache folder name.
        cache_path: Directory that holds the cache folders.
        cache_file_name: File name of the JSON index inside the cache folder.
        cache_boost: When true, all images and masks are additionally kept in
            memory as two stacked tensors (and saved as two ``.pt`` files).
        transform: Optional callable applied to each sample dict.
    """

    def __init__(
        self,
        name_im_gt_list,
        cache_size=[],
        cache_path="./cache",
        cache_file_name="dataset.json",
        cache_boost=False,
        transform=None,
    ):
        self.cache_size = cache_size
        self.cache_path = cache_path
        self.cache_file_name = cache_file_name
        self.cache_boost = cache_boost
        # Stacked tensors of all images / masks; only filled with cache_boost.
        self.ims_pt = None
        self.gts_pt = None
        # Prefix of the stacked-tensor file names, set regardless of cache_boost.
        self.cache_boost_name = cache_file_name.split(".json")[0]
        self.transform = transform

        dataset_names = []
        dt_name_list = []  # dataset name per image
        im_name_list = []  # image name (file name up to the image extension)
        im_path_list = []
        gt_path_list = []
        im_ext_list = []
        gt_ext_list = []
        for entry in name_im_gt_list:
            name = entry["dataset_name"]
            im_paths = entry["im_path"]
            gt_paths = entry["gt_path"]
            dataset_names.append(name)
            dt_name_list.extend([name] * len(im_paths))
            im_name_list.extend(
                path.split(os.sep)[-1].split(entry["im_ext"])[0] for path in im_paths
            )
            im_path_list.extend(im_paths)
            gt_path_list.extend(gt_paths)
            im_ext_list.extend([entry["im_ext"]] * len(im_paths))
            gt_ext_list.extend([entry["gt_ext"]] * len(gt_paths))

        self.dataset = {
            "data_name": dt_name_list,
            "im_name": im_name_list,
            "im_path": im_path_list,
            "ori_im_path": deepcopy(im_path_list),
            "gt_path": gt_path_list,
            "ori_gt_path": deepcopy(gt_path_list),
            "im_shp": [],
            "gt_shp": [],
            "im_ext": im_ext_list,
            "gt_ext": gt_ext_list,
            "ims_pt_dir": "",
            "gts_pt_dir": "",
        }
        self.dataset = self.manage_cache(dataset_names)

    def manage_cache(self, dataset_names):
        """Create the cache folder if it is missing, otherwise load it.

        Returns:
            The dataset dict describing the cached files.
        """
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(self.cache_path, exist_ok=True)
        cache_folder = os.path.join(
            self.cache_path,
            "_".join(dataset_names) + "_" + "x".join(str(x) for x in self.cache_size),
        )
        if not os.path.exists(cache_folder):
            return self.cache(cache_folder)
        return self.load_cache(cache_folder)

    def cache(self, cache_folder):
        """Preprocess every image/mask, save them to ``cache_folder``, write the index.

        Returns:
            A copy of ``self.dataset`` whose paths point at the cached files.

        Raises:
            FileNotFoundError: If the JSON index cannot be written; the
                underlying error is attached as ``__cause__``.
        """
        os.mkdir(cache_folder)
        cached_dataset = deepcopy(self.dataset)
        has_gt = len(self.dataset["gt_path"]) != 0
        ims_pt_list = []
        gts_pt_list = []
        for i, im_path in tqdm(
            enumerate(self.dataset["im_path"]), total=len(self.dataset["im_path"])
        ):
            im_id = cached_dataset["im_name"][i]
            file_stem = self.dataset["data_name"][i] + "_" + im_id
            print("im_path: ", im_path)

            im, im_shp = im_preprocess(im_reader(im_path), self.cache_size)
            im_cache_file = os.path.join(cache_folder, file_stem + "_im.pt")
            torch.save(im, im_cache_file)
            cached_dataset["im_path"][i] = im_cache_file
            if self.cache_boost:
                ims_pt_list.append(torch.unsqueeze(im, 0))

            if has_gt:
                gt = im_reader(self.dataset["gt_path"][i])
            else:
                # No ground truth: use an all-zero mask matching the image's
                # spatial size. ``im`` is channel-first here, so the spatial
                # dims are the last two (``shape[0:2]`` would be C x H).
                gt = np.zeros(tuple(im.shape[-2:]))
            gt, gt_shp = gt_preprocess(gt, self.cache_size)
            gt_cache_file = os.path.join(cache_folder, file_stem + "_gt.pt")
            torch.save(gt, gt_cache_file)
            if has_gt:
                cached_dataset["gt_path"][i] = gt_cache_file
            else:
                cached_dataset["gt_path"].append(gt_cache_file)
            if self.cache_boost:
                gts_pt_list.append(torch.unsqueeze(gt, 0))

            cached_dataset["im_shp"].append(im_shp)
            cached_dataset["gt_shp"].append(gt_shp)

        if self.cache_boost:
            cached_dataset["ims_pt_dir"] = os.path.join(
                cache_folder, self.cache_boost_name + "_ims.pt"
            )
            cached_dataset["gts_pt_dir"] = os.path.join(
                cache_folder, self.cache_boost_name + "_gts.pt"
            )
            # Concatenate once and reuse the result for both RAM and disk.
            self.ims_pt = torch.cat(ims_pt_list, dim=0)
            self.gts_pt = torch.cat(gts_pt_list, dim=0)
            torch.save(self.ims_pt, cached_dataset["ims_pt_dir"])
            torch.save(self.gts_pt, cached_dataset["gts_pt_dir"])

        try:
            with open(
                os.path.join(cache_folder, self.cache_file_name), "w"
            ) as json_file:
                json.dump(cached_dataset, json_file)
        except Exception as err:
            # Exception type kept for existing callers; cause is now chained.
            raise FileNotFoundError("Cannot create JSON") from err
        return cached_dataset

    def load_cache(self, cache_folder):
        """Load the JSON index from ``cache_folder`` and return it.

        With ``cache_boost`` the stacked image/mask tensors referenced by the
        index are also loaded onto the CPU.
        """
        with open(os.path.join(cache_folder, self.cache_file_name), "r") as json_file:
            dataset = json.load(json_file)
        if self.cache_boost:
            # NOTE(review): torch.load unpickles; only use trusted cache files.
            self.ims_pt = torch.load(dataset["ims_pt_dir"], map_location="cpu")
            self.gts_pt = torch.load(dataset["gts_pt_dir"], map_location="cpu")
        return dataset

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.dataset["im_path"])

    def __getitem__(self, idx):
        """Return the sample dict for index ``idx``.

        The dict holds ``imidx``, ``image`` and ``label`` (both divided by
        255) and ``shape`` (the stored image shape); ``self.transform`` is
        applied to it when set.
        """
        if self.cache_boost and self.ims_pt is not None:
            im = self.ims_pt[idx]
            gt = self.gts_pt[idx]
        else:
            # Rebuild each path from cache_path plus the stored path's last
            # two components (cache folder and file name).
            im_pt_path = os.path.join(
                self.cache_path,
                os.sep.join(self.dataset["im_path"][idx].split(os.sep)[-2:]),
            )
            im = torch.load(im_pt_path)
            gt_pt_path = os.path.join(
                self.cache_path,
                os.sep.join(self.dataset["gt_path"][idx].split(os.sep)[-2:]),
            )
            gt = torch.load(gt_pt_path)

        im_shp = self.dataset["im_shp"][idx]
        im = torch.divide(im, 255.0)
        gt = torch.divide(gt, 255.0)

        sample = {
            "imidx": torch.from_numpy(np.array(idx)),
            "image": im,
            "label": gt,
            "shape": torch.from_numpy(np.array(im_shp)),
        }
        if self.transform:
            sample = self.transform(sample)
        return sample

ormbg/inference.py DELETED Viewed

@@ -1,110 +0,0 @@
-import os
-import torch
-import argparse
-import numpy as np
-from PIL import Image
-from skimage import io
-from models.ormbg import ORMBG
-import torch.nn.functional as F
def parse_args():
    """Parse the command-line options of the ORMBG inference script.

    Returns:
        An ``argparse.Namespace`` with the attributes ``image``, ``output``,
        ``model_path`` and ``compare``.
    """
    parser = argparse.ArgumentParser(
        description="Remove background from images using ORMBG model."
    )
    parser.add_argument(
        "--image",
        type=str,
        default=os.path.join("examples", "image", "example01.jpeg"),
        help="Path to the input image file.",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="example01_no_background.png",
        help="Path to the output image file.",
    )
    parser.add_argument(
        "--model-path",
        type=str,
        default=os.path.join("models", "ormbg.pth"),
        help="Path to the model file.",
    )
    # store_true makes the side-by-side output opt-in, matching the help text.
    # The previous store_false enabled it by default and passing --compare
    # switched it off.
    parser.add_argument(
        "--compare",
        action="store_true",
        help="Flag to save the original and processed images side by side.",
    )
    return parser.parse_args()
def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
    """Turn an image array into a normalized, batched model input.

    Args:
        im: Image array of shape ``(H, W)`` or ``(H, W, C)``. Two-dimensional
            and single-channel inputs are expanded to three channels.
        model_input_size: Target ``[height, width]`` for the resize.

    Returns:
        A float tensor of shape ``(1, C, height, width)``: the image is
        bilinearly resized, cast to ``uint8`` and divided by 255.
    """
    if len(im.shape) < 3:
        im = im[:, :, np.newaxis]
    # Repeat a single channel three times. A one-channel tensor was returned
    # before, which a network built with the default ``in_ch=3`` cannot consume.
    if im.shape[2] == 1:
        im = np.repeat(im, 3, axis=2)
    chw = torch.tensor(im, dtype=torch.float32).permute(2, 0, 1)
    resized = F.interpolate(
        torch.unsqueeze(chw, 0), size=model_input_size, mode="bilinear"
    ).type(torch.uint8)
    return torch.divide(resized, 255.0)
def postprocess_image(result: torch.Tensor, im_size: list) -> np.ndarray:
    """Resize a batched prediction to ``im_size`` and convert it to ``uint8``.

    Args:
        result: Batched prediction tensor, ``(1, C, h, w)``.
        im_size: Target ``[height, width]``.

    Returns:
        A ``uint8`` numpy array scaled to ``0..255`` with singleton
        dimensions removed. Values are min-max normalized; a constant
        prediction (no value range) is clamped to ``[0, 1]`` instead.
    """
    result = torch.squeeze(F.interpolate(result, size=im_size, mode="bilinear"), 0)
    lowest = torch.min(result)
    span = torch.max(result) - lowest
    if span > 0:
        result = (result - lowest) / span
    else:
        # Min-max normalization would divide by zero here and yield NaNs.
        result = torch.clamp(result, 0.0, 1.0)
    im_array = (result * 255).permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
    return np.squeeze(im_array)
def inference(args):
    """Remove the background of ``args.image`` and save the result.

    Args:
        args: Namespace with ``image`` (input path), ``output`` (output path),
            ``model_path`` (checkpoint path) and ``compare``. With ``compare``
            set, the original and the cut-out are saved side by side as a PNG
            named after ``output``; otherwise only the cut-out is saved to
            ``output``.
    """
    image_path = args.image
    result_name = args.output
    model_path = args.model_path
    compare = args.compare

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = ORMBG()
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    # map_location places the weights on whichever device was selected above.
    net.load_state_dict(torch.load(model_path, map_location=device))
    net = net.to(device)
    net.eval()

    model_input_size = [1024, 1024]
    orig_im = io.imread(image_path)
    orig_im_size = orig_im.shape[0:2]
    image = preprocess_image(orig_im, model_input_size).to(device)

    # No gradients are needed at inference time; skipping the autograd graph
    # saves memory on the 1024 x 1024 forward pass.
    with torch.no_grad():
        result = net(image)

    # The first element of the first returned list is used as the mask.
    result_image = postprocess_image(result[0][0], orig_im_size)

    pil_im = Image.fromarray(result_image)
    if pil_im.mode == "RGBA":
        pil_im = pil_im.convert("RGB")

    # Paste the original onto a transparent canvas, using the mask as alpha.
    no_bg_image = Image.new("RGBA", pil_im.size, (0, 0, 0, 0))
    orig_image = Image.open(image_path)
    no_bg_image.paste(orig_image, mask=pil_im)

    if compare:
        combined_width = orig_image.width + no_bg_image.width
        combined_image = Image.new("RGBA", (combined_width, orig_image.height))
        combined_image.paste(orig_image, (0, 0))
        combined_image.paste(no_bg_image, (orig_image.width, 0))
        stacked_output_path = os.path.splitext(result_name)[0] + ".png"
        combined_image.save(stacked_output_path)
    else:
        no_bg_image.save(result_name)


if __name__ == "__main__":
    inference(parse_args())

ormbg/models/ormbg.py DELETED Viewed

@@ -1,484 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-# https://github.com/xuebinqin/DIS/blob/main/IS-Net/models/isnet.py
class REBNCONV(nn.Module):
    """3x3 convolution followed by batch normalization and an in-place ReLU.

    ``dirate`` sets both the dilation and the padding of the convolution, so
    with ``stride=1`` the spatial size of the input is preserved.
    """

    def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
        super().__init__()
        self.conv_s1 = nn.Conv2d(
            in_ch,
            out_ch,
            3,
            padding=1 * dirate,
            dilation=1 * dirate,
            stride=stride,
        )
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply convolution, batch norm and ReLU to ``x`` in that order."""
        convolved = self.conv_s1(x)
        normalized = self.bn_s1(convolved)
        return self.relu_s1(normalized)
-## upsample tensor 'src' to have the same spatial size with tensor 'tar'
-def _upsample_like(src, tar):
-    src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
-    return src
-### RSU-7 ###
class RSU7(nn.Module):
    """Residual U-block with five pooled encoder levels and a dilated bottom.

    The input is first mapped to ``out_ch`` channels; the inner encoder and
    decoder work on ``mid_ch`` channels, and the decoder output is added to the
    mapped input. ``img_size`` is accepted but not used.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
        super().__init__()
        self.in_ch = in_ch
        self.mid_ch = mid_ch
        self.out_ch = out_ch

        def halving_pool():
            return nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = halving_pool()
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = halving_pool()
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = halving_pool()
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = halving_pool()
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = halving_pool()
        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        """Run the encoder/decoder and add the result to the mapped input."""
        # Unpacking fails early for inputs that are not 4-D.
        b, c, h, w = x.shape
        hxin = self.rebnconvin(x)

        # Encoder: keep each level's output as a skip, then pool.
        encoder = (
            (self.rebnconv1, self.pool1),
            (self.rebnconv2, self.pool2),
            (self.rebnconv3, self.pool3),
            (self.rebnconv4, self.pool4),
            (self.rebnconv5, self.pool5),
        )
        skips = []
        feat = hxin
        for conv, pool in encoder:
            skip = conv(feat)
            skips.append(skip)
            feat = pool(skip)

        hx6 = self.rebnconv6(feat)
        hx7 = self.rebnconv7(hx6)
        dec = self.rebnconv6d(torch.cat((hx7, hx6), 1))

        # Decoder: upsample to each skip's size, concatenate, convolve.
        decoder = (
            self.rebnconv5d,
            self.rebnconv4d,
            self.rebnconv3d,
            self.rebnconv2d,
            self.rebnconv1d,
        )
        for stage, skip in zip(decoder, reversed(skips)):
            dec = stage(torch.cat((_upsample_like(dec, skip), skip), 1))
        return dec + hxin
-### RSU-6 ###
class RSU6(nn.Module):
    """Residual U-block with four pooled encoder levels and a dilated bottom.

    The input is first mapped to ``out_ch`` channels; the inner encoder and
    decoder work on ``mid_ch`` channels, and the decoder output is added to the
    mapped input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        def halving_pool():
            return nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = halving_pool()
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = halving_pool()
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = halving_pool()
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = halving_pool()
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        """Run the encoder/decoder and add the result to the mapped input."""
        hxin = self.rebnconvin(x)

        # Encoder: keep each level's output as a skip, then pool.
        encoder = (
            (self.rebnconv1, self.pool1),
            (self.rebnconv2, self.pool2),
            (self.rebnconv3, self.pool3),
            (self.rebnconv4, self.pool4),
        )
        skips = []
        feat = hxin
        for conv, pool in encoder:
            skip = conv(feat)
            skips.append(skip)
            feat = pool(skip)

        hx5 = self.rebnconv5(feat)
        hx6 = self.rebnconv6(hx5)
        dec = self.rebnconv5d(torch.cat((hx6, hx5), 1))

        # Decoder: upsample to each skip's size, concatenate, convolve.
        decoder = (
            self.rebnconv4d,
            self.rebnconv3d,
            self.rebnconv2d,
            self.rebnconv1d,
        )
        for stage, skip in zip(decoder, reversed(skips)):
            dec = stage(torch.cat((_upsample_like(dec, skip), skip), 1))
        return dec + hxin
-### RSU-5 ###
class RSU5(nn.Module):
    """Residual U-block with three pooled encoder levels and a dilated bottom.

    The input is first mapped to ``out_ch`` channels; the inner encoder and
    decoder work on ``mid_ch`` channels, and the decoder output is added to the
    mapped input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        def halving_pool():
            return nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = halving_pool()
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = halving_pool()
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = halving_pool()
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        """Run the encoder/decoder and add the result to the mapped input."""
        hxin = self.rebnconvin(x)

        # Encoder: keep each level's output as a skip, then pool.
        encoder = (
            (self.rebnconv1, self.pool1),
            (self.rebnconv2, self.pool2),
            (self.rebnconv3, self.pool3),
        )
        skips = []
        feat = hxin
        for conv, pool in encoder:
            skip = conv(feat)
            skips.append(skip)
            feat = pool(skip)

        hx4 = self.rebnconv4(feat)
        hx5 = self.rebnconv5(hx4)
        dec = self.rebnconv4d(torch.cat((hx5, hx4), 1))

        # Decoder: upsample to each skip's size, concatenate, convolve.
        decoder = (self.rebnconv3d, self.rebnconv2d, self.rebnconv1d)
        for stage, skip in zip(decoder, reversed(skips)):
            dec = stage(torch.cat((_upsample_like(dec, skip), skip), 1))
        return dec + hxin
-### RSU-4 ###
class RSU4(nn.Module):
    """Residual U-block with two pooled encoder levels and a dilated bottom.

    The input is first mapped to ``out_ch`` channels; the inner encoder and
    decoder work on ``mid_ch`` channels, and the decoder output is added to the
    mapped input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()

        def halving_pool():
            return nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = halving_pool()
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = halving_pool()
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        """Run the encoder/decoder and add the result to the mapped input."""
        hxin = self.rebnconvin(x)

        # Encoder: keep each level's output as a skip, then pool.
        skips = []
        feat = hxin
        for conv, pool in (
            (self.rebnconv1, self.pool1),
            (self.rebnconv2, self.pool2),
        ):
            skip = conv(feat)
            skips.append(skip)
            feat = pool(skip)

        hx3 = self.rebnconv3(feat)
        hx4 = self.rebnconv4(hx3)
        dec = self.rebnconv3d(torch.cat((hx4, hx3), 1))

        # Decoder: upsample to each skip's size, concatenate, convolve.
        for stage, skip in zip((self.rebnconv2d, self.rebnconv1d), reversed(skips)):
            dec = stage(torch.cat((_upsample_like(dec, skip), skip), 1))
        return dec + hxin
-### RSU-4F ###
class RSU4F(nn.Module):
    """Residual U-block without pooling that uses dilated convolutions.

    The encoder applies dilation rates 1, 2, 4 and 8 in sequence; the decoder
    mirrors them with rates 4, 2 and 1. The decoder output is added to the
    input after it has been mapped to ``out_ch`` channels.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super().__init__()
        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        """Run the dilated encoder/decoder and add the result to the mapped input."""
        hxin = self.rebnconvin(x)

        # Encoder: every level's output is kept as a skip connection.
        skips = []
        feat = hxin
        for conv in (self.rebnconv1, self.rebnconv2, self.rebnconv3):
            feat = conv(feat)
            skips.append(feat)

        dec = self.rebnconv4(feat)

        # Decoder: concatenate with the matching skip (no resizing needed).
        decoder = (self.rebnconv3d, self.rebnconv2d, self.rebnconv1d)
        for stage, skip in zip(decoder, reversed(skips)):
            dec = stage(torch.cat((dec, skip), 1))
        return dec + hxin
-class myrebnconv(nn.Module):
-    def __init__(
-        self,
-        in_ch=3,
-        out_ch=1,
-        kernel_size=3,
-        stride=1,
-        padding=1,
-        dilation=1,
-        groups=1,
-    ):
-        super(myrebnconv, self).__init__()
-        self.conv = nn.Conv2d(
-            in_ch,
-            out_ch,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            dilation=dilation,
-            groups=groups,
-        )
-        self.bn = nn.BatchNorm2d(out_ch)
-        self.rl = nn.ReLU(inplace=True)
-    def forward(self, x):
-        return self.rl(self.bn(self.conv(x)))
-bce_loss = nn.BCELoss(size_average=True)
-class ORMBG(nn.Module):
-    def __init__(self, in_ch=3, out_ch=1):
-        super(ORMBG, self).__init__()
-        self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
-        self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage1 = RSU7(64, 32, 64)
-        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage2 = RSU6(64, 32, 128)
-        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage3 = RSU5(128, 64, 256)
-        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage4 = RSU4(256, 128, 512)
-        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage5 = RSU4F(512, 256, 512)
-        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
-        self.stage6 = RSU4F(512, 256, 512)
-        # decoder
-        self.stage5d = RSU4F(1024, 256, 512)
-        self.stage4d = RSU4(1024, 128, 256)
-        self.stage3d = RSU5(512, 64, 128)
-        self.stage2d = RSU6(256, 32, 64)
-        self.stage1d = RSU7(128, 16, 64)
-        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
-        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
-        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
-        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
-        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
-        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
-        # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
-    def compute_loss(self, predictions, ground_truth):
-        loss0, loss = 0.0, 0.0
-        for i in range(0, len(predictions)):
-            loss = loss + bce_loss(predictions[i], ground_truth)
-            if i == 0:
-                loss0 = loss
-        return loss0, loss
-    def forward(self, x):
-        hx = x
-        hxin = self.conv_in(hx)
-        # hx = self.pool_in(hxin)
-        # stage 1
-        hx1 = self.stage1(hxin)
-        hx = self.pool12(hx1)
-        # stage 2
-        hx2 = self.stage2(hx)
-        hx = self.pool23(hx2)
-        # stage 3
-        hx3 = self.stage3(hx)
-        hx = self.pool34(hx3)
-        # stage 4
-        hx4 = self.stage4(hx)
-        hx = self.pool45(hx4)
-        # stage 5
-        hx5 = self.stage5(hx)
-        hx = self.pool56(hx5)
-        # stage 6
-        hx6 = self.stage6(hx)
-        hx6up = _upsample_like(hx6, hx5)
-        # -------------------- decoder --------------------
-        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
-        hx5dup = _upsample_like(hx5d, hx4)
-        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
-        hx4dup = _upsample_like(hx4d, hx3)
-        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
-        hx3dup = _upsample_like(hx3d, hx2)
-        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
-        hx2dup = _upsample_like(hx2d, hx1)
-        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
-        # side output
-        d1 = self.side1(hx1d)
-        d1 = _upsample_like(d1, x)
-        d2 = self.side2(hx2d)
-        d2 = _upsample_like(d2, x)
-        d3 = self.side3(hx3d)
-        d3 = _upsample_like(d3, x)
-        d4 = self.side4(hx4d)
-        d4 = _upsample_like(d4, x)
-        d5 = self.side5(hx5d)
-        d5 = _upsample_like(d5, x)
-        d6 = self.side6(hx6)
-        d6 = _upsample_like(d6, x)
-        return [
-            F.sigmoid(d1),
-            F.sigmoid(d2),
-            F.sigmoid(d3),
-            F.sigmoid(d4),
-            F.sigmoid(d5),
-            F.sigmoid(d6),
-        ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]

ormbg/train_model.py DELETED Viewed

@@ -1,474 +0,0 @@
-import os
-import time
-import torch, gc
-import torch.nn as nn
-import torch.optim as optim
-from torch.autograd import Variable
-import torch.nn.functional as F
-import numpy as np
-from pathlib import Path
-from models.ormbg import ORMBG
-from skimage import io
-from basics import f1_mae_torch
-from data_loader_cache import (
-    get_im_gt_name_dict,
-    create_dataloaders,
-    GOSGridDropout,
-    GOSRandomHFlip,
-)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-def valid(net, valid_dataloaders, valid_datasets, hypar, epoch=0):
-    net.eval()
-    print("Validating...")
-    epoch_num = hypar["max_epoch_num"]
-    val_loss = 0.0
-    tar_loss = 0.0
-    val_cnt = 0.0
-    tmp_f1 = []
-    tmp_mae = []
-    tmp_time = []
-    start_valid = time.time()
-    for k in range(len(valid_dataloaders)):
-        valid_dataloader = valid_dataloaders[k]
-        valid_dataset = valid_datasets[k]
-        val_num = valid_dataset.__len__()
-        mybins = np.arange(0, 256)
-        PRE = np.zeros((val_num, len(mybins) - 1))
-        REC = np.zeros((val_num, len(mybins) - 1))
-        F1 = np.zeros((val_num, len(mybins) - 1))
-        MAE = np.zeros((val_num))
-        for i_val, data_val in enumerate(valid_dataloader):
-            val_cnt = val_cnt + 1.0
-            imidx_val, inputs_val, labels_val, shapes_val = (
-                data_val["imidx"],
-                data_val["image"],
-                data_val["label"],
-                data_val["shape"],
-            )
-            if hypar["model_digit"] == "full":
-                inputs_val = inputs_val.type(torch.FloatTensor)
-                labels_val = labels_val.type(torch.FloatTensor)
-            else:
-                inputs_val = inputs_val.type(torch.HalfTensor)
-                labels_val = labels_val.type(torch.HalfTensor)
-            # wrap them in Variable
-            if torch.cuda.is_available():
-                inputs_val_v, labels_val_v = Variable(
-                    inputs_val.cuda(), requires_grad=False
-                ), Variable(labels_val.cuda(), requires_grad=False)
-            else:
-                inputs_val_v, labels_val_v = Variable(
-                    inputs_val, requires_grad=False
-                ), Variable(labels_val, requires_grad=False)
-            t_start = time.time()
-            ds_val = net(inputs_val_v)[0]
-            t_end = time.time() - t_start
-            tmp_time.append(t_end)
-            # loss2_val, loss_val = muti_loss_fusion(ds_val, labels_val_v)
-            loss2_val, loss_val = net.compute_loss(ds_val, labels_val_v)
-            # compute F measure
-            for t in range(hypar["batch_size_valid"]):
-                i_test = imidx_val[t].data.numpy()
-                pred_val = ds_val[0][t, :, :, :]  # B x 1 x H x W
-                ## recover the prediction spatial size to the orignal image size
-                pred_val = torch.squeeze(
-                    F.upsample(
-                        torch.unsqueeze(pred_val, 0),
-                        (shapes_val[t][0], shapes_val[t][1]),
-                        mode="bilinear",
-                    )
-                )
-                # pred_val = normPRED(pred_val)
-                ma = torch.max(pred_val)
-                mi = torch.min(pred_val)
-                pred_val = (pred_val - mi) / (ma - mi)  # max = 1
-                if len(valid_dataset.dataset["ori_gt_path"]) != 0:
-                    gt = np.squeeze(
-                        io.imread(valid_dataset.dataset["ori_gt_path"][i_test])
-                    )  # max = 255
-                    if gt.max() == 1:
-                        gt = gt * 255
-                else:
-                    gt = np.zeros((shapes_val[t][0], shapes_val[t][1]))
-                with torch.no_grad():
-                    gt = torch.tensor(gt).to(device)
-                pre, rec, f1, mae = f1_mae_torch(
-                    pred_val * 255, gt, valid_dataset, i_test, mybins, hypar
-                )
-                PRE[i_test, :] = pre
-                REC[i_test, :] = rec
-                F1[i_test, :] = f1
-                MAE[i_test] = mae
-                del ds_val, gt
-                gc.collect()
-                torch.cuda.empty_cache()
-            # if(loss_val.data[0]>1):
-            val_loss += loss_val.item()  # data[0]
-            tar_loss += loss2_val.item()  # data[0]
-            print(
-                "[validating: %5d/%5d] val_ls:%f, tar_ls: %f, f1: %f, mae: %f, time: %f"
-                % (
-                    i_val,
-                    val_num,
-                    val_loss / (i_val + 1),
-                    tar_loss / (i_val + 1),
-                    np.amax(F1[i_test, :]),
-                    MAE[i_test],
-                    t_end,
-                )
-            )
-            del loss2_val, loss_val
-        print("============================")
-        PRE_m = np.mean(PRE, 0)
-        REC_m = np.mean(REC, 0)
-        f1_m = (1 + 0.3) * PRE_m * REC_m / (0.3 * PRE_m + REC_m + 1e-8)
-        tmp_f1.append(np.amax(f1_m))
-        tmp_mae.append(np.mean(MAE))
-    return tmp_f1, tmp_mae, val_loss, tar_loss, i_val, tmp_time
-def train(
-    net,
-    optimizer,
-    train_dataloaders,
-    train_datasets,
-    valid_dataloaders,
-    valid_datasets,
-    hypar,
-):
-    model_path = hypar["model_path"]
-    model_save_fre = hypar["model_save_fre"]
-    max_ite = hypar["max_ite"]
-    batch_size_train = hypar["batch_size_train"]
-    batch_size_valid = hypar["batch_size_valid"]
-    if not os.path.exists(model_path):
-        os.mkdir(model_path)
-    ite_num = hypar["start_ite"]  # count the toal iteration number
-    ite_num4val = 0  #
-    running_loss = 0.0  # count the toal loss
-    running_tar_loss = 0.0  # count the target output loss
-    last_f1 = [0 for x in range(len(valid_dataloaders))]
-    train_num = train_datasets[0].__len__()
-    net.train()
-    start_last = time.time()
-    gos_dataloader = train_dataloaders[0]
-    epoch_num = hypar["max_epoch_num"]
-    notgood_cnt = 0
-    for epoch in range(epoch_num):
-        for i, data in enumerate(gos_dataloader):
-            if ite_num >= max_ite:
-                print("Training Reached the Maximal Iteration Number ", max_ite)
-                exit()
-            # start_read = time.time()
-            ite_num = ite_num + 1
-            ite_num4val = ite_num4val + 1
-            # get the inputs
-            inputs, labels = data["image"], data["label"]
-            if hypar["model_digit"] == "full":
-                inputs = inputs.type(torch.FloatTensor)
-                labels = labels.type(torch.FloatTensor)
-            else:
-                inputs = inputs.type(torch.HalfTensor)
-                labels = labels.type(torch.HalfTensor)
-            # wrap them in Variable
-            if torch.cuda.is_available():
-                inputs_v, labels_v = Variable(
-                    inputs.cuda(), requires_grad=False
-                ), Variable(labels.cuda(), requires_grad=False)
-            else:
-                inputs_v, labels_v = Variable(inputs, requires_grad=False), Variable(
-                    labels, requires_grad=False
-                )
-            # y zero the parameter gradients
-            start_inf_loss_back = time.time()
-            optimizer.zero_grad()
-            ds, _ = net(inputs_v)
-            loss2, loss = net.compute_loss(ds, labels_v)
-            loss.backward()
-            optimizer.step()
-            # # print statistics
-            running_loss += loss.item()
-            running_tar_loss += loss2.item()
-            # del outputs, loss
-            del ds, loss2, loss
-            end_inf_loss_back = time.time() - start_inf_loss_back
-            print(
-                ">>>"
-                + model_path.split("/")[-1]
-                + " - [epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f, time-per-iter: %3f s, time_read: %3f"
-                % (
-                    epoch + 1,
-                    epoch_num,
-                    (i + 1) * batch_size_train,
-                    train_num,
-                    ite_num,
-                    running_loss / ite_num4val,
-                    running_tar_loss / ite_num4val,
-                    time.time() - start_last,
-                    time.time() - start_last - end_inf_loss_back,
-                )
-            )
-            start_last = time.time()
-            if ite_num % model_save_fre == 0:  # validate every 2000 iterations
-                notgood_cnt += 1
-                net.eval()
-                tmp_f1, tmp_mae, val_loss, tar_loss, i_val, tmp_time = valid(
-                    net, valid_dataloaders, valid_datasets, hypar, epoch
-                )
-                net.train()  # resume train
-                tmp_out = 0
-                print("last_f1:", last_f1)
-                print("tmp_f1:", tmp_f1)
-                for fi in range(len(last_f1)):
-                    if tmp_f1[fi] > last_f1[fi]:
-                        tmp_out = 1
-                print("tmp_out:", tmp_out)
-                if tmp_out:
-                    notgood_cnt = 0
-                    last_f1 = tmp_f1
-                    tmp_f1_str = [str(round(f1x, 4)) for f1x in tmp_f1]
-                    tmp_mae_str = [str(round(mx, 4)) for mx in tmp_mae]
-                    maxf1 = "_".join(tmp_f1_str)
-                    meanM = "_".join(tmp_mae_str)
-                    # .cpu().detach().numpy()
-                    model_name = (
-                        "/gpu_itr_"
-                        + str(ite_num)
-                        + "_traLoss_"
-                        + str(np.round(running_loss / ite_num4val, 4))
-                        + "_traTarLoss_"
-                        + str(np.round(running_tar_loss / ite_num4val, 4))
-                        + "_valLoss_"
-                        + str(np.round(val_loss / (i_val + 1), 4))
-                        + "_valTarLoss_"
-                        + str(np.round(tar_loss / (i_val + 1), 4))
-                        + "_maxF1_"
-                        + maxf1
-                        + "_mae_"
-                        + meanM
-                        + "_time_"
-                        + str(
-                            np.round(np.mean(np.array(tmp_time)) / batch_size_valid, 6)
-                        )
-                        + ".pth"
-                    )
-                    torch.save(net.state_dict(), model_path + model_name)
-                running_loss = 0.0
-                running_tar_loss = 0.0
-                ite_num4val = 0
-                if notgood_cnt >= hypar["early_stop"]:
-                    print(
-                        "No improvements in the last "
-                        + str(notgood_cnt)
-                        + " validation periods, so training stopped !"
-                    )
-                    exit()
-    print("Training Reaches The Maximum Epoch Number")
-def main(train_datasets, valid_datasets, hypar):
-    print("--- create training dataloader ---")
-    train_nm_im_gt_list = get_im_gt_name_dict(train_datasets, flag="train")
-    ## build dataloader for training datasets
-    train_dataloaders, train_datasets = create_dataloaders(
-        train_nm_im_gt_list,
-        cache_size=hypar["cache_size"],
-        cache_boost=hypar["cache_boost_train"],
-        my_transforms=[GOSGridDropout(), GOSRandomHFlip()],
-        batch_size=hypar["batch_size_train"],
-        shuffle=True,
-    )
-    valid_nm_im_gt_list = get_im_gt_name_dict(valid_datasets, flag="valid")
-    valid_dataloaders, valid_datasets = create_dataloaders(
-        valid_nm_im_gt_list,
-        cache_size=hypar["cache_size"],
-        cache_boost=hypar["cache_boost_valid"],
-        my_transforms=[],
-        batch_size=hypar["batch_size_valid"],
-        shuffle=False,
-    )
-    net = hypar["model"]
-    if hypar["model_digit"] == "half":
-        net.half()
-        for layer in net.modules():
-            if isinstance(layer, nn.BatchNorm2d):
-                layer.float()
-    if torch.cuda.is_available():
-        net.cuda()
-    if hypar["restore_model"] != "":
-        print("restore model from:")
-        print(hypar["model_path"] + "/" + hypar["restore_model"])
-        if torch.cuda.is_available():
-            net.load_state_dict(
-                torch.load(hypar["model_path"] + "/" + hypar["restore_model"])
-            )
-        else:
-            net.load_state_dict(
-                torch.load(
-                    hypar["model_path"] + "/" + hypar["restore_model"],
-                    map_location="cpu",
-                )
-            )
-    optimizer = optim.Adam(
-        net.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0
-    )
-    train(
-        net,
-        optimizer,
-        train_dataloaders,
-        train_datasets,
-        valid_dataloaders,
-        valid_datasets,
-        hypar,
-    )
-if __name__ == "__main__":
-    output_model_folder = "saved_models"
-    Path(output_model_folder).mkdir(parents=True, exist_ok=True)
-    train_datasets, valid_datasets = [], []
-    dataset_1, dataset_1 = {}, {}
-    dataset_training = {
-        "name": "ormbg-training",
-        "im_dir": str(Path("dataset", "training", "im")),
-        "gt_dir": str(Path("dataset", "training", "gt")),
-        "im_ext": ".png",
-        "gt_ext": ".png",
-        "cache_dir": str(Path("cache", "teacher", "training")),
-    }
-    dataset_validation = {
-        "name": "ormbg-training",
-        "im_dir": str(Path("dataset", "validation", "im")),
-        "gt_dir": str(Path("dataset", "validation", "gt")),
-        "im_ext": ".png",
-        "gt_ext": ".png",
-        "cache_dir": str(Path("cache", "teacher", "validation")),
-    }
-    train_datasets = [dataset_training]
-    valid_datasets = [dataset_validation]
-    ### --------------- STEP 2: Configuring the hyperparamters for Training, validation and inferencing ---------------
-    hypar = {}
-    hypar["model"] = ORMBG()
-    hypar["seed"] = 0
-    ## model weights path
-    hypar["model_path"] = "saved_models"
-    ## name of the segmentation model weights .pth for resume training process from last stop or for the inferencing
-    hypar["restore_model"] = ""
-    ## start iteration for the training, can be changed to match the restored training process
-    hypar["start_ite"] = 0
-    ## indicates "half" or "full" accuracy of float number
-    hypar["model_digit"] = "full"
-    ## To handle large size input images, which take a lot of time for loading in training,
-    #  we introduce the cache mechanism for pre-convering and resizing the jpg and png images into .pt file
-    hypar["cache_size"] = [
-        1024,
-        1024,
-    ]
-    ## cached input spatial resolution, can be configured into different size
-    ## "True" or "False", indicates wheather to load all the training datasets into RAM, True will greatly speed the training process while requires more RAM
-    hypar["cache_boost_train"] = False
-    ## "True" or "False", indicates wheather to load all the validation datasets into RAM, True will greatly speed the training process while requires more RAM
-    hypar["cache_boost_valid"] = False
-    ## stop the training when no improvement in the past 20 validation periods, smaller numbers can be used here e.g., 5 or 10.
-    hypar["early_stop"] = 20
-    ## valid and save model weights every 2000 iterations
-    hypar["model_save_fre"] = 2000
-    ## batch size for training
-    hypar["batch_size_train"] = 8
-    ## batch size for validation and inferencing
-    hypar["batch_size_valid"] = 1
-    ## if early stop couldn't stop the training process, stop it by the max_ite_num
-    hypar["max_ite"] = 10000000
-    ## if early stop and max_ite couldn't stop the training process, stop it by the max_epoch_num
-    hypar["max_epoch_num"] = 1000000
-    main(train_datasets, valid_datasets, hypar=hypar)

stack.py DELETED Viewed

@@ -1,37 +0,0 @@
-from PIL import Image
-def stack_images(image_paths, output_path):
-    # Load all images from the provided paths
-    images = [Image.open(path) for path in image_paths]
-    # Determine the size of individual images (assuming all are the same size)
-    width, height = images[0].size
-    # Create a new image with appropriate size (2 columns and 3 rows)
-    total_width = width * 2
-    total_height = height * 3
-    new_image = Image.new("RGB", (total_width, total_height))
-    # Paste each image into the new image
-    for i, image in enumerate(images):
-        # Calculate the position for each image
-        x_offset = (i % 2) * width
-        y_offset = (i // 2) * height
-        new_image.paste(image, (x_offset, y_offset))
-    # Save the new image
-    new_image.save(output_path)
-# Example usage
-image_paths = [
-    "/Users/mav/Desktop/example1.png",
-    "/Users/mav/Desktop/image-1.webp",
-    "/Users/mav/Desktop/example2.png",
-    "/Users/mav/Desktop/image-2.webp",
-    "/Users/mav/Desktop/example3.png",
-    "/Users/mav/Desktop/image-3.webp",
-]
-output_path = "stacked_images.jpg"
-stack_images(image_paths, output_path)

utils/.DS_Store DELETED Viewed

Binary file (6.15 kB)

utils/architecture.py DELETED Viewed

@@ -1,4 +0,0 @@
-from ormbg.models.ormbg import ORMBG
-# When run as a script, build an ORMBG instance and print its repr.
-if __name__ == "__main__":
-    print(ORMBG())

utils/loss_example.py DELETED Viewed

@@ -1,69 +0,0 @@
-import os
-import torch
-import argparse
-import numpy as np
-from skimage import io
-from ormbg.models.ormbg import ORMBG
-import torch.nn.functional as F
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description="Remove background from images using ORMBG model."
-    )
-    parser.add_argument(
-        "--prediction",
-        type=list,
-        default=[
-            os.path.join("examples", "loss", "loss01.png"),
-            os.path.join("examples", "loss", "loss02.png"),
-            os.path.join("examples", "loss", "loss03.png"),
-            os.path.join("examples", "loss", "loss04.png"),
-            os.path.join("examples", "loss", "loss05.png"),
-        ],
-        help="Path to the input image file.",
-    )
-    parser.add_argument(
-        "--gt",
-        type=str,
-        default=os.path.join("examples", "loss", "gt.png"),
-        help="Ground truth mask",
-    )
-    return parser.parse_args()
-def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
-    if len(im.shape) < 3:
-        im = im[:, :, np.newaxis]
-    im_tensor = torch.tensor(im, dtype=torch.float32).permute(2, 0, 1)
-    im_tensor = F.interpolate(
-        torch.unsqueeze(im_tensor, 0), size=model_input_size, mode="bilinear"
-    ).type(torch.uint8)
-    image = torch.divide(im_tensor, 255.0)
-    return image
-def inference(args):
-    prediction_paths = args.prediction
-    gt_path = args.gt
-    net = ORMBG()
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    for pred_path in prediction_paths:
-        model_input_size = [1024, 1024]
-        loss = io.imread(pred_path)
-        prediction = preprocess_image(loss, model_input_size).to(device)
-        model_input_size = [1024, 1024]
-        gt = io.imread(gt_path)
-        ground_truth = preprocess_image(gt, model_input_size).to(device)
-        _, loss = net.compute_loss([prediction], ground_truth)
-        print(f"Loss: {pred_path} {loss}")
-if __name__ == "__main__":
-    inference(parse_args())

utils/pth_to_onnx.py DELETED Viewed

@@ -1,59 +0,0 @@
-import torch
-import argparse
-from ormbg.models.ormbg import ORMBG
-def export_to_onnx(model_path, onnx_path):
-    net = ORMBG()
-    if torch.cuda.is_available():
-        net.load_state_dict(torch.load(model_path))
-        net = net.cuda()
-    else:
-        net.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
-    net.eval()
-    # Create a dummy input tensor. The size should match the model's input size.
-    # Adjust the dimensions as necessary; here it is assumed the input is a 3-channel image.
-    dummy_input = torch.randn(
-        1,
-        3,
-        1024,
-        1024,
-        device="cuda" if torch.cuda.is_available() else "cpu",
-    )
-    torch.onnx.export(
-        net,
-        dummy_input,
-        onnx_path,
-        export_params=True,
-        opset_version=11,
-        do_constant_folding=True,
-        input_names=["input"],
-        output_names=["output"],
-    )
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Export a trained model to ONNX format."
-    )
-    parser.add_argument(
-        "--model_path",
-        type=str,
-        default="models/ormbg.pth",
-        help="The path to the trained model file.",
-    )
-    parser.add_argument(
-        "--onnx_path",
-        type=str,
-        default="models/ormbg.pth",
-        help="The path where the ONNX model will be saved.",
-    )
-    args = parser.parse_args()
-    export_to_onnx(args.model_path, args.onnx_path)