Upload folder using huggingface_hub
Files changed:
- .gitattributes +2 -0
- .gitignore +174 -0
- LICENSE +21 -0
- README.md +4 -9
- app.py +166 -0
- bird.jpeg +0 -0
- enhaned_kmeans_segmented.png +0 -0
- experiments/SegNet/architecture.py +87 -0
- experiments/SegNet/train.py +81 -0
- experiments/enhanced_kmeans_segmenter.py +100 -0
- experiments/felzenszwalb_segmentation/__init__.py +1 -0
- experiments/felzenszwalb_segmentation/disjoint_set.py +39 -0
- experiments/felzenszwalb_segmentation/segmentation.py +83 -0
- experiments/felzenszwalb_segmentation/utils/__init__.py +2 -0
- experiments/felzenszwalb_segmentation/utils/filter_utils.py +38 -0
- experiments/felzenszwalb_segmentation/utils/utils.py +25 -0
- experiments/kmeans_segmenter.py +95 -0
- experiments/otsu_segmenter.py +95 -0
- experiments/watershed_segmenter.py +111 -0
- kmeans_comparison.png +3 -0
- kmeans_segmented.png +0 -0
- requirements.txt +9 -0
- segnet_efficientnet_voc.pth +3 -0
- watershed_output.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+kmeans_comparison.png filter=lfs diff=lfs merge=lfs -text
+watershed_output.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc
LICENSE ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Akshat Jain

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md CHANGED
@@ -1,12 +1,7 @@
 ---
-title:
-emoji: 📚
-colorFrom: purple
-colorTo: indigo
-sdk: gradio
-sdk_version: 5.24.0
+title: Image_Segmentation_CV_Project
 app_file: app.py
-
+sdk: gradio
+sdk_version: 5.23.1
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# CSL7360_Project
app.py ADDED
@@ -0,0 +1,166 @@
import gradio as gr
import torch
from torchvision import transforms
from experiments.otsu_segmenter import generate_segmented_image
from experiments.kmeans_segmenter import generate_kmeans_segmented_image
from experiments.enhanced_kmeans_segmenter import slic_kmeans
from experiments.watershed_segmenter import generate_watershed
from experiments.felzenszwalb_segmentation import segment
from experiments.SegNet.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
import numpy as np
from PIL import Image
from matplotlib import cm

def generate_kmeans(image_path, k):
    kmeans_image_output, kmeans_segmented_image_output, _, kmeans_threshold_text = generate_kmeans_segmented_image(image_path, k)
    return kmeans_image_output, kmeans_segmented_image_output, kmeans_threshold_text

def generate_slic(image_path, k, m, max_iter):
    image, seg_img, labels, centers = slic_kmeans(image_path, K=k, m=m, max_iter=max_iter)
    return image, seg_img

def generate_felzenszwalb(image_path, sigma, k, min_size_factor):
    image = Image.open(image_path).convert("RGB")
    image_np = np.array(image)
    segments_fz = segment(image_np, sigma=sigma, k=k, min_size=min_size_factor)
    segments_fz = segments_fz.astype(np.uint8)

    return image, segments_fz

def SegNet_efficient_b0(image_path):
    model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
    model.load_state_dict(torch.load("segnet_efficientnet_voc.pth", map_location=DEVICE))
    model.eval()
    transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    image = Image.open(image_path).convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        output = model(input_tensor)
        pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

    # Convert original image for Gradio display
    original_image_resized = image.resize(IMAGE_SIZE)

    # Convert predicted mask to a color image using a colormap
    colormap = cm.get_cmap('nipy_spectral')
    colored_mask = colormap(pred_mask / pred_mask.max())  # Normalize
    colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)  # Drop alpha and convert to uint8
    mask_pil = Image.fromarray(colored_mask)

    return original_image_resized, mask_pil

with gr.Blocks() as demo:
    gr.Markdown("# Image Segmentation using Classical CV")

    with gr.Tabs() as tabs:
        with gr.TabItem("Otsu's Method"):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Image File")
                    display_btn = gr.Button("Segment this image")
                    threshold_text = gr.Textbox(label="Threshold Comparison", value="", interactive=False)

                with gr.Column(scale=2):
                    image_output = gr.Image(label="Original Image", container=False)
                    histogram_output = gr.Image(label="Histogram", container=False)
                    segmented_image_output = gr.Image(label="Our Segmented Image", container=False)
                    opencv_segmented_image_output = gr.Image(label="OpenCV Segmented Image", container=False)
            display_btn.click(
                fn=generate_segmented_image,
                inputs=file_input,
                outputs=[image_output, segmented_image_output, opencv_segmented_image_output, histogram_output, threshold_text]
            )
        with gr.TabItem("K-means Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    kmeans_file_input = gr.File(label="Upload Image File")
                    kmeans_k_value = gr.Slider(minimum=2, maximum=10, value=3, step=1, label="Number of Clusters (K)")
                    kmeans_display_btn = gr.Button("Segment this image")
                    kmeans_threshold_text = gr.Textbox(label="K-means Info", value="", interactive=False)

                with gr.Column(scale=2):
                    kmeans_image_output = gr.Image(label="Original Image", container=False)
                    kmeans_segmented_image_output = gr.Image(label="K-means Segmented Image", container=False)

            kmeans_display_btn.click(
                fn=generate_kmeans,
                inputs=[kmeans_file_input, kmeans_k_value],
                outputs=[kmeans_image_output, kmeans_segmented_image_output, kmeans_threshold_text]
            )
        with gr.TabItem("SLIC Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    slic_file_input = gr.File(label="Upload Image File")
                    slic_k_value = gr.Slider(minimum=2, maximum=200, value=3, step=1, label="Number of superpixels")
                    slic_m_value = gr.Slider(minimum=1, maximum=40, value=3, step=1, label="Compactness factor")
                    slic_max_iter_value = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of iterations")
                    slic_display_btn = gr.Button("Segment this image")

                with gr.Column(scale=2):
                    slic_image_output = gr.Image(label="Original Image", container=False)
                    slic_segmented_image_output = gr.Image(label="SLIC Segmented Image", container=False)

            slic_display_btn.click(
                fn=generate_slic,
                inputs=[slic_file_input, slic_k_value, slic_m_value, slic_max_iter_value],
                outputs=[slic_image_output, slic_segmented_image_output]
            )

        with gr.TabItem("Watershed Algorithm Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    watershed_file_input = gr.File(label="Upload Image File")
                    watershed_display_btn = gr.Button("Segment this image")

                with gr.Column(scale=2):
                    watershed_image_output = gr.Image(label="Original Image", container=False)
                    watershed_segmented_image_output = gr.Image(label="Watershed Segmented Image", container=False)

            watershed_display_btn.click(
                fn=generate_watershed,
                inputs=[watershed_file_input],
                outputs=[watershed_image_output, watershed_segmented_image_output]
            )
        with gr.TabItem("Felzenszwalb Algorithm Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    felzenszwalb_file_input = gr.File(label="Upload Image File")
                    sigma_value = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.1, label="Sigma")
                    K_value = gr.Slider(minimum=2, maximum=1000, value=2, step=1, label="K value")
                    min_size_value = gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Min Size Factor")
                    felzenszwalb_display_btn = gr.Button("Segment this image")

                with gr.Column(scale=2):
                    felzenszwalb_image_output = gr.Image(label="Original Image", container=False)
                    felzenszwalb_segmented_image_output = gr.Image(label="Felzenszwalb Segmented Image", container=False)

            felzenszwalb_display_btn.click(
                fn=generate_felzenszwalb,
                inputs=[felzenszwalb_file_input, sigma_value, K_value, min_size_value],
                outputs=[felzenszwalb_image_output, felzenszwalb_segmented_image_output]
            )
        with gr.TabItem("SegNet EfficientNet B0 Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    segnet_file_input = gr.File(label="Upload Image File")
                    segnet_display_btn = gr.Button("Segment this image")

                with gr.Column(scale=2):
                    segnet_image_output = gr.Image(label="Original Image", container=False)
                    segnet_segmented_image_output = gr.Image(label="SegNet Segmented Image", container=False)

            segnet_display_btn.click(
                fn=SegNet_efficient_b0,
                inputs=[segnet_file_input],
                outputs=[segnet_image_output, segnet_segmented_image_output]
            )
if __name__ == "__main__":
    demo.launch(server_name="172.31.100.127")
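A note on the final line of app.py: `demo.launch(server_name="172.31.100.127")` binds the app to one machine-specific LAN address, which will not resolve anywhere else. A minimal sketch of a more portable launch call, not part of the commit (the port value is an assumption, Gradio's default):

# Hedged sketch: bind to all interfaces so the app is reachable locally and inside a container.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)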
bird.jpeg ADDED
enhaned_kmeans_segmented.png ADDED
experiments/SegNet/architecture.py ADDED
@@ -0,0 +1,87 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.datasets import VOCSegmentation
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import wandb
import os
import matplotlib.pyplot as plt

torch.manual_seed(42)
np.random.seed(42)

# wandb.login(key="your_wandb_api_key_here")

EPOCHS = 25
BATCH_SIZE = 8
LR = 1e-3
NUM_CLASSES = 21  # Pascal VOC has 21 classes including background
IMAGE_SIZE = (256, 256)
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# wandb.init(project="segnet-efficientnet-voc", config={
#     "epochs": EPOCHS,
#     "batch_size": BATCH_SIZE,
#     "learning_rate": LR,
#     "architecture": "SegNet-EfficientNet",
#     "dataset": "PascalVOC2012"
# })

class SegNetEfficientNet(nn.Module):
    def __init__(self, num_classes):
        super(SegNetEfficientNet, self).__init__()
        base_model = models.efficientnet_b0(pretrained=True)
        features = list(base_model.features.children())

        # Encoder: Use EfficientNet blocks
        self.encoder = nn.Sequential(*features)

        # Decoder: Up-convolutions
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1280, 512, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, num_classes, kernel_size=1)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        x = F.interpolate(x, size=IMAGE_SIZE, mode='bilinear', align_corners=False)
        return x

class VOCSegmentationDataset(VOCSegmentation):
    def __init__(self, root, image_set='train', transform=None, target_transform=None):
        super().__init__(root=root, year='2012', image_set=image_set, download=True)
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        img, target = super().__getitem__(index)
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            target = self.target_transform(target)
        target = torch.as_tensor(np.array(target), dtype=torch.long)
        return img, target

if __name__ == "__main__":
    image_transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    mask_transform = transforms.Resize(IMAGE_SIZE, interpolation=Image.NEAREST)

    train_dataset = VOCSegmentationDataset("voc_data", 'train', image_transform, mask_transform)
    val_dataset = VOCSegmentationDataset("voc_data", 'val', image_transform, mask_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
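A quick way to sanity-check the decoder above: the EfficientNet-B0 encoder produces 1280 channels, the four stride-2 transposed convolutions upsample 16x, and the final bilinear interpolation snaps the output back to IMAGE_SIZE. A small shape-check sketch, not part of the uploaded files:

import torch
from experiments.SegNet.architecture import SegNetEfficientNet, NUM_CLASSES, IMAGE_SIZE

model = SegNetEfficientNet(NUM_CLASSES)
model.eval()
dummy = torch.randn(1, 3, *IMAGE_SIZE)   # one RGB image at 256x256
with torch.no_grad():
    logits = model(dummy)
print(logits.shape)  # expected: torch.Size([1, 21, 256, 256]), per-pixel class scores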
experiments/SegNet/train.py ADDED
@@ -0,0 +1,81 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.datasets import VOCSegmentation
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import wandb
import os
import matplotlib.pyplot as plt
from .architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, LR, EPOCHS, train_loader, val_loader, IMAGE_SIZE
from tqdm import tqdm

model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss(ignore_index=255)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def pixel_accuracy(preds, labels):
    _, preds = torch.max(preds, 1)
    correct = (preds == labels).float()
    acc = correct.sum() / correct.numel()
    return acc

# def mean_iou(preds, labels, num_classes=NUM_CLASSES):
#     _, preds = torch.max(preds, 1)
#     ious = []
#     for cls in range(num_classes):
#         intersection = ((preds == cls) & (labels == cls)).float().sum()
#         union = ((preds == cls) | (labels == cls)).float().sum()
#         if union > 0:
#             ious.append(intersection / union)
#     return sum(ious) / len(ious) if ious else 0

for epoch in tqdm(range(EPOCHS)):
    model.train()
    train_loss, train_acc = 0.0, 0.0

    for images, masks in train_loader:
        images, masks = images.to(DEVICE), masks.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_acc += pixel_accuracy(outputs, masks).item()

    train_loss /= len(train_loader)
    train_acc /= len(train_loader)

    # Validation
    model.eval()
    val_loss, val_acc = 0.0, 0.0
    with torch.no_grad():
        for images, masks in val_loader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)
            outputs = model(images)
            loss = criterion(outputs, masks)

            val_loss += loss.item()
            val_acc += pixel_accuracy(outputs, masks).item()

    val_loss /= len(val_loader)
    val_acc /= len(val_loader)

    # wandb.log({
    #     "epoch": epoch + 1,
    #     "train_loss": train_loss,
    #     "train_accuracy": train_acc,
    #     "val_loss": val_loss,
    #     "val_accuracy": val_acc
    # })

    print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

torch.save(model.state_dict(), "segnet_efficientnet_voc.pth")
# wandb.finish()
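Worth noting: train.py imports `train_loader` and `val_loader` from `.architecture`, but architecture.py only builds them inside its `if __name__ == "__main__":` guard, so that import would fail. A hedged sketch of constructing the loaders in train.py itself, assuming the same dataset class and constants defined in architecture.py (not part of the commit):

from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
from experiments.SegNet.architecture import VOCSegmentationDataset, IMAGE_SIZE, BATCH_SIZE

# Same preprocessing as architecture.py uses under its __main__ guard.
image_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
mask_transform = transforms.Resize(IMAGE_SIZE, interpolation=Image.NEAREST)

train_dataset = VOCSegmentationDataset("voc_data", "train", image_transform, mask_transform)
val_dataset = VOCSegmentationDataset("voc_data", "val", image_transform, mask_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)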
experiments/enhanced_kmeans_segmenter.py ADDED
@@ -0,0 +1,100 @@
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image

def slic_kmeans(image_path, K=100, m=10, max_iter=10):
    """
    Perform superpixel segmentation using enhanced K-means with LAB+XY.
    Args:
        image (np.ndarray): RGB input image.
        K (int): Number of superpixels.
        m (float): Compactness factor.
        max_iter (int): Number of iterations.
    Returns:
        segmented_img: The segmented image with cluster colors.
        labels: Cluster label for each pixel.
    """
    jpg_image = Image.open(image_path)
    image = np.array(jpg_image)
    h, w = image.shape[:2]
    S = int(np.sqrt(h * w / K))  # grid interval

    # Convert to LAB color space
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB).astype(np.float32)

    # Create 5D feature vector [L, a, b, x, y]
    X, Y = np.meshgrid(np.arange(w), np.arange(h))
    features = np.dstack((lab, X, Y)).reshape((-1, 5))

    # Initialize cluster centers on grid
    centers = []
    for y in range(S // 2, h, S):
        for x in range(S // 2, w, S):
            center = features[y * w + x]
            centers.append(center)
    centers = np.array(centers)

    labels = np.full((h * w,), -1, dtype=np.int32)
    distances = np.full((h * w,), np.inf)

    for iteration in tqdm(range(max_iter)):
        for idx, center in enumerate(centers):
            l, a, b, cx, cy = center
            x_start, x_end = max(0, int(cx - S)), min(w, int(cx + S))
            y_start, y_end = max(0, int(cy - S)), min(h, int(cy + S))

            for y in range(y_start, y_end):
                for x in range(x_start, x_end):
                    i = y * w + x
                    fp = features[i]
                    dc = np.linalg.norm(fp[:3] - center[:3])  # LAB distance
                    ds = np.linalg.norm(fp[3:] - center[3:])  # XY distance
                    D = np.sqrt(dc**2 + (ds / S)**2 * m**2)

                    if D < distances[i]:
                        distances[i] = D
                        labels[i] = idx

        # Update cluster centers
        new_centers = np.zeros_like(centers)
        count = np.zeros(len(centers))
        for i in range(h * w):
            lbl = labels[i]
            new_centers[lbl] += features[i]
            count[lbl] += 1
        for i in range(len(centers)):
            if count[i] > 0:
                new_centers[i] /= count[i]
        centers = new_centers

    # Recolor image based on cluster centers
    segmented_img = np.zeros((h, w, 3), dtype=np.uint8)
    for i in range(h * w):
        lbl = labels[i]
        lab_val = centers[lbl][:3]
        lab_pixel = np.uint8([[lab_val]])
        rgb_pixel = cv2.cvtColor(lab_pixel, cv2.COLOR_LAB2RGB)[0][0]
        segmented_img[i // w, i % w] = rgb_pixel

    return jpg_image, Image.fromarray(segmented_img), labels.reshape((h, w)), centers

# img_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
# image = cv2.imread(img_path)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# _,seg_img, labels, centers = slic_kmeans(image, K=2, m=20)
# seg_img.save("enhaned_kmeans_segmented.png")
# plt.figure(figsize=(10, 5))
# plt.subplot(1, 2, 1)
# plt.imshow(image)
# plt.title("Original Image")
# plt.axis("off")

# plt.subplot(1, 2, 2)
# plt.imshow(seg_img)
# plt.title("SLIC-like K-Means Segmentation")
# plt.axis("off")
# plt.tight_layout()
# plt.show()
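A short usage sketch of the SLIC-style K-means above, mirroring the commented-out example but calling the function with a path as its signature expects; the parameter values are illustrative only:

from experiments.enhanced_kmeans_segmenter import slic_kmeans

# K superpixels, compactness m, a few assignment/update iterations (illustrative values).
original, segmented, labels, centers = slic_kmeans("bird.jpeg", K=100, m=10, max_iter=5)
segmented.save("enhaned_kmeans_segmented.png")   # filename spelled as used elsewhere in the repo
print(labels.shape)  # (H, W) array of superpixel ids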
experiments/felzenszwalb_segmentation/__init__.py ADDED
@@ -0,0 +1 @@
from .segmentation import segment
experiments/felzenszwalb_segmentation/disjoint_set.py ADDED
@@ -0,0 +1,39 @@
import numpy as np


class DisjointSet:

    def __init__(self, n_elements):
        self.num = n_elements
        self.elements = np.empty(
            shape=(n_elements, 3),
            dtype=int
        )
        for i in range(n_elements):
            self.elements[i, 0] = 0
            self.elements[i, 1] = 1
            self.elements[i, 2] = i

    def size(self, x):
        return self.elements[x, 1]

    def num_sets(self):
        return self.num

    def find(self, x):
        y = int(x)
        while y != self.elements[y, 2]:
            y = self.elements[y, 2]
        self.elements[x, 2] = y
        return y

    def join(self, x, y):
        if self.elements[x, 0] > self.elements[y, 0]:
            self.elements[y, 2] = x
            self.elements[x, 1] += self.elements[y, 1]
        else:
            self.elements[x, 2] = y
            self.elements[y, 1] += self.elements[x, 1]
            if self.elements[x, 0] == self.elements[y, 0]:
                self.elements[y, 0] += 1
        self.num -= 1
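The class above is a union-find keyed by pixel index: column 0 of `elements` holds ranks, column 1 component sizes, column 2 parents, and `join` expects roots. A tiny sketch of how the segmentation code drives it (not part of the commit):

from experiments.felzenszwalb_segmentation.disjoint_set import DisjointSet

ds = DisjointSet(4)               # four pixels, each starting as its own component
ds.join(ds.find(0), ds.find(1))   # merge the components containing pixels 0 and 1
ds.join(ds.find(1), ds.find(2))   # merge in pixel 2 as well
print(ds.num_sets())              # 2 components remain
print(ds.size(ds.find(0)))        # size of the merged component: 3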
experiments/felzenszwalb_segmentation/segmentation.py ADDED
@@ -0,0 +1,83 @@
import numpy as np
from .disjoint_set import DisjointSet
from .utils import smoothen, difference, get_random_rgb_image


def segment_graph(num_vertices, num_edges, edges, c):
    edges[0 : num_edges, :] = edges[edges[0 : num_edges, 2].argsort()]
    u = DisjointSet(num_vertices)
    threshold = np.zeros(shape=num_vertices, dtype=float)
    for i in range(num_vertices):
        threshold[i] = c
    for i in range(num_edges):
        pedge = edges[i, :]
        a = u.find(pedge[0])
        b = u.find(pedge[1])
        if a != b:
            if (pedge[2] <= threshold[a]) and (pedge[2] <= threshold[b]):
                u.join(a, b)
                a = u.find(a)
                threshold[a] = pedge[2] + (c / u.size(a))
    return u


def segment(in_image, sigma, k, min_size):
    height, width, band = in_image.shape
    smooth_red_band = smoothen(in_image[:, :, 0], sigma)
    smooth_green_band = smoothen(in_image[:, :, 1], sigma)
    smooth_blue_band = smoothen(in_image[:, :, 2], sigma)
    # build graph
    edges_size = width * height * 4
    edges = np.zeros(shape=(edges_size, 3), dtype=object)
    num = 0
    for y in range(height):
        for x in range(width):
            if x < width - 1:
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int(y * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y
                )
                num += 1
            if y < height - 1:
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y + 1) * width + x)
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x, y + 1
                )
                num += 1
            if (x < width - 1) and (y < height - 2):
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y + 1) * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y + 1
                )
                num += 1
            if (x < width - 1) and (y > 0):
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y - 1) * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y - 1
                )
                num += 1
    u = segment_graph(width * height, num, edges, k)
    for i in range(num):
        a = u.find(edges[i, 0])
        b = u.find(edges[i, 1])
        if (a != b) and ((u.size(a) < min_size) or (u.size(b) < min_size)):
            u.join(a, b)
    num_cc = u.num_sets()
    output = np.zeros(shape=(height, width, 3))

    colors = np.zeros(shape=(height * width, 3))
    for i in range(height * width):
        colors[i, :] = get_random_rgb_image()
    for y in range(height):
        for x in range(width):
            comp = u.find(y * width + x)
            output[y, x, :] = colors[comp, :]
    return output
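A minimal sketch of calling this segmenter directly on an RGB array and saving the randomly colored component map; the sigma, k and min_size values are illustrative, and the pure-Python loops make it slow on large images:

import numpy as np
from PIL import Image
from experiments.felzenszwalb_segmentation import segment

image = np.array(Image.open("bird.jpeg").convert("RGB"), dtype=float)
colored = segment(image, sigma=0.5, k=300, min_size=50)   # (H, W, 3), one random color per component
Image.fromarray(colored.astype(np.uint8)).save("felzenszwalb_segmented.png")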
experiments/felzenszwalb_segmentation/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
from .utils import *
from .filter_utils import *
experiments/felzenszwalb_segmentation/utils/filter_utils.py ADDED
@@ -0,0 +1,38 @@
import numpy as np
from math import ceil, exp, pow


def convolve(src, mask):
    output = np.zeros(shape=src.shape, dtype=float)
    height, width = src.shape
    length = len(mask)
    for y in range(height):
        for x in range(width):
            sum = float(mask[0] * src[y, x])
            for i in range(1, length):
                sum += mask[i] * (
                    src[y, max(x - i, 0)] + src[y, min(x + i, width - 1)])
            output[y, x] = sum
    return output


def normalize(mask):
    sum = 2 * np.sum(np.absolute(mask)) + abs(mask[0])
    return np.divide(mask, sum)


def smoothen(src, sigma):
    mask = make_gaussian_filter(sigma)
    mask = normalize(mask)
    tmp = convolve(src, mask)
    dst = convolve(tmp, mask)
    return dst


def make_gaussian_filter(sigma):
    sigma = max(sigma, 0.01)
    length = int(ceil(sigma * 4.0)) + 1
    mask = np.zeros(shape=length, dtype=float)
    for i in range(length):
        mask[i] = exp(-0.5 * pow(i / sigma, i / sigma))
    return mask
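One detail worth flagging: `make_gaussian_filter` evaluates `exp(-0.5 * pow(i / sigma, i / sigma))`, whereas the conventional Gaussian mask squares the argument. A hedged sketch of the standard form for comparison only, not what the committed code does:

import numpy as np
from math import ceil, exp

def make_gaussian_filter_standard(sigma):
    """One-sided Gaussian mask exp(-0.5 * (i / sigma)^2), before normalization."""
    sigma = max(sigma, 0.01)
    length = int(ceil(sigma * 4.0)) + 1
    return np.array([exp(-0.5 * (i / sigma) ** 2) for i in range(length)], dtype=float)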
experiments/felzenszwalb_segmentation/utils/utils.py ADDED
@@ -0,0 +1,25 @@
import numpy as np
from math import sqrt
from random import randint


def difference(red_band, green_band, blue_band, x1, y1, x2, y2):
    return sqrt(
        (red_band[y1, x1] - red_band[y2, x2]) ** 2 +\
        (green_band[y1, x1] - green_band[y2, x2]) ** 2 +\
        (blue_band[y1, x1] - blue_band[y2, x2]) ** 2
    )


def get_random_rgb_image():
    rgb = np.zeros(3, dtype=int)
    rgb[0] = randint(0, 255)
    rgb[1] = randint(0, 255)
    rgb[2] = randint(0, 255)
    return rgb


def get_random_gray_image():
    gray = np.zeros(1, dtype=int)
    gray[0] = randint(0, 255)
    return gray
experiments/kmeans_segmenter.py ADDED
@@ -0,0 +1,95 @@
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import io

def initialize_centroids(data, K):
    """Randomly choose K data points as initial centroids."""
    indices = np.random.choice(data.shape[0], K, replace=False)
    return data[indices]

def compute_distances(data, centroids):
    """Compute the Euclidean distance between each data point and each centroid."""
    return np.linalg.norm(data[:, np.newaxis] - centroids, axis=2)

def update_centroids(data, labels, K):
    """Update centroids as the mean of the points assigned to each cluster."""
    new_centroids = np.zeros((K, data.shape[1]))
    for k in range(K):
        cluster_points = data[labels == k]
        if len(cluster_points) > 0:
            new_centroids[k] = np.mean(cluster_points, axis=0)
    return new_centroids

def kmeans_from_scratch(image, K=4, max_iters=100, tol=1e-4):
    """Apply K-means clustering from scratch to segment the image."""
    data = image.reshape((-1, 3)).astype(np.float32)

    centroids = initialize_centroids(data, K)

    for i in range(max_iters):
        distances = compute_distances(data, centroids)
        labels = np.argmin(distances, axis=1)

        new_centroids = update_centroids(data, labels, K)
        shift = np.linalg.norm(new_centroids - centroids)

        if shift < tol:
            break
        centroids = new_centroids

    segmented_data = centroids[labels].astype(np.uint8)
    segmented_image = segmented_data.reshape(image.shape)

    return segmented_image, labels.reshape(image.shape[:2]), centroids.astype(np.uint8)

def generate_kmeans_segmented_image(image_path, k=3):
    """Process image with K-means for Gradio app"""
    image = Image.open(image_path)
    image_np = np.array(image)

    if len(image_np.shape) == 3:
        image_rgb = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_BGR2RGB)
    else:
        image_rgb = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)

    seg_img, labels, centers = kmeans_from_scratch(image_rgb, K=k)

    colors_image = np.zeros((50 * k, 100, 3), dtype=np.uint8)
    for i, color in enumerate(centers):
        colors_image[i*50:(i+1)*50, :] = color

    fig, axes = plt.subplots(1, 3, figsize=(12, 4))

    axes[0].imshow(image_rgb)
    axes[0].set_title("Original Image")
    axes[0].axis('off')

    axes[1].imshow(seg_img)
    axes[1].set_title(f"K-Means (K={k})")
    axes[1].axis('off')

    axes[2].imshow(colors_image)
    axes[2].set_title("Cluster Colors")
    axes[2].axis('off')

    plt.tight_layout()

    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    comparison_image = Image.open(buf)
    plt.close(fig)

    return image, Image.fromarray(seg_img), comparison_image, f"K-Means clustering with K={k}"

if __name__ == "__main__":
    image_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
    original, segmented, comparison, text = generate_kmeans_segmented_image(image_path, k=3)

    # Save output images instead of displaying them
    segmented.save("kmeans_segmented.png")
    comparison.save("kmeans_comparison.png")
    print(text)
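The label map returned by `kmeans_from_scratch` makes it easy to inspect how many pixels land in each cluster; a small sketch, not part of the uploaded files:

import numpy as np
from PIL import Image
from experiments.kmeans_segmenter import kmeans_from_scratch

image = np.array(Image.open("bird.jpeg").convert("RGB"))
seg_img, labels, centers = kmeans_from_scratch(image, K=3)
print(np.bincount(labels.ravel(), minlength=3))   # pixel count per cluster
print(centers)                                    # the 3 RGB cluster colors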
experiments/otsu_segmenter.py ADDED
@@ -0,0 +1,95 @@
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import io

def otsu_threshold(image):
    hist, bin_edges = np.histogram(image.flatten(), bins=256, range=[0, 256])
    hist = hist.astype(float)
    total_pixels = image.size
    pixel_probability = hist / total_pixels

    max_variance = 0
    optimal_threshold = 0

    for threshold in range(1, 256):
        weight_background = np.sum(pixel_probability[:threshold])
        weight_foreground = np.sum(pixel_probability[threshold:])

        if weight_background == 0 or weight_foreground == 0:
            continue

        mean_background = np.sum(np.arange(threshold) * pixel_probability[:threshold]) / weight_background
        mean_foreground = np.sum(np.arange(threshold, 256) * pixel_probability[threshold:]) / weight_foreground

        variance = weight_background * weight_foreground * (mean_background - mean_foreground) ** 2

        if variance > max_variance:
            max_variance = variance
            optimal_threshold = threshold

    segmented_image = np.zeros_like(image)
    segmented_image[image >= optimal_threshold] = 255

    return optimal_threshold, segmented_image

def generate_segmented_image(image_path):
    # Convert PIL to OpenCV format
    print(f"Image path: {image_path}")
    image = Image.open(image_path)
    image_np = np.array(image)
    original_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    if len(original_image.shape) == 3:
        gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
    else:
        gray_image = original_image.copy()

    blurred = cv2.GaussianBlur(gray_image, (5, 5), 0)

    # Our implementation
    our_threshold, our_segmented = otsu_threshold(blurred)

    # OpenCV's implementation
    opencv_threshold, opencv_segmented = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Create histogram figure
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(gray_image.ravel(), 256, [0, 256], color='gray')
    ax.axvline(x=our_threshold, color='red', linestyle='--', label=f'Ours: {our_threshold}')
    ax.axvline(x=opencv_threshold, color='green', linestyle='--', label=f'OpenCV: {opencv_threshold}')
    ax.set_title("Histogram with Thresholds")
    ax.legend()

    # Convert Matplotlib figure to image
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    hist_image = Image.open(buf)
    plt.close(fig)  # Close the figure to free memory

    return (
        image,
        Image.fromarray(our_segmented),
        Image.fromarray(opencv_segmented),
        hist_image,
        f"Our Threshold: {our_threshold}\nOpenCV Threshold: {opencv_threshold}",
    )

if __name__ == "__main__":
    # example usage
    # Ensure you have the image path set correctly
    image_path = '/home/akshat/projects/CSL7360_Project/bird.jpeg'
    image = cv2.imread('/home/akshat/projects/CSL7360_Project/bird.jpeg')
    # Call the function
    generate_segmented_image(image)

    # # Optionally, save results to files
    # cv2.imwrite("our_segmented.png", our_segmented)
    # cv2.imwrite("opencv_segmented.png", opencv_segmented)
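The `__main__` block above passes a decoded array to `generate_segmented_image`, which actually expects a file path (it calls `Image.open` internally). A corrected usage sketch, not part of the commit:

from experiments.otsu_segmenter import generate_segmented_image

# generate_segmented_image opens the file itself, so pass the path rather than an array.
original, ours, opencv_result, histogram, summary = generate_segmented_image("bird.jpeg")
ours.save("our_segmented.png")
opencv_result.save("opencv_segmented.png")
print(summary)   # e.g. "Our Threshold: 123 / OpenCV Threshold: 123.0" (values depend on the image)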
experiments/watershed_segmenter.py ADDED
@@ -0,0 +1,111 @@
import numpy as np
import cv2
import heapq
import matplotlib.pyplot as plt
from collections import deque

# 1. Compute local minima as markers
def get_local_minima(gray):
    kernel = np.ones((3, 3), np.uint8)
    eroded = cv2.erode(gray, kernel)
    minima = (gray == eroded)
    return minima.astype(np.uint8)

# 2. Label each connected component (marker)
def label_markers(minima):
    num_labels, markers = cv2.connectedComponents(minima)
    return markers, num_labels

# 3. Watershed from scratch
def watershed_from_scratch(gray, markers):
    h, w = gray.shape
    # Constants
    WATERSHED = -1
    INIT = -2

    # Initialize label and visited map
    label_map = np.full((h, w), INIT, dtype=np.int32)
    label_map[markers > 0] = markers[markers > 0]

    # Priority queue for pixels: (intensity, y, x)
    pq = []

    # Populate queue with boundary of initial markers
    for y in range(h):
        for x in range(w):
            if markers[y, x] > 0:
                for dy in [-1, 0, 1]:
                    for dx in [-1, 0, 1]:
                        ny, nx = y + dy, x + dx
                        if 0 <= ny < h and 0 <= nx < w:
                            if markers[ny, nx] == 0 and label_map[ny, nx] == INIT:
                                heapq.heappush(pq, (gray[ny, nx], ny, nx))
                                label_map[ny, nx] = 0  # Mark as in queue

    # Flooding
    while pq:
        intensity, y, x = heapq.heappop(pq)

        neighbor_labels = set()
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                ny, nx = y + dy, x + dx
                if 0 <= ny < h and 0 <= nx < w:
                    lbl = label_map[ny, nx]
                    if lbl > 0:
                        neighbor_labels.add(lbl)

        if len(neighbor_labels) == 1:
            label_map[y, x] = neighbor_labels.pop()
        elif len(neighbor_labels) > 1:
            label_map[y, x] = WATERSHED

        # Add unvisited neighbors to the queue
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                ny, nx = y + dy, x + dx
                if 0 <= ny < h and 0 <= nx < w:
                    if label_map[ny, nx] == INIT:
                        heapq.heappush(pq, (gray[ny, nx], ny, nx))
                        label_map[ny, nx] = 0  # Mark as in queue

    return label_map

def generate_watershed(iamge_path):
    # Load grayscale image
    image = cv2.imread(iamge_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.GaussianBlur(image, (5, 5), 0)
    minima = get_local_minima(image)
    markers, num_labels = label_markers(minima)
    result = watershed_from_scratch(image, markers)

    # Visualization
    output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    output[result == -1] = [255, 0, 0]  # Watershed lines in red
    output[result > 0] = [0, 255, 0]    # Segments in green
    output[markers > 0] = [0, 0, 255]   # Original minima in blue
    return image, output

if __name__ == "__main__":
    # Run the process
    # Load grayscale image
    image = cv2.imread("/home/akshat/projects/CSL7360_Project/bird.jpeg", cv2.IMREAD_GRAYSCALE)
    image = cv2.GaussianBlur(image, (5, 5), 0)
    minima = get_local_minima(image)
    markers, num_labels = label_markers(minima)
    result = watershed_from_scratch(image, markers)

    # Visualization
    output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    output[result == -1] = [255, 0, 0]  # Watershed lines in red
    output[result > 0] = [0, 255, 0]    # Segments in green
    output[markers > 0] = [0, 0, 255]   # Original minima in blue

    # Save the original grayscale and the output image
    cv2.imwrite("original_grayscale.png", image)
    cv2.imwrite("watershed_output.png", output)

    print("Images saved as 'original_grayscale.png' and 'watershed_output.png'")
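A short sketch of calling the wrapper directly and saving a viewable result (note the function spells its parameter `iamge_path`; positional use sidesteps that). Not part of the commit:

import cv2
from experiments.watershed_segmenter import generate_watershed

gray, overlay = generate_watershed("bird.jpeg")   # grayscale input and color-coded label overlay
cv2.imwrite("original_grayscale.png", gray)
cv2.imwrite("watershed_output.png", overlay)      # per the code's comments: lines, segments, minima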
kmeans_comparison.png ADDED (Git LFS)
kmeans_segmented.png ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
torch==2.5.1
torchvision==0.20.1
gradio==5.23.1
pillow==10.4.0
numpy==2.2.2
opencv-python==4.10.0
matplotlib==3.10.0
wandb==0.19.6
tqdm==4.67.1
segnet_efficientnet_voc.pth ADDED (Git LFS pointer)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5225a079173dc4b5b1f786e79a474d64c2d17a9aa8f35bbb0908cfbb0f2b9baa
size 29583954
watershed_output.png ADDED (Git LFS)