AJain1234 committed on
Commit 0f9608b · verified · 1 Parent(s): 7ac57e1

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ kmeans_comparison.png filter=lfs diff=lfs merge=lfs -text
+ watershed_output.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
+ name: Run Python script
+
+ on:
+   push:
+     branches:
+       - main
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout
+         uses: actions/checkout@v2
+
+       - name: Set up Python
+         uses: actions/setup-python@v2
+         with:
+           python-version: '3.9'
+
+       - name: Install Gradio
+         run: python -m pip install gradio
+
+       - name: Log in to Hugging Face
+         run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
+
+       - name: Deploy to Spaces
+         run: gradio deploy
.gitignore ADDED
@@ -0,0 +1,176 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ ./saved_models/
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Akshat Jain
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,137 @@
  ---
- title: 'Image Segmentation '
- emoji: 📈
- colorFrom: red
- colorTo: pink
- sdk: gradio
- sdk_version: 5.25.0
+ title: Image_Segmentation_
  app_file: app.py
- pinned: false
+ sdk: gradio
+ sdk_version: 5.23.1
  ---
+ # Image Segmentation Toolkit
+
+ ## Overview
+ This project implements a comprehensive image segmentation toolkit that combines classical computer vision techniques with deep-learning-based approaches. The application provides an interactive interface for comparing different segmentation algorithms on user-provided images.
+
+ ## Features
+ - **Classical Segmentation Methods**:
+   - Otsu's Thresholding: Optimal global thresholding for binary segmentation
+   - K-means Clustering: Color-based segmentation with an adjustable number of clusters
+   - SLIC (Simple Linear Iterative Clustering): Superpixel segmentation
+   - Watershed Algorithm: Gradient-based segmentation for separating touching objects
+   - Felzenszwalb Algorithm: Graph-based segmentation with adaptive thresholding
+
+ - **Deep Learning Models**:
+   - SegNet with EfficientNet-B0 backbone: Pretrained semantic segmentation model
+   - SegNet with VGG backbone: Alternative architecture for comparison
+
+ - **Ensemble Methods**:
+   - Otsu + SegNet: Combines boundary information from Otsu with semantic labels from SegNet
+   - Custom ensemble segmentation with adjustable parameters
+
+ ## Installation
+
+ ### Prerequisites
+ - Python 3.8+
+ - PyTorch 1.10+
+ - CUDA-compatible GPU (recommended)
+
+ ### Setup
+ 1. Clone the repository:
+ ```bash
+ git clone https://github.com/yourusername/CSL7360_Project.git
+ cd CSL7360_Project
+ ```
+
+ 2. Create and activate a virtual environment (optional but recommended):
+ ```bash
+ python -m venv venv
+ source venv/bin/activate  # On Windows: venv\Scripts\activate
+ ```
+
+ 3. Install the required packages:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 4. Download the pretrained models:
+ ```bash
+ python download_models.py
+ ```
+ The application will also automatically download the models when first launched.
+
+ ## Usage
+
+ ### Running the Application
+ Start the Gradio web interface:
+ ```bash
+ python app.py
+ ```
+
+ The interface will be available at http://127.0.0.1:7860 in your web browser.
+
+ ### Using the Interface
+ 1. Select a segmentation method from the tabs at the top
+ 2. Upload an image using the file picker
+ 3. Adjust the algorithm parameters if available
+ 4. Click the "Segment this image" button
+ 5. View the results in the display area
+
+ ### Algorithm Parameters
+
+ #### Otsu's Method
+ - No parameters; the threshold is selected fully automatically
+
+ #### K-means Segmentation
+ - **Number of Clusters (K)**: Controls how many color groups to segment into
+
+ #### SLIC Segmentation
+ - **Number of superpixels**: Controls the granularity of the segmentation
+ - **Compactness factor**: Balances color similarity against spatial proximity; higher values yield more compact, regular superpixels
+ - **Number of iterations**: Number of refinement passes over the superpixel assignments
+
+ #### Felzenszwalb Algorithm
+ - **Sigma**: Standard deviation of the Gaussian pre-smoothing
+ - **K value**: Threshold constant; larger values favor larger segments
+ - **Min Size Factor**: Minimum component size; smaller segments are merged into their neighbors
+
+ #### Ensemble Segmentation
+ - **Boundary Refinement Weight**: Controls how strongly the classical-method boundaries influence the deep learning prediction
+
+ ## Project Structure
+ ```
+ CSL7360_Project/
+ ├── app.py                            # Main application with pretrained models
+ ├── experiments/                      # Implementations of the segmentation algorithms
+ │   ├── ensemble_method.py            # Ensemble segmentation implementation
+ │   ├── felzenszwalb_segmentation/    # Felzenszwalb algorithm implementation
+ │   ├── kmeans_segmenter.py           # K-means segmentation implementation
+ │   ├── enhanced_kmeans_segmenter.py  # SLIC implementation
+ │   ├── otsu_segmenter.py             # Otsu thresholding implementation
+ │   ├── watershed_segmenter.py        # Watershed algorithm implementation
+ │   └── SegNet/                       # Deep learning models
+ │       ├── efficient_b0_backbone/    # EfficientNet backbone for SegNet
+ │       └── vgg_backbone/             # VGG backbone for SegNet
+ ├── saved_models/                     # Directory for pretrained weights
+ └── requirements.txt                  # Package dependencies
+ ```
+
+ ## Examples
+ The application works well on a variety of images:
+ - Natural scenes
+ - Urban environments
+ - Medical images
+ - Aerial/satellite imagery
+ - Objects with clear boundaries
+
+ ## Technologies Used
+ - **PyTorch**: Deep learning framework
+ - **OpenCV**: Classical computer vision algorithms
+ - **NumPy**: Numerical computations
+ - **PIL/Pillow**: Image loading and manipulation
+ - **Gradio**: Interactive web interface
+ - **Matplotlib**: Visualization of results
+
+ ## Credits
+ - Built as part of the CSL7360 course project
+ - Uses pretrained models based on the Pascal VOC and CamVid datasets
+ - Implements algorithms from the classical computer vision literature
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ## License
+ This project is available under the MIT License.
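
The README drives everything through the Gradio UI, but the segmenters are plain functions. Below is a minimal sketch of calling two of them directly, based on the signatures in `app.py` and `experiments/` in this commit; the output filenames are hypothetical.

```python
# Illustrative driver script (not part of this commit); run from the repo root.
from experiments.kmeans_segmenter import generate_kmeans_segmented_image
from experiments.enhanced_kmeans_segmenter import slic_kmeans

image_path = "bird.jpeg"  # sample image shipped with the repo

# K-means: returns (original PIL image, segmented PIL image, comparison plot, info text)
original, segmented, comparison, info = generate_kmeans_segmented_image(image_path, k=4)
segmented.save("kmeans_result.png")
print(info)

# SLIC-style superpixels: K superpixels, compactness m, a few refinement iterations
orig, slic_img, labels, centers = slic_kmeans(image_path, K=100, m=10, max_iter=5)
slic_img.save("slic_result.png")
```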
app.py ADDED
@@ -0,0 +1,300 @@
+ import gradio as gr
+ import torch
+ from torchvision import transforms
+ from experiments.otsu_segmenter import generate_segmented_image
+ from experiments.kmeans_segmenter import generate_kmeans_segmented_image
+ from experiments.enhanced_kmeans_segmenter import slic_kmeans
+ from experiments.watershed_segmenter import generate_watershed
+ from experiments.felzenszwalb_segmentation import segment
+ from experiments.SegNet.efficient_b0_backbone.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE
+ from experiments.SegNet.vgg_backbone.model import SegNet
+ # from experiments.ensemble_method import generate_ensemble_segmentation
+ import numpy as np
+ from PIL import Image
+ from matplotlib import cm
+ import gdown
+ import os
+
+ # Create the saved_models directory if it does not exist
+ if not os.path.exists("saved_models"):
+     os.makedirs("saved_models")
+
+ # Download the model weights only if they are not already present
+ if not os.path.exists("saved_models/segnet_vgg.pth"):
+     print("Downloading SegNet VGG weights...")
+     segnet_vgg_weights = "https://drive.google.com/file/d/1EFXKQ_3bDW9FbZCqOLdrE0DOI0V4W82o/view?usp=sharing"
+     gdown.download(segnet_vgg_weights, "saved_models/segnet_vgg.pth", fuzzy=True)
+     print("Download complete!")
+ else:
+     print("SegNet VGG weights already exist, skipping download.")
+
+ def generate_segnet_vgg(image_path):
+     model = SegNet(32).to(DEVICE)
+     model.load_state_dict(torch.load("saved_models/segnet_vgg.pth", map_location=DEVICE))
+     # Set the model to evaluation mode
+     model.eval()
+
+     # Load and preprocess the image
+     image = Image.open(image_path).convert('RGB')
+     original_image = image.copy()
+
+     # Apply the same preprocessing as during training
+     transform = transforms.Compose([
+         transforms.Resize((224, 224)),  # Adjust size to match the model's expected input
+         transforms.ToTensor(),
+         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+     ])
+
+     input_tensor = transform(image).unsqueeze(0).to(DEVICE)
+
+     # Get the prediction
+     with torch.no_grad():
+         output = model(input_tensor)
+         pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
+
+     # Convert the prediction to a visualization using a colormap
+     colormap = cm.get_cmap('nipy_spectral')
+     colored_mask = colormap(pred_mask / (pred_mask.max() or 1))  # Normalize; handle the case where max is 0
+     colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)  # Drop alpha and convert to uint8
+     segmented_image = Image.fromarray(colored_mask)
+
+     # Resize the segmented image to match the original image size
+     segmented_image = segmented_image.resize(original_image.size, Image.NEAREST)
+
+     return original_image, segmented_image
+
+ def generate_kmeans(image_path, k):
+     kmeans_image_output, kmeans_segmented_image_output, _, kmeans_threshold_text = generate_kmeans_segmented_image(image_path, k)
+     return kmeans_image_output, kmeans_segmented_image_output, kmeans_threshold_text
+
+ def generate_slic(image_path, k, m, max_iter):
+     image, seg_img, labels, centers = slic_kmeans(image_path, K=k, m=m, max_iter=max_iter)
+     return image, seg_img
+
+ def generate_felzenszwalb(image_path, sigma, k, min_size_factor):
+     image = Image.open(image_path).convert("RGB")
+     image_np = np.array(image)
+     segments_fz = segment(image_np, sigma=sigma, k=k, min_size=min_size_factor)
+     segments_fz = segments_fz.astype(np.uint8)
+
+     return image, segments_fz
+
+ def SegNet_efficient_b0(image_path):
+     model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
+     model.load_state_dict(torch.load("saved_models/segnet_efficientnet_camvid.pth", map_location=DEVICE))
+     model.eval()
+     transform = transforms.Compose([
+         transforms.Resize((360, 480)),  # Or larger if needed
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                              std=[0.229, 0.224, 0.225])
+     ])
+
+     image = Image.open(image_path).convert("RGB")
+     input_tensor = transform(image).unsqueeze(0).to(DEVICE)
+
+     with torch.no_grad():
+         output = model(input_tensor)
+         pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
+
+     # Keep the original image for Gradio display
+     original_image_resized = image
+
+     # Convert the predicted mask to a color image using a colormap
+     colormap = cm.get_cmap('nipy_spectral')
+     colored_mask = colormap(pred_mask / (pred_mask.max() or 1))  # Normalize; guard against an all-zero mask
+     colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)  # Drop alpha and convert to uint8
+     mask_pil = Image.fromarray(colored_mask)
+
+     return original_image_resized, mask_pil
+
+ def ensemble_segmentation(image_path):
+     """
+     Ensemble segmentation combining SegNet and Otsu, assuming Otsu produces a
+     mask with the foreground as black (value 0) and the background as white
+     (value 255).
+
+     In this ensemble, the SegNet prediction is forced to background (class 0)
+     wherever Otsu indicates background (after inversion, i.e., where otsu_bin == 0).
+
+     Parameters:
+         image_path (str): Path to the input image.
+
+     Returns:
+         original_image: The original resized image used for segmentation.
+         segnet_mask_pil: SegNet multi-class segmentation output (PIL image).
+         otsu_mask_pil: The original Otsu binary segmentation mask (PIL image).
+         ensemble_mask_pil: Final ensemble segmentation mask (PIL image).
+     """
+     # Run SegNet segmentation (the model outputs a multi-class mask).
+     segnet_orig, segnet_mask_pil = SegNet_efficient_b0(image_path)
+     # Convert the SegNet output to a NumPy array (assumed grayscale labeling, e.g., background=0).
+     segnet_mask_np = np.array(segnet_mask_pil.convert("L"))
+
+     # Run Otsu segmentation. (generate_segmented_image returns several outputs.)
+     _, otsu_segmented_pil, _, _, _ = generate_segmented_image(image_path)
+
+     # Resize the Otsu mask to match the SegNet output shape, e.g., (480, 360) if SegNet works at that resolution.
+     resized_shape = (segnet_mask_np.shape[1], segnet_mask_np.shape[0])
+     otsu_mask_resized = otsu_segmented_pil.resize(resized_shape, Image.NEAREST)
+     otsu_mask_np = np.array(otsu_mask_resized)
+
+     # Invert Otsu's binary mask:
+     # assuming that in otsu_mask_np the foreground is black (0) and the background is white (255),
+     # build a binary mask where "1" represents the object's area.
+     otsu_bin = (otsu_mask_np == 0).astype(np.uint8)  # Now foreground is 1 and background is 0.
+
+     # Create the ensemble segmentation:
+     # where Otsu indicates foreground (otsu_bin == 1), keep SegNet's prediction;
+     # where Otsu is background (otsu_bin == 0), force the background class (0).
+     ensemble_seg = np.where(otsu_bin == 1, segnet_mask_np, 0)
+
+     # Convert back to a PIL image.
+     ensemble_mask_pil = Image.fromarray(ensemble_seg.astype(np.uint8))
+
+     return segnet_orig, segnet_mask_pil, otsu_segmented_pil, ensemble_mask_pil
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Image Segmentation using Classical CV")
+
+     with gr.Tabs() as tabs:
+         with gr.TabItem("Otsu's Method"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     file_input = gr.File(label="Upload Image File")
+                     display_btn = gr.Button("Segment this image")
+                     threshold_text = gr.Textbox(label="Threshold Comparison", value="", interactive=False)
+
+                 with gr.Column(scale=2):
+                     image_output = gr.Image(label="Original Image")
+                     histogram_output = gr.Image(label="Histogram")
+                     segmented_image_output = gr.Image(label="Our Segmented Image")
+                     opencv_segmented_image_output = gr.Image(label="OpenCV Segmented Image")
+             display_btn.click(
+                 fn=generate_segmented_image,
+                 inputs=file_input,
+                 outputs=[image_output, segmented_image_output, opencv_segmented_image_output, histogram_output, threshold_text]
+             )
+         with gr.TabItem("K-means Segmentation"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     kmeans_file_input = gr.File(label="Upload Image File")
+                     kmeans_k_value = gr.Slider(minimum=2, maximum=10, value=3, step=1, label="Number of Clusters (K)")
+                     kmeans_display_btn = gr.Button("Segment this image")
+                     kmeans_threshold_text = gr.Textbox(label="K-means Info", value="", interactive=False)
+
+                 with gr.Column(scale=2):
+                     kmeans_image_output = gr.Image(label="Original Image")
+                     kmeans_segmented_image_output = gr.Image(label="K-means Segmented Image")
+
+             kmeans_display_btn.click(
+                 fn=generate_kmeans,
+                 inputs=[kmeans_file_input, kmeans_k_value],
+                 outputs=[kmeans_image_output, kmeans_segmented_image_output, kmeans_threshold_text]
+             )
+         with gr.TabItem("SLIC Segmentation"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     slic_file_input = gr.File(label="Upload Image File")
+                     slic_k_value = gr.Slider(minimum=2, maximum=200, value=3, step=1, label="Number of superpixels")
+                     slic_m_value = gr.Slider(minimum=1, maximum=40, value=3, step=1, label="Compactness factor")
+                     slic_max_iter_value = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of iterations")
+                     slic_display_btn = gr.Button("Segment this image")
+
+                 with gr.Column(scale=2):
+                     slic_image_output = gr.Image(label="Original Image", container=True)
+                     slic_segmented_image_output = gr.Image(label="SLIC Segmented Image", container=True)
+
+             slic_display_btn.click(
+                 fn=generate_slic,
+                 inputs=[slic_file_input, slic_k_value, slic_m_value, slic_max_iter_value],
+                 outputs=[slic_image_output, slic_segmented_image_output]
+             )
+
+         with gr.TabItem("Watershed"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     watershed_file = gr.File(label="Upload Image")
+                     watershed_btn = gr.Button("Run Watershed")
+
+                 with gr.Column(scale=2):
+                     original_img = gr.Image(label="1. Original")
+                     blurred_img = gr.Image(label="2. Blurred")
+                     threshold_img = gr.Image(label="3. Threshold")
+
+             watershed_btn.click(
+                 fn=generate_watershed,
+                 inputs=[watershed_file],
+                 outputs=[original_img, blurred_img, threshold_img]
+             )
+         with gr.TabItem("Felzenszwalb Algorithm Segmentation"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     felzenszwalb_file_input = gr.File(label="Upload Image File")
+                     sigma_value = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.1, label="Sigma")
+                     K_value = gr.Slider(minimum=2, maximum=1000, value=2, step=1, label="K value")
+                     min_size_value = gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Min Size Factor")
+                     felzenszwalb_display_btn = gr.Button("Segment this image")
+
+                 with gr.Column(scale=2):
+                     felzenszwalb_image_output = gr.Image(label="Original Image", container=True)
+                     felzenszwalb_segmented_image_output = gr.Image(label="Felzenszwalb Segmented Image", container=True)
+
+             felzenszwalb_display_btn.click(
+                 fn=generate_felzenszwalb,
+                 inputs=[felzenszwalb_file_input, sigma_value, K_value, min_size_value],
+                 outputs=[felzenszwalb_image_output, felzenszwalb_segmented_image_output]
+             )
+         with gr.TabItem("SegNet EfficientNet B0 Segmentation"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     segnet_file_input = gr.File(label="Upload Image File")
+                     segnet_display_btn = gr.Button("Segment this image")
+
+                 with gr.Column(scale=2):
+                     segnet_image_output = gr.Image(label="Original Image")
+                     segnet_segmented_image_output = gr.Image(label="SegNet Segmented Image")
+
+             segnet_display_btn.click(
+                 fn=SegNet_efficient_b0,
+                 inputs=[segnet_file_input],
+                 outputs=[segnet_image_output, segnet_segmented_image_output]
+             )
+         with gr.TabItem("SegNet VGG Segmentation"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     # Distinct names so the EfficientNet tab's widgets are not shadowed
+                     segnet_vgg_file_input = gr.File(label="Upload Image File")
+                     segnet_vgg_display_btn = gr.Button("Segment this image")
+
+                 with gr.Column(scale=2):
+                     segnet_vgg_image_output = gr.Image(label="Original Image")
+                     segnet_vgg_segmented_image_output = gr.Image(label="SegNet VGG Segmented Image")
+
+             segnet_vgg_display_btn.click(
+                 fn=generate_segnet_vgg,
+                 inputs=[segnet_vgg_file_input],
+                 outputs=[segnet_vgg_image_output, segnet_vgg_segmented_image_output]
+             )
+         with gr.TabItem("Ensemble Segmentation"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     ensemble_file_input = gr.File(label="Upload Image File")
+                     ensemble_display_btn = gr.Button("Segment with Ensemble Method")
+
+                 with gr.Column(scale=2):
+                     ensemble_image_output = gr.Image(label="Original Image")
+                     ensemble_mask = gr.Image(label="Ensemble Segmented Image")
+                     ensemble_segnet_segmented_output = gr.Image(label="SegNet Efficient B0 Segmented Image")
+                     ensemble_otsu_segmented_output = gr.Image(label="Otsu Segmented Image")
+
+             ensemble_display_btn.click(
+                 fn=ensemble_segmentation,
+                 inputs=[ensemble_file_input],
+                 outputs=[ensemble_image_output, ensemble_segnet_segmented_output, ensemble_otsu_segmented_output, ensemble_mask]
+             )
+
+ if __name__ == "__main__":
+     demo.launch()
bird.jpeg ADDED
enhaned_kmeans_segmented.png ADDED
experiments/SegNet/efficient_b0_backbone/architecture.py ADDED
@@ -0,0 +1,178 @@
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torchvision import models, transforms
+ from torchvision.datasets import VOCSegmentation
+ from torch.utils.data import DataLoader
+ from torch.utils.data import Dataset
+ import glob
+ from PIL import Image
+ import numpy as np
+ import wandb
+ import pandas as pd
+ import os
+ import matplotlib.pyplot as plt
+ import opendatasets as opd
+ import zipfile
+
+ torch.manual_seed(42)
+ np.random.seed(42)
+
+ # wandb.login(key="your_wandb_api_key_here")
+
+ EPOCHS = 25
+ BATCH_SIZE = 8
+ LR = 1e-3
+ NUM_CLASSES = 32
+ DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ # wandb.init(project="segnet-efficientnet-camvid", config={
+ #     "epochs": EPOCHS,
+ #     "batch_size": BATCH_SIZE,
+ #     "learning_rate": LR,
+ #     "architecture": "SegNet-EfficientNet",
+ #     "dataset": "CamVid"
+ # })
+
+ class SegNetEfficientNet(nn.Module):
+     def __init__(self, num_classes=32):
+         super(SegNetEfficientNet, self).__init__()
+         base_model = models.efficientnet_b0(pretrained=True)
+         features = list(base_model.features.children())
+
+         # EfficientNet-B0 backbone (output channels gradually increase to 1280)
+         self.encoder = nn.Sequential(*features)  # Output: [B, 1280, H/32, W/32]
+
+         # Decoder blocks (mirroring the encoder with ConvTranspose2d)
+         self.decoder = nn.Sequential(
+             nn.ConvTranspose2d(1280, 512, kernel_size=2, stride=2),
+             nn.BatchNorm2d(512),
+             nn.ReLU(inplace=True),
+
+             nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
+             nn.BatchNorm2d(256),
+             nn.ReLU(inplace=True),
+
+             nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
+             nn.BatchNorm2d(128),
+             nn.ReLU(inplace=True),
+
+             nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
+             nn.BatchNorm2d(64),
+             nn.ReLU(inplace=True),
+
+             nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2),
+             nn.BatchNorm2d(32),
+             nn.ReLU(inplace=True),
+         )
+
+         self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)
+
+     def forward(self, x):
+         x = self.encoder(x)  # Downsampled features from EfficientNet
+         x = self.decoder(x)  # Upsampled
+         x = self.classifier(x)
+         x = F.interpolate(x, size=(360, 480), mode='bilinear', align_corners=False)
+
+         return x
+
+ class CamVidDataset(Dataset):
+     """
+     CamVid dataset loader with RGB mask to class index conversion.
+     Expects directory structure:
+         camvid/
+             train/
+             train_labels/
+             val/
+             val_labels/
+             test/
+             test_labels/
+     """
+     def __init__(self, root, split='train', transform=None, image_size=(360, 480), target_transform=None, class_dict_path='camvid/CamVid/class_dict.csv'):
+         self.root = root
+         self.split = split
+         self.transform = transform
+         self.target_transform = target_transform
+
+         self.image_dir = os.path.join(root, split)
+         self.label_dir = os.path.join(root, f"{split}_labels")
+
+         self.image_paths = sorted(glob.glob(os.path.join(self.image_dir, '*.png')))
+         self.label_paths = sorted(glob.glob(os.path.join(self.label_dir, '*.png')))
+         self.label_resize = transforms.Resize(image_size, interpolation=Image.NEAREST)
+         self.image_resize = transforms.Resize(image_size, interpolation=Image.BILINEAR)
+         assert len(self.image_paths) == len(self.label_paths), "Mismatch between images and labels."
+
+         # Load class_dict.csv and build the color-to-class mapping
+         df = pd.read_csv(class_dict_path)
+         self.color_to_class = {
+             (row['r'], row['g'], row['b']): idx for idx, row in df.iterrows()
+         }
+
+     def __len__(self):
+         return len(self.image_paths)
+
+     def rgb_to_class(self, mask):
+         """Convert an RGB mask (PIL.Image) to a 2D class index mask."""
+         mask_np = np.array(mask)
+         h, w, _ = mask_np.shape
+         class_mask = np.zeros((h, w), dtype=np.uint8)
+
+         for rgb, class_idx in self.color_to_class.items():
+             matches = (mask_np == rgb).all(axis=2)
+             class_mask[matches] = class_idx
+
+         return class_mask
+
+     def __getitem__(self, idx):
+         image = Image.open(self.image_paths[idx]).convert('RGB')
+         label = Image.open(self.label_paths[idx]).convert('RGB')
+
+         # Resize both to 360x480
+         image = self.image_resize(image)
+         label = self.label_resize(label)
+
+         if self.transform:
+             image = self.transform(image)
+
+         label = self.rgb_to_class(label)
+         label = torch.from_numpy(label).long()
+
+         return image, label
+
+ if __name__ == "__main__":
+     dataset_url = "https://www.kaggle.com/datasets/carlolepelaars/camvid"
+     opd.download(dataset_url)
+
+     # Set the dataset folder (adjust the path if needed)
+     dataset_folder = "camvid"
+     print("Dataset directory contents:")
+     print(os.listdir(dataset_folder))
+     input_transform = transforms.Compose([
+         transforms.Resize((360, 480)),  # Or larger if needed
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                              std=[0.229, 0.224, 0.225])
+     ])
+
+     def label_transform(label):
+         # Resize using nearest neighbor so that labels are not interpolated
+         label = label.resize((480, 360), Image.NEAREST)
+         label = np.array(label, dtype=np.int64)
+         return torch.from_numpy(label)
+
+     num_classes = 32
+     data_root = 'camvid/CamVid/'  # make sure this matches your structure
+
+     # Load the datasets and dataloaders
+     train_dataset = CamVidDataset(root=data_root, split='train',
+                                   transform=input_transform, target_transform=label_transform)
+     val_dataset = CamVidDataset(root=data_root, split='val',
+                                 transform=input_transform, target_transform=label_transform)
+     test_dataset = CamVidDataset(root=data_root, split='test',
+                                  transform=input_transform, target_transform=label_transform)
+
+     train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)
+     val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4)
+     test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True, num_workers=4)
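
A quick shape sanity check can help when modifying the decoder above. This is an illustrative sketch, not part of the commit; it downloads the EfficientNet-B0 weights on first run.

```python
# Verify that the encoder/decoder plus final interpolation produce
# a [B, NUM_CLASSES, 360, 480] logit map for a CamVid-sized input.
import torch
from experiments.SegNet.efficient_b0_backbone.architecture import SegNetEfficientNet

model = SegNetEfficientNet(num_classes=32).eval()
with torch.no_grad():
    x = torch.randn(1, 3, 360, 480)  # one dummy RGB image
    logits = model(x)
print(logits.shape)  # torch.Size([1, 32, 360, 480]) -- F.interpolate fixes the output size
```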
experiments/SegNet/efficient_b0_backbone/train.py ADDED
@@ -0,0 +1,81 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torchvision import models, transforms
+ from torchvision.datasets import VOCSegmentation
+ from torch.utils.data import DataLoader
+ from PIL import Image
+ import numpy as np
+ import wandb
+ import os
+ import matplotlib.pyplot as plt
+ # NOTE: train_loader and val_loader are only built inside architecture.py's
+ # `if __name__ == "__main__"` block, so this import works only if that dataset
+ # setup is moved to module level. IMAGE_SIZE was dropped from the original
+ # import because architecture.py does not define it.
+ from .architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, LR, EPOCHS, train_loader, val_loader
+ from tqdm import tqdm
+
+ model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
+ optimizer = torch.optim.Adam(model.parameters(), lr=LR)
+ criterion = nn.CrossEntropyLoss(ignore_index=255)
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ def pixel_accuracy(preds, labels):
+     _, preds = torch.max(preds, 1)
+     correct = (preds == labels).float()
+     acc = correct.sum() / correct.numel()
+     return acc
+
+ # def mean_iou(preds, labels, num_classes=NUM_CLASSES):
+ #     _, preds = torch.max(preds, 1)
+ #     ious = []
+ #     for cls in range(num_classes):
+ #         intersection = ((preds == cls) & (labels == cls)).float().sum()
+ #         union = ((preds == cls) | (labels == cls)).float().sum()
+ #         if union > 0:
+ #             ious.append(intersection / union)
+ #     return sum(ious) / len(ious) if ious else 0
+
+ for epoch in tqdm(range(EPOCHS)):
+     model.train()
+     train_loss, train_acc = 0.0, 0.0
+
+     for images, masks in train_loader:
+         images, masks = images.to(DEVICE), masks.to(DEVICE)
+         optimizer.zero_grad()
+         outputs = model(images)
+         loss = criterion(outputs, masks)
+         loss.backward()
+         optimizer.step()
+
+         train_loss += loss.item()
+         train_acc += pixel_accuracy(outputs, masks).item()
+
+     train_loss /= len(train_loader)
+     train_acc /= len(train_loader)
+
+     # Validation
+     model.eval()
+     val_loss, val_acc = 0.0, 0.0
+     with torch.no_grad():
+         for images, masks in val_loader:
+             images, masks = images.to(DEVICE), masks.to(DEVICE)
+             outputs = model(images)
+             loss = criterion(outputs, masks)
+
+             val_loss += loss.item()
+             val_acc += pixel_accuracy(outputs, masks).item()
+
+     val_loss /= len(val_loader)
+     val_acc /= len(val_loader)
+
+     # wandb.log({
+     #     "epoch": epoch + 1,
+     #     "train_loss": train_loss,
+     #     "train_accuracy": train_acc,
+     #     "val_loss": val_loss,
+     #     "val_accuracy": val_acc
+     # })
+
+     print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")
+
+ torch.save(model.state_dict(), "segnet_efficientnet_camvid.pth")
+ # wandb.finish()
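
`pixel_accuracy` averages a 0/1 correctness value over every pixel in the batch. A tiny worked example (self-contained copy of the function above):

```python
import torch

def pixel_accuracy(preds, labels):
    _, preds = torch.max(preds, 1)          # logits [B, C, H, W] -> class ids [B, H, W]
    correct = (preds == labels).float()
    return correct.sum() / correct.numel()

logits = torch.zeros(1, 3, 2, 2)
logits[0, 1] = 1.0                          # argmax predicts class 1 everywhere
labels = torch.tensor([[[1, 1], [1, 0]]])   # three of four pixels are class 1
print(pixel_accuracy(logits, labels))       # tensor(0.7500)
```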
experiments/SegNet/vgg_backbone/SegNet_with_VGG16_backbone.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
experiments/SegNet/vgg_backbone/model.py ADDED
@@ -0,0 +1,48 @@
+ import torch
+ import torch.nn as nn
+ import torchvision.models as models
+
+ class SegNet(nn.Module):
+     def __init__(self, num_classes=32):
+         super(SegNet, self).__init__()
+         vgg16 = models.vgg16_bn(pretrained=True)
+         self.pool = nn.MaxPool2d(2, 2, return_indices=True)
+         self.unpool = nn.MaxUnpool2d(2, 2)
+         self.enc1 = nn.Sequential(*vgg16.features[:6])
+         self.enc2 = nn.Sequential(*vgg16.features[7:13])
+         self.enc3 = nn.Sequential(*vgg16.features[14:23])
+         self.enc4 = nn.Sequential(*vgg16.features[24:33])
+         self.dec4 = self.decoder_block(512, 256)
+         self.dec3 = self.decoder_block(256, 128)
+         self.dec2 = self.decoder_block(128, 64)
+         self.dec1 = self.decoder_block(64, 64)
+         self.classifier = nn.Conv2d(64, num_classes, kernel_size=1)
+
+     def decoder_block(self, in_channels, out_channels):
+         return nn.Sequential(
+             nn.Conv2d(in_channels, in_channels, 3, padding=1),
+             nn.BatchNorm2d(in_channels),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(in_channels, out_channels, 3, padding=1),
+             nn.BatchNorm2d(out_channels),
+             nn.ReLU(inplace=True)
+         )
+
+     def forward(self, x):
+         x1 = self.enc1(x)
+         x1p, ind1 = self.pool(x1)
+         x2 = self.enc2(x1p)
+         x2p, ind2 = self.pool(x2)
+         x3 = self.enc3(x2p)
+         x3p, ind3 = self.pool(x3)
+         x4 = self.enc4(x3p)
+         x4p, ind4 = self.pool(x4)
+         d4 = self.unpool(x4p, ind4, output_size=x4.size())
+         d4 = self.dec4(d4)
+         d3 = self.unpool(d4, ind3, output_size=x3.size())
+         d3 = self.dec3(d3)
+         d2 = self.unpool(d3, ind2, output_size=x2.size())
+         d2 = self.dec2(d2)
+         d1 = self.unpool(d2, ind1, output_size=x1.size())
+         d1 = self.dec1(d1)
+         return self.classifier(d1)
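
The decoder above depends on `MaxUnpool2d` reusing the indices recorded by `MaxPool2d(return_indices=True)`, which places each pooled maximum back at its original location. A standalone sketch of that mechanism:

```python
import torch
import torch.nn as nn

pool = nn.MaxPool2d(2, 2, return_indices=True)
unpool = nn.MaxUnpool2d(2, 2)

x = torch.tensor([[[[1., 2., 3., 4.],
                    [5., 6., 7., 8.],
                    [9., 10., 11., 12.],
                    [13., 14., 15., 16.]]]])
pooled, indices = pool(x)                            # pooled: [[6, 8], [14, 16]]
restored = unpool(pooled, indices, output_size=x.size())
print(restored.squeeze())  # maxima restored to their original positions, zeros elsewhere
```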
experiments/enhanced_kmeans_segmenter.py ADDED
@@ -0,0 +1,100 @@
+ import numpy as np
+ import cv2
+ import matplotlib.pyplot as plt
+ from tqdm import tqdm
+ from PIL import Image
+
+ def slic_kmeans(image_path, K=100, m=10, max_iter=10):
+     """
+     Perform superpixel segmentation using enhanced K-means on LAB+XY features.
+     Args:
+         image_path (str): Path to the RGB input image.
+         K (int): Number of superpixels.
+         m (float): Compactness factor.
+         max_iter (int): Number of iterations.
+     Returns:
+         jpg_image: The original image (PIL).
+         segmented_img: The segmented image with cluster colors (PIL).
+         labels: Cluster label for each pixel.
+         centers: Final cluster centers.
+     """
+     jpg_image = Image.open(image_path)
+     image = np.array(jpg_image)
+     h, w = image.shape[:2]
+     S = int(np.sqrt(h * w / K))  # grid interval
+
+     # Convert to the LAB color space
+     lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB).astype(np.float32)
+
+     # Create a 5D feature vector [L, a, b, x, y]
+     X, Y = np.meshgrid(np.arange(w), np.arange(h))
+     features = np.dstack((lab, X, Y)).reshape((-1, 5))
+
+     # Initialize the cluster centers on a regular grid
+     centers = []
+     for y in range(S // 2, h, S):
+         for x in range(S // 2, w, S):
+             center = features[y * w + x]
+             centers.append(center)
+     centers = np.array(centers)
+
+     labels = np.full((h * w,), -1, dtype=np.int32)
+     distances = np.full((h * w,), np.inf)
+
+     for iteration in tqdm(range(max_iter)):
+         for idx, center in enumerate(centers):
+             l, a, b, cx, cy = center
+             x_start, x_end = max(0, int(cx - S)), min(w, int(cx + S))
+             y_start, y_end = max(0, int(cy - S)), min(h, int(cy + S))
+
+             for y in range(y_start, y_end):
+                 for x in range(x_start, x_end):
+                     i = y * w + x
+                     fp = features[i]
+                     dc = np.linalg.norm(fp[:3] - center[:3])  # LAB distance
+                     ds = np.linalg.norm(fp[3:] - center[3:])  # XY distance
+                     D = np.sqrt(dc**2 + (ds / S)**2 * m**2)
+
+                     if D < distances[i]:
+                         distances[i] = D
+                         labels[i] = idx
+
+         # Update the cluster centers
+         new_centers = np.zeros_like(centers)
+         count = np.zeros(len(centers))
+         for i in range(h * w):
+             lbl = labels[i]
+             new_centers[lbl] += features[i]
+             count[lbl] += 1
+         for i in range(len(centers)):
+             if count[i] > 0:
+                 new_centers[i] /= count[i]
+         centers = new_centers
+
+     # Recolor the image based on the cluster centers
+     segmented_img = np.zeros((h, w, 3), dtype=np.uint8)
+     for i in range(h * w):
+         lbl = labels[i]
+         lab_val = centers[lbl][:3]
+         lab_pixel = np.uint8([[lab_val]])
+         rgb_pixel = cv2.cvtColor(lab_pixel, cv2.COLOR_LAB2RGB)[0][0]
+         segmented_img[i // w, i % w] = rgb_pixel
+
+     return jpg_image, Image.fromarray(segmented_img), labels.reshape((h, w)), centers
+
+ # img_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
+ # image = cv2.imread(img_path)
+ # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ # _, seg_img, labels, centers = slic_kmeans(image, K=2, m=20)
+ # seg_img.save("enhaned_kmeans_segmented.png")
+ # plt.figure(figsize=(10, 5))
+ # plt.subplot(1, 2, 1)
+ # plt.imshow(image)
+ # plt.title("Original Image")
+ # plt.axis("off")
+
+ # plt.subplot(1, 2, 2)
+ # plt.imshow(seg_img)
+ # plt.title("SLIC-like K-Means Segmentation")
+ # plt.axis("off")
+ # plt.tight_layout()
+ # plt.show()
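
A minimal runnable usage sketch (illustrative; `toy.png` is a hypothetical temp file). The pure-Python loops are slow, so a small synthetic image keeps it fast:

```python
import numpy as np
from PIL import Image
from experiments.enhanced_kmeans_segmenter import slic_kmeans

img = np.zeros((64, 64, 3), dtype=np.uint8)
img[:, 32:] = [255, 0, 0]                     # left half black, right half red
Image.fromarray(img).save("toy.png")

orig, seg_img, labels, centers = slic_kmeans("toy.png", K=16, m=10, max_iter=3)
print(labels.shape)                           # (64, 64): one superpixel id per pixel
seg_img.save("toy_slic.png")
```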
experiments/ensemble_method.py ADDED
@@ -0,0 +1,148 @@
+ import torch
+ import numpy as np
+ from PIL import Image
+ import cv2
+ from torchvision import transforms
+ from experiments.otsu_segmenter import otsu_threshold
+ from experiments.SegNet.efficient_b0_backbone.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE
+
+ def ensemble_segmentation(image_path, model_path="segnet_efficientnet_voc.pth", boundary_weight=0.3):
+     """
+     Ensemble segmentation combining Otsu thresholding and SegNet.
+
+     Args:
+         image_path: Path to the input image
+         model_path: Path to the SegNet model weights
+         boundary_weight: Weight for boundary refinement (0-1)
+
+     Returns:
+         original_image: Original input image (PIL)
+         ensemble_result: Ensemble segmentation result (PIL)
+         method_comparison: Visualization of all methods side by side (PIL)
+     """
+     # 1. Load the image
+     image = Image.open(image_path).convert('RGB')
+     original = image.copy()
+     image_np = np.array(image)
+
+     # 2. Run Otsu thresholding for boundary detection:
+     # convert to grayscale and apply a Gaussian blur
+     gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
+     blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+     otsu_threshold_value, otsu_mask = otsu_threshold(blurred)
+
+     # 3. Run SegNet for semantic segmentation
+     model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
+     model.load_state_dict(torch.load(model_path, map_location=DEVICE))
+     model.eval()
+
+     transform = transforms.Compose([
+         transforms.Resize((360, 480)),  # Or larger if needed
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                              std=[0.229, 0.224, 0.225])
+     ])
+
+     input_tensor = transform(image).unsqueeze(0).to(DEVICE)
+
+     with torch.no_grad():
+         output = model(input_tensor)
+         segnet_pred = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
+
+     # 4. Create an edge map from the Otsu result
+     edges = cv2.Canny(otsu_mask, 50, 150)
+
+     # Resize to match the SegNet output size
+     edges_resized = cv2.resize(edges, (segnet_pred.shape[1], segnet_pred.shape[0]),
+                                interpolation=cv2.INTER_NEAREST)
+
+     # 5. Ensemble: use the Otsu edges to refine the SegNet boundaries.
+     # Create a distance transform from the edges
+     dist_transform = cv2.distanceTransform(255 - edges_resized, cv2.DIST_L2, 5)
+     dist_transform = dist_transform / dist_transform.max()  # Normalize to 0-1
+
+     # Areas close to edges get more influence from Otsu
+     edge_weight_map = np.exp(-dist_transform * 5) * boundary_weight
+
+     # Create a binary mask from SegNet (foreground = any class other than background)
+     segnet_binary = (segnet_pred > 0).astype(np.uint8) * 255
+
+     # Resize the Otsu mask to match the SegNet output
+     otsu_resized = cv2.resize(otsu_mask, (segnet_pred.shape[1], segnet_pred.shape[0]),
+                               interpolation=cv2.INTER_NEAREST)
+
+     # Combine: use the SegNet classes but refine the boundaries with Otsu.
+     # For boundary regions, adjust the segmentation based on Otsu
+     refined_binary = segnet_binary.copy()
+     boundary_region = edge_weight_map > 0.1
+     refined_binary[boundary_region] = (
+         (1 - edge_weight_map[boundary_region]) * segnet_binary[boundary_region] +
+         edge_weight_map[boundary_region] * otsu_resized[boundary_region]
+     ).astype(np.uint8)
+
+     # Apply the refined binary mask to the original SegNet prediction
+     ensemble_result = segnet_pred.copy()
+     # Where the refined binary is 0, set the background class (0)
+     ensemble_result[refined_binary < 128] = 0
+
+     # 6. Visualize the results
+     from matplotlib import cm
+     import matplotlib.pyplot as plt
+     import io
+
+     # Convert the semantic maps to color visualizations
+     colormap = cm.get_cmap('nipy_spectral')
+
+     segnet_colored = colormap(segnet_pred / (NUM_CLASSES - 1))
+     segnet_colored = (segnet_colored[:, :, :3] * 255).astype(np.uint8)
+
+     ensemble_colored = colormap(ensemble_result / (NUM_CLASSES - 1))
+     ensemble_colored = (ensemble_colored[:, :, :3] * 255).astype(np.uint8)
+
+     # Create a side-by-side comparison
+     fig, axes = plt.subplots(1, 4, figsize=(16, 4))
+
+     # Resize the original image to match the segmentation size
+     original_resized = original.resize((segnet_pred.shape[1], segnet_pred.shape[0]))
+
+     axes[0].imshow(original_resized)
+     axes[0].set_title("Original Image")
+     axes[0].axis('off')
+
+     axes[1].imshow(otsu_mask, cmap='gray')
+     axes[1].set_title(f"Otsu (t={otsu_threshold_value})")
+     axes[1].axis('off')
+
+     axes[2].imshow(segnet_colored)
+     axes[2].set_title("SegNet Prediction")
+     axes[2].axis('off')
+
+     axes[3].imshow(ensemble_colored)
+     axes[3].set_title("Ensemble Result")
+     axes[3].axis('off')
+
+     plt.tight_layout()
+
+     # Convert the plot to an image
+     buf = io.BytesIO()
+     plt.savefig(buf, format='png')
+     buf.seek(0)
+     comparison_image = Image.open(buf)
+     plt.close(fig)
+
+     # Return the results
+     ensemble_pil = Image.fromarray(ensemble_colored)
+     ensemble_pil = ensemble_pil.resize(original.size, Image.NEAREST)
+
+     return original, ensemble_pil, comparison_image
+
+ def generate_ensemble_segmentation(image_path, boundary_weight=0.3):
+     """Wrapper for the Gradio interface."""
+     original, ensemble_result, comparison = ensemble_segmentation(
+         image_path,
+         model_path="saved_models/segnet_efficientnet_camvid.pth",
+         boundary_weight=boundary_weight
+     )
+     return original, ensemble_result, comparison
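
The boundary-refinement step weights each pixel by its distance to an Otsu edge: roughly `boundary_weight` on the edge itself, decaying toward zero away from it. An illustrative sketch of just that step on a synthetic edge map:

```python
import numpy as np
import cv2

edges = np.zeros((5, 5), dtype=np.uint8)
edges[:, 2] = 255                              # a single vertical edge
dist = cv2.distanceTransform(255 - edges, cv2.DIST_L2, 5)
dist = dist / dist.max()                       # normalize to 0-1
edge_weight_map = np.exp(-dist * 5) * 0.3      # boundary_weight = 0.3
print(edge_weight_map.round(2))                # peaks at 0.3 on the edge column, ~0 elsewhere
```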
experiments/felzenszwalb_segmentation/__init__.py ADDED
@@ -0,0 +1 @@
+ from .segmentation import segment
experiments/felzenszwalb_segmentation/disjoint_set.py ADDED
@@ -0,0 +1,39 @@
+ import numpy as np
+
+
+ class DisjointSet:
+
+     def __init__(self, n_elements):
+         self.num = n_elements
+         # Each element stores [rank, size, parent]
+         self.elements = np.empty(
+             shape=(n_elements, 3),
+             dtype=int
+         )
+         for i in range(n_elements):
+             self.elements[i, 0] = 0
+             self.elements[i, 1] = 1
+             self.elements[i, 2] = i
+
+     def size(self, x):
+         return self.elements[x, 1]
+
+     def num_sets(self):
+         return self.num
+
+     def find(self, x):
+         y = int(x)
+         while y != self.elements[y, 2]:
+             y = self.elements[y, 2]
+         self.elements[x, 2] = y
+         return y
+
+     def join(self, x, y):
+         # Union by rank; x and y must be set representatives (roots)
+         if self.elements[x, 0] > self.elements[y, 0]:
+             self.elements[y, 2] = x
+             self.elements[x, 1] += self.elements[y, 1]
+         else:
+             self.elements[x, 2] = y
+             self.elements[y, 1] += self.elements[x, 1]
+             if self.elements[x, 0] == self.elements[y, 0]:
+                 self.elements[y, 0] += 1
+         self.num -= 1
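
Usage sketch for `DisjointSet` (illustrative): note that `join` expects set representatives, so call `find` first, as `segment_graph` does.

```python
from experiments.felzenszwalb_segmentation.disjoint_set import DisjointSet

ds = DisjointSet(4)
a, b = ds.find(0), ds.find(1)
if a != b:
    ds.join(a, b)
root = ds.find(0)
print(root == ds.find(1))   # True: 0 and 1 now share a representative
print(ds.size(root))        # 2
print(ds.num_sets())        # 3 sets remain: {0, 1}, {2}, {3}
```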
experiments/felzenszwalb_segmentation/segmentation.py ADDED
@@ -0,0 +1,83 @@
+ import numpy as np
+ from .disjoint_set import DisjointSet
+ from .utils import smoothen, difference, get_random_rgb_image
+
+
+ def segment_graph(num_vertices, num_edges, edges, c):
+     # Sort the edges by non-decreasing weight
+     edges[0:num_edges, :] = edges[edges[0:num_edges, 2].argsort()]
+     u = DisjointSet(num_vertices)
+     threshold = np.zeros(shape=num_vertices, dtype=float)
+     for i in range(num_vertices):
+         threshold[i] = c
+     for i in range(num_edges):
+         pedge = edges[i, :]
+         a = u.find(pedge[0])
+         b = u.find(pedge[1])
+         if a != b:
+             if (pedge[2] <= threshold[a]) and (pedge[2] <= threshold[b]):
+                 u.join(a, b)
+                 a = u.find(a)
+                 threshold[a] = pedge[2] + (c / u.size(a))
+     return u
+
+
+ def segment(in_image, sigma, k, min_size):
+     height, width, band = in_image.shape
+     smooth_red_band = smoothen(in_image[:, :, 0], sigma)
+     smooth_green_band = smoothen(in_image[:, :, 1], sigma)
+     smooth_blue_band = smoothen(in_image[:, :, 2], sigma)
+     # Build the graph: each pixel connects to its right, down, down-right,
+     # and up-right neighbors (an 8-connected grid without duplicates)
+     edges_size = width * height * 4
+     edges = np.zeros(shape=(edges_size, 3), dtype=object)
+     num = 0
+     for y in range(height):
+         for x in range(width):
+             if x < width - 1:
+                 edges[num, 0] = int(y * width + x)
+                 edges[num, 1] = int(y * width + (x + 1))
+                 edges[num, 2] = difference(
+                     smooth_red_band, smooth_green_band,
+                     smooth_blue_band, x, y, x + 1, y
+                 )
+                 num += 1
+             if y < height - 1:
+                 edges[num, 0] = int(y * width + x)
+                 edges[num, 1] = int((y + 1) * width + x)
+                 edges[num, 2] = difference(
+                     smooth_red_band, smooth_green_band,
+                     smooth_blue_band, x, y, x, y + 1
+                 )
+                 num += 1
+             # Down-right diagonal; the bound was `y < height - 2` in the
+             # original, which skipped the second-to-last row's diagonals
+             if (x < width - 1) and (y < height - 1):
+                 edges[num, 0] = int(y * width + x)
+                 edges[num, 1] = int((y + 1) * width + (x + 1))
+                 edges[num, 2] = difference(
+                     smooth_red_band, smooth_green_band,
+                     smooth_blue_band, x, y, x + 1, y + 1
+                 )
+                 num += 1
+             if (x < width - 1) and (y > 0):
+                 edges[num, 0] = int(y * width + x)
+                 edges[num, 1] = int((y - 1) * width + (x + 1))
+                 edges[num, 2] = difference(
+                     smooth_red_band, smooth_green_band,
+                     smooth_blue_band, x, y, x + 1, y - 1
+                 )
+                 num += 1
+     u = segment_graph(width * height, num, edges, k)
+     # Merge components smaller than min_size
+     for i in range(num):
+         a = u.find(edges[i, 0])
+         b = u.find(edges[i, 1])
+         if (a != b) and ((u.size(a) < min_size) or (u.size(b) < min_size)):
+             u.join(a, b)
+     num_cc = u.num_sets()
+     output = np.zeros(shape=(height, width, 3))
+
+     # Color each component with a random RGB value
+     colors = np.zeros(shape=(height * width, 3))
+     for i in range(height * width):
+         colors[i, :] = get_random_rgb_image()
+     for y in range(height):
+         for x in range(width):
+             comp = u.find(y * width + x)
+             output[y, x, :] = colors[comp, :]
+     return output
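
An illustrative end-to-end call of `segment` on a tiny random image (kept small because the edge loops are pure Python):

```python
import numpy as np
from experiments.felzenszwalb_segmentation import segment

rng = np.random.default_rng(0)
img = rng.integers(0, 256, size=(32, 32, 3)).astype(float)
out = segment(img, sigma=0.8, k=300, min_size=20)
print(out.shape)  # (32, 32, 3): each segment is painted a random color
print(len(np.unique(out.reshape(-1, 3), axis=0)))  # approximate number of segments
```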
experiments/felzenszwalb_segmentation/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .utils import *
+ from .filter_utils import *
experiments/felzenszwalb_segmentation/utils/filter_utils.py ADDED
@@ -0,0 +1,38 @@
+ import numpy as np
+ from math import ceil, exp, pow
+
+
+ def convolve(src, mask):
+     """Convolve each row of src with a symmetric (even) 1D mask."""
+     output = np.zeros(shape=src.shape, dtype=float)
+     height, width = src.shape
+     length = len(mask)
+     for y in range(height):
+         for x in range(width):
+             total = float(mask[0] * src[y, x])
+             for i in range(1, length):
+                 total += mask[i] * (
+                     src[y, max(x - i, 0)] + src[y, min(x + i, width - 1)])
+             output[y, x] = total
+     return output
+
+
+ def normalize(mask):
+     # The one-sided mask is mirrored during convolution, so every tap except
+     # mask[0] is counted twice. (The original also double-counted mask[0].)
+     total = 2 * np.sum(np.absolute(mask[1:])) + abs(mask[0])
+     return np.divide(mask, total)
+
+
+ def smoothen(src, sigma):
+     mask = make_gaussian_filter(sigma)
+     mask = normalize(mask)
+     # Separable Gaussian blur: smooth the rows, then the columns.
+     # (The original convolved the rows twice, leaving columns unsmoothed.)
+     tmp = convolve(src, mask)
+     dst = convolve(tmp.T, mask).T
+     return dst
+
+
+ def make_gaussian_filter(sigma):
+     sigma = max(sigma, 0.01)
+     length = int(ceil(sigma * 4.0)) + 1
+     mask = np.zeros(shape=length, dtype=float)
+     for i in range(length):
+         # Gaussian: exp(-0.5 * (i / sigma)^2). The original raised i/sigma
+         # to the power i/sigma, which is not a Gaussian.
+         mask[i] = exp(-0.5 * pow(i / sigma, 2))
+     return mask
@@ -0,0 +1,25 @@
 
+ import numpy as np
+ from math import sqrt
+ from random import randint
+
+
+ def difference(red_band, green_band, blue_band, x1, y1, x2, y2):
+     return sqrt(
+         (red_band[y1, x1] - red_band[y2, x2]) ** 2 +
+         (green_band[y1, x1] - green_band[y2, x2]) ** 2 +
+         (blue_band[y1, x1] - blue_band[y2, x2]) ** 2
+     )
+
+
+ def get_random_rgb_image():
+     # Returns a random RGB triple used to color one segment
+     rgb = np.zeros(3, dtype=int)
+     rgb[0] = randint(0, 255)
+     rgb[1] = randint(0, 255)
+     rgb[2] = randint(0, 255)
+     return rgb
+
+
+ def get_random_gray_image():
+     gray = np.zeros(1, dtype=int)
+     gray[0] = randint(0, 255)
+     return gray
experiments/kmeans_segmenter.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import cv2
+ from PIL import Image
+ import io
+
+ def initialize_centroids(data, K):
+     """Randomly choose K data points as initial centroids."""
+     indices = np.random.choice(data.shape[0], K, replace=False)
+     return data[indices]
+
+ def compute_distances(data, centroids):
+     """Compute the Euclidean distance between each data point and each centroid."""
+     return np.linalg.norm(data[:, np.newaxis] - centroids, axis=2)
+
+ def update_centroids(data, labels, K):
+     """Update centroids as the mean of the points assigned to each cluster."""
+     new_centroids = np.zeros((K, data.shape[1]))
+     for k in range(K):
+         cluster_points = data[labels == k]
+         if len(cluster_points) > 0:
+             new_centroids[k] = np.mean(cluster_points, axis=0)
+     return new_centroids
+
+ def kmeans_from_scratch(image, K=4, max_iters=100, tol=1e-4):
+     """Apply K-means clustering from scratch to segment the image."""
+     data = image.reshape((-1, 3)).astype(np.float32)
+
+     centroids = initialize_centroids(data, K)
+
+     for i in range(max_iters):
+         distances = compute_distances(data, centroids)
+         labels = np.argmin(distances, axis=1)
+
+         new_centroids = update_centroids(data, labels, K)
+         shift = np.linalg.norm(new_centroids - centroids)
+
+         if shift < tol:
+             break
+         centroids = new_centroids
+
+     segmented_data = centroids[labels].astype(np.uint8)
+     segmented_image = segmented_data.reshape(image.shape)
+
+     return segmented_image, labels.reshape(image.shape[:2]), centroids.astype(np.uint8)
+
+ def generate_kmeans_segmented_image(image_path, k=3):
+     """Process image with K-means for the Gradio app."""
+     image = Image.open(image_path)
+     image_np = np.array(image)
+
+     if len(image_np.shape) == 3:
+         image_rgb = image_np  # PIL already delivers RGB channel order
+     else:
+         image_rgb = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)
+
+     seg_img, labels, centers = kmeans_from_scratch(image_rgb, K=k)
+
+     # Swatch strip showing the k cluster colours, 50 px per cluster
+     colors_image = np.zeros((50 * k, 100, 3), dtype=np.uint8)
+     for i, color in enumerate(centers):
+         colors_image[i*50:(i+1)*50, :] = color
+
+     fig, axes = plt.subplots(1, 3, figsize=(12, 4))
+
+     axes[0].imshow(image_rgb)
+     axes[0].set_title("Original Image")
+     axes[0].axis('off')
+
+     axes[1].imshow(seg_img)
+     axes[1].set_title(f"K-Means (K={k})")
+     axes[1].axis('off')
+
+     axes[2].imshow(colors_image)
+     axes[2].set_title("Cluster Colors")
+     axes[2].axis('off')
+
+     plt.tight_layout()
+
+     buf = io.BytesIO()
+     fig.savefig(buf, format='png')
+     buf.seek(0)
+     comparison_image = Image.open(buf)
+     plt.close(fig)
+
+     return image, Image.fromarray(seg_img), comparison_image, f"K-Means clustering with K={k}"
+
+ if __name__ == "__main__":
+     image_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
+     original, segmented, comparison, text = generate_kmeans_segmented_image(image_path, k=3)
+
+     # Save output images instead of displaying them
+     segmented.save("kmeans_segmented.png")
+     comparison.save("kmeans_comparison.png")
+     print(text)
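
As a cross-check on the loop above, OpenCV's built-in k-means can be run on the same flattened pixel data; a minimal sketch, assuming `image_rgb` is any HxWx3 uint8 array such as the one loaded in `generate_kmeans_segmented_image`:

    import numpy as np
    import cv2

    def opencv_kmeans(image_rgb, k=3):
        data = image_rgb.reshape((-1, 3)).astype(np.float32)
        # Same stopping rule as the scratch version: 100 iterations or a small eps.
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-4)
        _, labels, centers = cv2.kmeans(data, k, None, criteria, 10,
                                        cv2.KMEANS_RANDOM_CENTERS)
        return centers.astype(np.uint8)[labels.flatten()].reshape(image_rgb.shape)

Because both versions start from random centroids, the two outputs agree up to a permutation of cluster colours rather than pixel-for-pixel.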
experiments/otsu_segmenter.py ADDED
@@ -0,0 +1,95 @@
+ import numpy as np
+ import cv2
+ import matplotlib.pyplot as plt
+ from PIL import Image
+ import io
+
+ def otsu_threshold(image):
+     hist, bin_edges = np.histogram(image.flatten(), bins=256, range=[0, 256])
+     hist = hist.astype(float)
+     total_pixels = image.size
+     pixel_probability = hist / total_pixels
+
+     max_variance = 0
+     optimal_threshold = 0
+
+     for threshold in range(1, 256):
+         weight_background = np.sum(pixel_probability[:threshold])
+         weight_foreground = np.sum(pixel_probability[threshold:])
+
+         if weight_background == 0 or weight_foreground == 0:
+             continue
+
+         mean_background = np.sum(np.arange(threshold) * pixel_probability[:threshold]) / weight_background
+         mean_foreground = np.sum(np.arange(threshold, 256) * pixel_probability[threshold:]) / weight_foreground
+
+         # Between-class variance; Otsu maximizes this over all thresholds.
+         variance = weight_background * weight_foreground * (mean_background - mean_foreground) ** 2
+
+         if variance > max_variance:
+             max_variance = variance
+             optimal_threshold = threshold
+
+     segmented_image = np.zeros_like(image)
+     segmented_image[image >= optimal_threshold] = 255
+
+     return optimal_threshold, segmented_image
+
+ def generate_segmented_image(image_path):
+     # Load with PIL, then convert to OpenCV format
+     print(f"Image path: {image_path}")
+     image = Image.open(image_path)
+     image_np = np.array(image)
+
+     if len(image_np.shape) == 3:
+         original_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+         gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
+     else:
+         original_image = image_np
+         gray_image = image_np.copy()
+
+     blurred = cv2.GaussianBlur(gray_image, (5, 5), 0)
+
+     # Our implementation
+     our_threshold, our_segmented = otsu_threshold(blurred)
+
+     # OpenCV's implementation
+     opencv_threshold, opencv_segmented = cv2.threshold(
+         blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
+     )
+
+     # Create histogram figure
+     fig, ax = plt.subplots(figsize=(6, 4))
+     ax.hist(gray_image.ravel(), 256, [0, 256], color='gray')
+     ax.axvline(x=our_threshold, color='red', linestyle='--', label=f'Ours: {our_threshold}')
+     ax.axvline(x=opencv_threshold, color='green', linestyle='--', label=f'OpenCV: {opencv_threshold}')
+     ax.set_title("Histogram with Thresholds")
+     ax.legend()
+
+     # Convert Matplotlib figure to image
+     buf = io.BytesIO()
+     plt.savefig(buf, format='png')
+     buf.seek(0)
+     hist_image = Image.open(buf)
+     plt.close(fig)  # Close the figure to free memory
+
+     return (
+         image,
+         Image.fromarray(our_segmented),
+         Image.fromarray(opencv_segmented),
+         hist_image,
+         f"Our Threshold: {our_threshold}\nOpenCV Threshold: {opencv_threshold}",
+     )
+
+ if __name__ == "__main__":
+     # Example usage; ensure the image path is set correctly
+     image_path = '/home/akshat/projects/CSL7360_Project/bird.jpeg'
+     original, ours, theirs, hist_image, text = generate_segmented_image(image_path)
+     print(text)
+
+     # Optionally, save results to files
+     ours.save("our_segmented.png")
+     theirs.save("opencv_segmented.png")
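
The threshold search above can also be collapsed into cumulative sums, which makes a handy equivalence check against both loops; a minimal sketch using the same histogram conventions (background = pixels below the threshold):

    import numpy as np

    def otsu_threshold_vectorized(image):
        hist, _ = np.histogram(image.flatten(), bins=256, range=[0, 256])
        p = hist.astype(float) / image.size
        omega = np.cumsum(p)                # background weight up to bin t
        mu = np.cumsum(np.arange(256) * p)  # first moment up to bin t
        mu_total = mu[-1]
        # Between-class variance for every candidate split; empty classes give
        # 0/0 and are zeroed out below.
        with np.errstate(divide='ignore', invalid='ignore'):
            sigma_b = (mu_total * omega - mu) ** 2 / (omega * (1.0 - omega))
        sigma_b = np.nan_to_num(sigma_b)
        # Bin index t puts [0, t] in the background, so the loop's threshold is t + 1.
        return int(np.argmax(sigma_b)) + 1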
experiments/watershed_segmenter.py ADDED
@@ -0,0 +1,208 @@
+ import numpy as np
+ import cv2
+ import heapq
+ import matplotlib.pyplot as plt
+
+ # 1. Compute local minima as markers
+ def get_local_minima(gray):
+     kernel = np.ones((3, 3), np.uint8)
+     eroded = cv2.erode(gray, kernel)
+     minima = (gray == eroded)
+     return minima.astype(np.uint8)
+
+ # 2. Label each connected component (marker)
+ def label_markers(minima):
+     num_labels, markers = cv2.connectedComponents(minima)
+     return markers, num_labels
+
+ # 3. Watershed from scratch
+ # Note: this function is redefined further below; the later definition
+ # (which skips the background marker) is the one Python keeps at runtime.
+ def watershed_from_scratch(gray, markers):
+     h, w = gray.shape
+     # Constants
+     WATERSHED = -1
+     INIT = -2
+
+     # Initialize label map
+     label_map = np.full((h, w), INIT, dtype=np.int32)
+     label_map[markers > 0] = markers[markers > 0]
+
+     # Priority queue for pixels: (intensity, y, x)
+     pq = []
+
+     # Populate queue with boundary of initial markers
+     for y in range(h):
+         for x in range(w):
+             if markers[y, x] > 0:
+                 for dy in [-1, 0, 1]:
+                     for dx in [-1, 0, 1]:
+                         ny, nx = y + dy, x + dx
+                         if 0 <= ny < h and 0 <= nx < w:
+                             if markers[ny, nx] == 0 and label_map[ny, nx] == INIT:
+                                 heapq.heappush(pq, (gray[ny, nx], ny, nx))
+                                 label_map[ny, nx] = 0  # Mark as in queue
+
+     # Flooding
+     while pq:
+         intensity, y, x = heapq.heappop(pq)
+
+         neighbor_labels = set()
+         for dy in [-1, 0, 1]:
+             for dx in [-1, 0, 1]:
+                 ny, nx = y + dy, x + dx
+                 if 0 <= ny < h and 0 <= nx < w:
+                     lbl = label_map[ny, nx]
+                     if lbl > 0:
+                         neighbor_labels.add(lbl)
+
+         if len(neighbor_labels) == 1:
+             label_map[y, x] = neighbor_labels.pop()
+         elif len(neighbor_labels) > 1:
+             label_map[y, x] = WATERSHED
+
+         # Add unvisited neighbors to the queue
+         for dy in [-1, 0, 1]:
+             for dx in [-1, 0, 1]:
+                 ny, nx = y + dy, x + dx
+                 if 0 <= ny < h and 0 <= nx < w:
+                     if label_map[ny, nx] == INIT:
+                         heapq.heappush(pq, (gray[ny, nx], ny, nx))
+                         label_map[ny, nx] = 0  # Mark as in queue
+
+     return label_map
+
+ def improved_watershed(image_path):
+     # Load and preprocess image
+     original = cv2.imread(image_path)
+     gray = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
+     blurred = cv2.GaussianBlur(gray, (9, 9), 2)
+
+     # Step 1: Better marker detection using adaptive thresholding
+     thresh = cv2.adaptiveThreshold(blurred, 255,
+                                    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                    cv2.THRESH_BINARY_INV, 21, 4)
+
+     # Step 2: Noise removal and sure background area
+     kernel = np.ones((3, 3), np.uint8)
+     opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
+
+     # Step 3: Distance transform for better foreground detection
+     dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
+     _, sure_fg = cv2.threshold(dist_transform, 0.5 * dist_transform.max(), 255, 0)
+     sure_fg = np.uint8(sure_fg)
+
+     # Step 4: Create markers using connected components
+     _, markers = cv2.connectedComponents(sure_fg)
+     markers += 1  # Add 1 to all labels so background is 1
+
+     # Step 5: Apply custom watershed algorithm
+     label_map = watershed_from_scratch(blurred, markers)
+
+     # Enhanced visualization
+     output = original.copy()
+     boundaries = (label_map == -1).astype(np.uint8) * 255
+     contours, _ = cv2.findContours(boundaries, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     cv2.drawContours(output, contours, -1, (0, 0, 255), 1)
+
+     # Create intermediate step visualization
+     process_steps = {
+         "Original": original,
+         "Blurred": cv2.cvtColor(blurred, cv2.COLOR_GRAY2BGR),
+         "Threshold": cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR),
+         "Foreground Markers": cv2.cvtColor(sure_fg, cv2.COLOR_GRAY2BGR),
+         "Final Segmentation": output
+     }
+
+     return process_steps
+
+ def watershed_from_scratch(gray, markers):
+     h, w = gray.shape
+     WATERSHED = -1
+     INIT = -2
+
+     label_map = np.full((h, w), INIT, dtype=np.int32)
+     label_map[markers > 1] = markers[markers > 1]  # Skip background marker
+
+     pq = []
+     # Initialize queue with marker boundaries
+     for y in range(h):
+         for x in range(w):
+             if label_map[y, x] > 0:
+                 for dy in [-1, 0, 1]:
+                     for dx in [-1, 0, 1]:
+                         ny, nx = y + dy, x + dx
+                         if 0 <= ny < h and 0 <= nx < w:
+                             if label_map[ny, nx] == INIT:
+                                 heapq.heappush(pq, (gray[ny, nx], ny, nx))
+                                 label_map[ny, nx] = 0  # Queued
+
+     # Improved flooding with gradient consideration
+     while pq:
+         intensity, y, x = heapq.heappop(pq)
+         neighbors = []
+
+         # Check 8 neighbors
+         for dy in [-1, 0, 1]:
+             for dx in [-1, 0, 1]:
+                 if dy == 0 and dx == 0:
+                     continue
+                 ny, nx = y + dy, x + dx
+                 if 0 <= ny < h and 0 <= nx < w:
+                     neighbors.append(label_map[ny, nx])
+
+         # Find unique labels excluding watershed and background
+         unique = set(n for n in neighbors if n > 0)
+
+         if len(unique) == 0:
+             label_map[y, x] = 1  # Background
+         elif len(unique) == 1:
+             label_map[y, x] = unique.pop()
+         else:
+             label_map[y, x] = WATERSHED
+
+         # Add neighbors to queue
+         for dy in [-1, 0, 1]:
+             for dx in [-1, 0, 1]:
+                 ny, nx = y + dy, x + dx
+                 if 0 <= ny < h and 0 <= nx < w:
+                     if label_map[ny, nx] == INIT:
+                         heapq.heappush(pq, (gray[ny, nx], ny, nx))
+                         label_map[ny, nx] = 0
+
+     return label_map
+
+ # Gradio integration would use:
+ def generate_watershed(image_path):
+     results = improved_watershed(image_path)
+     return (
+         results["Original"],
+         results["Blurred"],
+         results["Threshold"],
+     )
+
+ if __name__ == "__main__":
+     # Run the process on a grayscale image
+     image = cv2.imread("/home/akshat/projects/CSL7360_Project/bird.jpeg", cv2.IMREAD_GRAYSCALE)
+     image = cv2.GaussianBlur(image, (5, 5), 0)
+     minima = get_local_minima(image)
+     markers, num_labels = label_markers(minima)
+     result = watershed_from_scratch(image, markers)
+
+     # Visualization (OpenCV writes BGR channel order)
+     output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
+     output[result == -1] = [255, 0, 0]  # Watershed lines in blue
+     output[result > 0] = [0, 255, 0]    # Segments in green
+     output[markers > 0] = [0, 0, 255]   # Original minima in red
+
+     # Save the original grayscale and the output image
+     cv2.imwrite("original_grayscale.png", image)
+     cv2.imwrite("watershed_output.png", output)
+
+     print("Images saved as 'original_grayscale.png' and 'watershed_output.png'")
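
OpenCV's built-in watershed gives a reference result for the from-scratch flooding above; a minimal sketch (it requires a 3-channel BGR image plus an int32 marker array, and writes -1 on boundary pixels, matching the WATERSHED constant used above):

    import numpy as np
    import cv2

    def opencv_watershed(image_path):
        color = cv2.imread(image_path)
        gray = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
        # Otsu foreground mask as a simple marker source.
        _, thresh = cv2.threshold(gray, 0, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        _, markers = cv2.connectedComponents(np.uint8(thresh))
        markers = cv2.watershed(color, markers.astype(np.int32))
        out = color.copy()
        out[markers == -1] = (0, 0, 255)  # boundary pixels in red (BGR)
        return out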
kmeans_comparison.png ADDED

Git LFS Details

  • SHA256: d4a9201e30341e13019433ac556b9b2a3ffc44dd2d4adeae0d97e4829ab6860b
  • Pointer size: 131 Bytes
  • Size of remote file: 181 kB
kmeans_segmented.png ADDED
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ torch==2.5.1
+ torchvision==0.20.1
+ gradio==5.23.1
+ pillow==10.4.0
+ numpy==2.2.2
+ opencv-python==4.10.0.84
+ matplotlib==3.10.0
+ wandb==0.19.6
+ tqdm==4.67.1
+ gdown==5.2.0
+ opendatasets==0.1.22
saved_models/segnet_efficientnet_camvid.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f1e96df359eb0e1c153627880dc93e662b2ae5f998f9ed946ec71e726739481
+ size 29641657
saved_models/segnet_vgg.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ac7681151184571d468e4c408c30107dd8b44170b602a06b97a24240f0fb83b
+ size 49538462
segnet_efficientnet_voc.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5225a079173dc4b5b1f786e79a474d64c2d17a9aa8f35bbb0908cfbb0f2b9baa
+ size 29583954
watershed_output.png ADDED

Git LFS Details

  • SHA256: 0107d6ecbbe737c32e931bf30b6739d567082c318d16a738080361165ed045c6
  • Pointer size: 131 Bytes
  • Size of remote file: 135 kB