AJain1234 committed on
Commit
4bb934b
·
verified ·
1 Parent(s): e1b65d4

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kmeans_comparison.png filter=lfs diff=lfs merge=lfs -text
37
+ watershed_output.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Akshat Jain
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,7 @@
1
  ---
2
- title: Image Segmentation CV Project
3
- emoji: 📚
4
- colorFrom: purple
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.24.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Image_Segmentation_CV_Project
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 5.23.1
6
  ---
7
+ # CSL7360_Project
 
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from torchvision import transforms
4
+ from experiments.otsu_segmenter import generate_segmented_image
5
+ from experiments.kmeans_segmenter import generate_kmeans_segmented_image
6
+ from experiments.enhanced_kmeans_segmenter import slic_kmeans
7
+ from experiments.watershed_segmenter import generate_watershed
8
+ from experiments.felzenszwalb_segmentation import segment
9
+ from experiments.SegNet.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
10
+ import numpy as np
11
+ from PIL import Image
12
+ from matplotlib import cm
13
+
14
def generate_kmeans(image_path, k):
    """Run K-means segmentation for the UI; return (original, segmented, info text)."""
    original, segmented, _, info_text = generate_kmeans_segmented_image(image_path, k)
    return original, segmented, info_text
17
+
18
def generate_slic(image_path, k, m, max_iter):
    """Run SLIC-style K-means for the UI; return (original image, segmented image)."""
    original, segmented, _labels, _centers = slic_kmeans(image_path, K=k, m=m, max_iter=max_iter)
    return original, segmented
21
+
22
def generate_felzenszwalb(image_path, sigma, k, min_size_factor):
    """Run Felzenszwalb graph segmentation for the UI; return (original, segmented)."""
    pil_image = Image.open(image_path).convert("RGB")
    label_image = segment(np.array(pil_image), sigma=sigma, k=k, min_size=min_size_factor)
    # Cast the float colour output down to uint8 so Gradio can display it.
    return pil_image, label_image.astype(np.uint8)
29
+
30
def SegNet_efficient_b0(image_path):
    """Segment an image with the trained SegNet/EfficientNet-B0 checkpoint.

    Args:
        image_path: Path to the input image file.

    Returns:
        (original_image_resized, mask_pil): the input resized to IMAGE_SIZE and
        the predicted class mask rendered through the 'nipy_spectral' colormap.
    """
    # NOTE(review): the checkpoint is re-loaded on every request; caching the
    # model at module level would make repeated calls much faster.
    model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
    model.load_state_dict(torch.load("segnet_efficientnet_voc.pth", map_location=DEVICE))
    model.eval()

    transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    image = Image.open(image_path).convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        output = model(input_tensor)
        pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

    # Original image resized to the model resolution for side-by-side display.
    original_image_resized = image.resize(IMAGE_SIZE)

    # cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # fall back to the colormap registry when it is gone.
    try:
        colormap = cm.get_cmap('nipy_spectral')
    except AttributeError:
        from matplotlib import colormaps
        colormap = colormaps['nipy_spectral']

    # Guard against an all-background prediction: max() == 0 would divide by zero.
    denom = max(int(pred_mask.max()), 1)
    colored_mask = colormap(pred_mask / denom)                       # normalise to [0, 1]
    colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)   # drop alpha, to uint8
    mask_pil = Image.fromarray(colored_mask)

    return original_image_resized, mask_pil
58
+
59
# Gradio UI: one tab per segmentation method, each wiring a file input and
# parameter sliders to the corresponding segmentation function.
with gr.Blocks() as demo:
    gr.Markdown("# Image Segmentation using Classical CV")

    with gr.Tabs() as tabs:
        with gr.TabItem("Otsu's Method"):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Image File")
                    display_btn = gr.Button("Segment this image")
                    threshold_text = gr.Textbox(label="Threshold Comparison", value="", interactive=False)
                with gr.Column(scale=2):
                    image_output = gr.Image(label="Original Image", container=False)
                    histogram_output = gr.Image(label="Histogram", container=False)
                    segmented_image_output = gr.Image(label="Our Segmented Image", container=False)
                    opencv_segmented_image_output = gr.Image(label="OpenCV Segmented Image", container=False)
            display_btn.click(
                fn=generate_segmented_image,
                inputs=file_input,
                outputs=[image_output, segmented_image_output, opencv_segmented_image_output, histogram_output, threshold_text]
            )

        with gr.TabItem("K-means Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    kmeans_file_input = gr.File(label="Upload Image File")
                    kmeans_k_value = gr.Slider(minimum=2, maximum=10, value=3, step=1, label="Number of Clusters (K)")
                    kmeans_display_btn = gr.Button("Segment this image")
                    kmeans_threshold_text = gr.Textbox(label="K-means Info", value="", interactive=False)
                with gr.Column(scale=2):
                    kmeans_image_output = gr.Image(label="Original Image", container=False)
                    kmeans_segmented_image_output = gr.Image(label="K-means Segmented Image", container=False)
            kmeans_display_btn.click(
                fn=generate_kmeans,
                inputs=[kmeans_file_input, kmeans_k_value],
                outputs=[kmeans_image_output, kmeans_segmented_image_output, kmeans_threshold_text]
            )

        with gr.TabItem("SLIC Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    slic_file_input = gr.File(label="Upload Image File")
                    slic_k_value = gr.Slider(minimum=2, maximum=200, value=3, step=1, label="Number of superpixels")
                    slic_m_value = gr.Slider(minimum=1, maximum=40, value=3, step=1, label="Compactness factor")
                    slic_max_iter_value = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of iterations")
                    slic_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    slic_image_output = gr.Image(label="Original Image", container=False)
                    slic_segmented_image_output = gr.Image(label="SLIC Segmented Image", container=False)
            slic_display_btn.click(
                fn=generate_slic,
                inputs=[slic_file_input, slic_k_value, slic_m_value, slic_max_iter_value],
                outputs=[slic_image_output, slic_segmented_image_output]
            )

        with gr.TabItem("Watershed Algorithm Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    watershed_file_input = gr.File(label="Upload Image File")
                    watershed_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    watershed_image_output = gr.Image(label="Original Image", container=False)
                    watershed_segmented_image_output = gr.Image(label="watershed Segmented Image", container=False)
            watershed_display_btn.click(
                fn=generate_watershed,
                inputs=[watershed_file_input],
                outputs=[watershed_image_output, watershed_segmented_image_output]
            )

        with gr.TabItem("Felzenszwalb Algorithm Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    felzenszwalb_file_input = gr.File(label="Upload Image File")
                    sigma_value = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.1, label="Sigma")
                    K_value = gr.Slider(minimum=2, maximum=1000, value=2, step=1, label="K value")
                    min_size_value = gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Min Size Factor")
                    felzenszwalb_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    felzenszwalb_image_output = gr.Image(label="Original Image", container=False)
                    felzenszwalb_segmented_image_output = gr.Image(label="felzenszwalb Segmented Image", container=False)
            felzenszwalb_display_btn.click(
                fn=generate_felzenszwalb,
                inputs=[felzenszwalb_file_input, sigma_value, K_value, min_size_value],
                outputs=[felzenszwalb_image_output, felzenszwalb_segmented_image_output]
            )

        with gr.TabItem("SegNet EfficientNet B0 Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    segnet_file_input = gr.File(label="Upload Image File")
                    segnet_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    segnet_image_output = gr.Image(label="Original Image", container=False)
                    segnet_segmented_image_output = gr.Image(label="SegNet Segmented Image", container=False)
            segnet_display_btn.click(
                fn=SegNet_efficient_b0,
                inputs=[segnet_file_input],
                outputs=[segnet_image_output, segnet_segmented_image_output]
            )

if __name__ == "__main__":
    # BUGFIX: the original hard-coded a private LAN address (172.31.100.127),
    # which fails anywhere else; bind all interfaces so the app is reachable
    # when deployed (e.g. on Hugging Face Spaces).
    demo.launch(server_name="0.0.0.0")
166
+
bird.jpeg ADDED
enhaned_kmeans_segmented.png ADDED
experiments/SegNet/architecture.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import models, transforms
5
+ from torchvision.datasets import VOCSegmentation
6
+ from torch.utils.data import DataLoader
7
+ from PIL import Image
8
+ import numpy as np
9
+ import wandb
10
+ import os
11
+ import matplotlib.pyplot as plt
12
+
13
# Fix RNG seeds so training runs are reproducible.
torch.manual_seed(42)
np.random.seed(42)

# wandb.login(key="your_wandb_api_key_here")

# Training hyper-parameters and global configuration.
EPOCHS = 25
BATCH_SIZE = 8
LR = 1e-3
NUM_CLASSES = 21  # Pascal VOC has 21 classes including background
IMAGE_SIZE = (256, 256)  # fixed (H, W) used to resize both images and masks
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# wandb.init(project="segnet-efficientnet-voc", config={
#     "epochs": EPOCHS,
#     "batch_size": BATCH_SIZE,
#     "learning_rate": LR,
#     "architecture": "SegNet-EfficientNet",
#     "dataset": "PascalVOC2012"
# })
32
+
33
class SegNetEfficientNet(nn.Module):
    """SegNet-style encoder/decoder with an EfficientNet-B0 encoder.

    The encoder is the pretrained EfficientNet-B0 feature extractor (1280
    output channels); the decoder is a stack of transposed convolutions that
    upsamples back to a per-pixel map with `num_classes` channels.
    """

    def __init__(self, num_classes):
        super(SegNetEfficientNet, self).__init__()
        # BUGFIX: `pretrained=True` was deprecated in torchvision 0.13 and
        # later removed; the weights enum is the supported way to request
        # ImageNet weights. Keep a fallback for very old torchvision.
        try:
            base_model = models.efficientnet_b0(
                weights=models.EfficientNet_B0_Weights.DEFAULT)
        except AttributeError:
            base_model = models.efficientnet_b0(pretrained=True)
        features = list(base_model.features.children())

        # Encoder: the EfficientNet feature blocks.
        self.encoder = nn.Sequential(*features)

        # Decoder: four 2x upsampling steps, then a 1x1 classifier conv.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1280, 512, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, num_classes, kernel_size=1)
        )

    def forward(self, x):
        """Return logits of shape (N, num_classes, *IMAGE_SIZE)."""
        x = self.encoder(x)
        x = self.decoder(x)
        # The decoder does not exactly invert the encoder's downsampling, so
        # interpolate to the fixed training resolution.
        x = F.interpolate(x, size=IMAGE_SIZE, mode='bilinear', align_corners=False)
        return x
60
+
61
class VOCSegmentationDataset(VOCSegmentation):
    """Pascal VOC 2012 segmentation dataset with separate image/mask transforms.

    The stock `VOCSegmentation` applies a joint transform; this wrapper keeps
    the image and mask transforms independent and converts the mask to a long
    tensor of per-pixel class indices.
    """

    def __init__(self, root, image_set='train', transform=None,
                 target_transform=None, download=True):
        # `download` defaults to True for backward compatibility (the original
        # always downloaded), but callers with local data can now skip the
        # network round-trip.
        super().__init__(root=root, year='2012', image_set=image_set, download=download)
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        img, target = super().__getitem__(index)
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            target = self.target_transform(target)
        # Masks are palette PNGs: np.array yields per-pixel class indices;
        # 255 marks "ignore" boundaries, matching CrossEntropyLoss(ignore_index=255).
        target = torch.as_tensor(np.array(target), dtype=torch.long)
        return img, target
75
if __name__ == "__main__":
    # Input images: resize, tensorise, and apply ImageNet normalisation
    # (matching the pretrained EfficientNet encoder statistics).
    image_transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    # Masks must be resized with nearest-neighbour so class ids stay intact.
    mask_transform = transforms.Resize(IMAGE_SIZE, interpolation=Image.NEAREST)

    # NOTE(review): train.py imports train_loader/val_loader from this module,
    # but they only exist under this __main__ guard — confirm intended.
    train_dataset = VOCSegmentationDataset("voc_data", 'train', image_transform, mask_transform)
    val_dataset = VOCSegmentationDataset("voc_data", 'val', image_transform, mask_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
experiments/SegNet/train.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import models, transforms
5
+ from torchvision.datasets import VOCSegmentation
6
+ from torch.utils.data import DataLoader
7
+ from PIL import Image
8
+ import numpy as np
9
+ import wandb
10
+ import os
11
+ import matplotlib.pyplot as plt
12
+ from .architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, LR, EPOCHS, train_loader, val_loader, IMAGE_SIZE
13
+ from tqdm import tqdm
14
+
15
# Model, optimiser and loss. 255 is the VOC boundary ("ignore") label, so it
# is excluded from the cross-entropy loss.
model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss(ignore_index=255)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # NOTE(review): unused — DEVICE (imported) is used everywhere below
19
+
20
def pixel_accuracy(preds, labels, ignore_index=None):
    """Fraction of pixels whose argmax prediction matches the label.

    Args:
        preds: Logits of shape (N, C, H, W).
        labels: Ground-truth class indices of shape (N, H, W).
        ignore_index: Optional label value to exclude from the accuracy
            (e.g. 255, the VOC boundary label that the loss also ignores).
            Default None keeps the original behaviour of counting every pixel.

    Returns:
        A 0-dim float tensor with the accuracy in [0, 1].
    """
    _, preds = torch.max(preds, 1)
    correct = (preds == labels).float()
    if ignore_index is None:
        return correct.sum() / correct.numel()
    valid = (labels != ignore_index).float()
    # Clamp avoids 0/0 when every pixel carries the ignore label.
    return (correct * valid).sum() / valid.sum().clamp(min=1)
25
+
26
+ # def mean_iou(preds, labels, num_classes=NUM_CLASSES):
27
+ # _, preds = torch.max(preds, 1)
28
+ # ious = []
29
+ # for cls in range(num_classes):
30
+ # intersection = ((preds == cls) & (labels == cls)).float().sum()
31
+ # union = ((preds == cls) | (labels == cls)).float().sum()
32
+ # if union > 0:
33
+ # ious.append(intersection / union)
34
+ # return sum(ious) / len(ious) if ious else 0
35
+
36
# Main training loop: one training pass and one validation pass per epoch.
# NOTE(review): train_loader/val_loader are imported from .architecture, but
# that module only builds them under `if __name__ == "__main__":`, so this
# import fails at runtime — they need to be module-level (or built here).
for epoch in tqdm(range(EPOCHS)):
    # ---- training pass ----
    model.train()
    running_loss, running_acc = 0.0, 0.0
    for images, masks in train_loader:
        images, masks = images.to(DEVICE), masks.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        running_acc += pixel_accuracy(outputs, masks).item()
    train_loss = running_loss / len(train_loader)
    train_acc = running_acc / len(train_loader)

    # ---- validation pass ----
    model.eval()
    running_loss, running_acc = 0.0, 0.0
    with torch.no_grad():
        for images, masks in val_loader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)
            outputs = model(images)
            running_loss += criterion(outputs, masks).item()
            running_acc += pixel_accuracy(outputs, masks).item()
    val_loss = running_loss / len(val_loader)
    val_acc = running_acc / len(val_loader)

    # wandb.log({
    #     "epoch": epoch + 1,
    #     "train_loss": train_loss,
    #     "train_accuracy": train_acc,
    #     "val_loss": val_loss,
    #     "val_accuracy": val_acc
    # })

    print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

torch.save(model.state_dict(), "segnet_efficientnet_voc.pth")
# wandb.finish()
81
+
experiments/enhanced_kmeans_segmenter.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import matplotlib.pyplot as plt
4
+ from tqdm import tqdm
5
+ from PIL import Image
6
+
7
def slic_kmeans(image_path, K=100, m=10, max_iter=10):
    """
    Perform superpixel segmentation using enhanced K-means with LAB+XY.

    SLIC-style algorithm: each pixel is a 5-D feature [L, a, b, x, y]; a
    cluster only competes for pixels inside a 2S x 2S window around its
    centre, where S is the grid interval derived from K.

    Args:
        image_path (str): Path to the input image file.
        K (int): Target number of superpixels.
        m (float): Compactness factor (larger = more spatially compact clusters).
        max_iter (int): Number of assignment/update iterations.
    Returns:
        jpg_image: The original PIL image, unmodified.
        segmented_img: PIL image recoloured with each cluster's mean LAB colour.
        labels: (h, w) int32 array with the cluster index of each pixel.
        centers: Final cluster centres in [L, a, b, x, y] feature space.
    """
    jpg_image = Image.open(image_path)
    # assumes a 3-channel RGB file — TODO confirm behaviour for grayscale/RGBA inputs
    image = np.array(jpg_image)
    h, w = image.shape[:2]
    S = int(np.sqrt(h * w / K))  # grid interval between initial centres

    # Convert to LAB color space (perceptually more uniform than RGB).
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB).astype(np.float32)

    # Create 5D feature vector [L, a, b, x, y] per pixel, flattened row-major.
    X, Y = np.meshgrid(np.arange(w), np.arange(h))
    features = np.dstack((lab, X, Y)).reshape((-1, 5))

    # Initialize cluster centers on a regular S x S grid.
    centers = []
    for y in range(S // 2, h, S):
        for x in range(S // 2, w, S):
            center = features[y * w + x]
            centers.append(center)
    centers = np.array(centers)

    # NOTE(review): distances persist across iterations, so an assignment can
    # only ever be replaced by a strictly better one; pixels never covered by
    # any centre window keep label -1 and would index centers[-1] below.
    labels = np.full((h * w,), -1, dtype=np.int32)
    distances = np.full((h * w,), np.inf)

    for iteration in tqdm(range(max_iter)):
        # Assignment step: each centre claims pixels in its 2S x 2S window.
        for idx, center in enumerate(centers):
            l, a, b, cx, cy = center
            x_start, x_end = max(0, int(cx - S)), min(w, int(cx + S))
            y_start, y_end = max(0, int(cy - S)), min(h, int(cy + S))

            for y in range(y_start, y_end):
                for x in range(x_start, x_end):
                    i = y * w + x
                    fp = features[i]
                    dc = np.linalg.norm(fp[:3] - center[:3])  # LAB distance
                    ds = np.linalg.norm(fp[3:] - center[3:])  # XY distance
                    # Combined SLIC distance: colour plus spatially-normalised
                    # position weighted by the compactness factor m.
                    D = np.sqrt(dc**2 + (ds / S)**2 * m**2)

                    if D < distances[i]:
                        distances[i] = D
                        labels[i] = idx

        # Update step: move each centre to the mean feature of its members.
        new_centers = np.zeros_like(centers)
        count = np.zeros(len(centers))
        for i in range(h * w):
            lbl = labels[i]
            new_centers[lbl] += features[i]
            count[lbl] += 1
        for i in range(len(centers)):
            # Empty clusters keep a zero centre rather than dividing by zero.
            if count[i] > 0:
                new_centers[i] /= count[i]
        centers = new_centers

    # Recolor each pixel with its cluster's mean LAB colour, converted to RGB.
    segmented_img = np.zeros((h, w, 3), dtype=np.uint8)
    for i in range(h * w):
        lbl = labels[i]
        lab_val = centers[lbl][:3]
        lab_pixel = np.uint8([[lab_val]])
        rgb_pixel = cv2.cvtColor(lab_pixel, cv2.COLOR_LAB2RGB)[0][0]
        segmented_img[i // w, i % w] = rgb_pixel

    return jpg_image, Image.fromarray(segmented_img), labels.reshape((h, w)), centers
82
+
83
+ # img_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
84
+ # image = cv2.imread(img_path)
85
+ # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
86
+
87
+ # _,seg_img, labels, centers = slic_kmeans(image, K=2, m=20)
88
+ # seg_img.save("enhaned_kmeans_segmented.png")
89
+ # plt.figure(figsize=(10, 5))
90
+ # plt.subplot(1, 2, 1)
91
+ # plt.imshow(image)
92
+ # plt.title("Original Image")
93
+ # plt.axis("off")
94
+
95
+ # plt.subplot(1, 2, 2)
96
+ # plt.imshow(seg_img)
97
+ # plt.title("SLIC-like K-Means Segmentation")
98
+ # plt.axis("off")
99
+ # plt.tight_layout()
100
+ # plt.show()
experiments/felzenszwalb_segmentation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .segmentation import segment
experiments/felzenszwalb_segmentation/disjoint_set.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class DisjointSet:
    """Union-find over n elements with union by rank and size tracking.

    Each element row stores [rank, component size, parent pointer].
    """

    def __init__(self, n_elements):
        self.num = n_elements
        self.elements = np.empty(shape=(n_elements, 3), dtype=int)
        # Every element starts as its own singleton component.
        self.elements[:, 0] = 0                      # rank
        self.elements[:, 1] = 1                      # size
        self.elements[:, 2] = np.arange(n_elements)  # parent = self

    def size(self, x):
        """Size of the component whose representative is x."""
        return self.elements[x, 1]

    def num_sets(self):
        """Current number of disjoint components."""
        return self.num

    def find(self, x):
        """Representative of x's component (with one-step path shortening)."""
        root = int(x)
        while root != self.elements[root, 2]:
            root = self.elements[root, 2]
        self.elements[x, 2] = root
        return root

    def join(self, x, y):
        """Union the components rooted at x and y (expects representatives)."""
        if self.elements[x, 0] > self.elements[y, 0]:
            # x has the higher rank: y hangs under x.
            self.elements[y, 2] = x
            self.elements[x, 1] += self.elements[y, 1]
        else:
            self.elements[x, 2] = y
            self.elements[y, 1] += self.elements[x, 1]
            if self.elements[x, 0] == self.elements[y, 0]:
                self.elements[y, 0] += 1
        self.num -= 1
experiments/felzenszwalb_segmentation/segmentation.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from .disjoint_set import DisjointSet
3
+ from .utils import smoothen, difference, get_random_rgb_image
4
+
5
+
6
def segment_graph(num_vertices, num_edges, edges, c):
    """Merge vertices using Felzenszwalb's predicate; return the DisjointSet.

    Edges are (v0, v1, weight) rows; c is the threshold constant k controlling
    how aggressively components merge.
    """
    # Process edges in non-decreasing weight order.
    edges[0 : num_edges, :] = edges[edges[0 : num_edges, 2].argsort()]
    components = DisjointSet(num_vertices)
    # Per-component merge threshold, initialised to c for singletons.
    threshold = np.full(num_vertices, float(c))
    for edge_idx in range(num_edges):
        v0, v1, weight = edges[edge_idx, :]
        root_a = components.find(v0)
        root_b = components.find(v1)
        if root_a == root_b:
            continue
        # Merge only when the edge is no heavier than both internal thresholds.
        if (weight <= threshold[root_a]) and (weight <= threshold[root_b]):
            components.join(root_a, root_b)
            root_a = components.find(root_a)
            threshold[root_a] = weight + (c / components.size(root_a))
    return components
22
+
23
+
24
def segment(in_image, sigma, k, min_size):
    """Felzenszwalb-Huttenlocher graph-based segmentation.

    Args:
        in_image: (H, W, 3) RGB array.
        sigma: Gaussian smoothing applied to each colour band first.
        k: Threshold constant controlling component granularity.
        min_size: Minimum component size enforced by a post-merge pass.

    Returns:
        (H, W, 3) float array in which every component is painted one
        random colour.
    """
    height, width, _ = in_image.shape
    smooth_red_band = smoothen(in_image[:, :, 0], sigma)
    smooth_green_band = smoothen(in_image[:, :, 1], sigma)
    smooth_blue_band = smoothen(in_image[:, :, 2], sigma)

    # Build the 8-connected pixel graph: right, down, down-right, up-right —
    # at most 4 edges per pixel.
    edges_size = width * height * 4
    edges = np.zeros(shape=(edges_size, 3), dtype=object)
    num = 0
    for y in range(height):
        for x in range(width):
            if x < width - 1:
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int(y * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y
                )
                num += 1
            if y < height - 1:
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y + 1) * width + x)
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x, y + 1
                )
                num += 1
            # BUGFIX: this condition was `y < height - 2`, which dropped the
            # down-right diagonal edges of the second-to-last row; the
            # reference implementation uses height - 1, symmetric with the
            # other neighbour directions.
            if (x < width - 1) and (y < height - 1):
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y + 1) * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y + 1
                )
                num += 1
            if (x < width - 1) and (y > 0):
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y - 1) * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y - 1
                )
                num += 1

    u = segment_graph(width * height, num, edges, k)

    # Post-process: merge any component smaller than min_size into a neighbour.
    for i in range(num):
        a = u.find(edges[i, 0])
        b = u.find(edges[i, 1])
        if (a != b) and ((u.size(a) < min_size) or (u.size(b) < min_size)):
            u.join(a, b)

    # Paint each component with a random colour keyed by its representative.
    output = np.zeros(shape=(height, width, 3))
    colors = np.zeros(shape=(height * width, 3))
    for i in range(height * width):
        colors[i, :] = get_random_rgb_image()
    for y in range(height):
        for x in range(width):
            comp = u.find(y * width + x)
            output[y, x, :] = colors[comp, :]
    return output
experiments/felzenszwalb_segmentation/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .utils import *
2
+ from .filter_utils import *
experiments/felzenszwalb_segmentation/utils/filter_utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from math import ceil, exp, pow
3
+
4
+
5
def convolve(src, mask):
    """Horizontal 1-D convolution with a mirror-symmetric mask, clamped at edges.

    mask[0] is the centre tap; mask[i] is applied to the pixels i columns to
    the left and to the right (indices clamped to the row bounds).
    """
    height, width = src.shape
    taps = len(mask)
    output = np.zeros(shape=src.shape, dtype=float)
    for row in range(height):
        for col in range(width):
            acc = float(mask[0] * src[row, col])
            for i in range(1, taps):
                left = src[row, max(col - i, 0)]
                right = src[row, min(col + i, width - 1)]
                acc += mask[i] * (left + right)
            output[row, col] = acc
    return output
17
+
18
+
19
def normalize(mask):
    """Scale the half-kernel by twice its absolute sum plus the centre tap.

    NOTE(review): the denominator includes mask[0] in the doubled sum and then
    adds it once more (three copies in total); the classic reference
    normalisation doubles only mask[1:]. Confirm this deviation is intended.
    """
    total = 2 * np.sum(np.absolute(mask)) + abs(mask[0])
    return np.divide(mask, total)
22
+
23
+
24
def smoothen(src, sigma):
    """Smooth src with a normalised Gaussian kernel via two convolve passes.

    NOTE(review): convolve() filters along the x axis only, so both passes are
    horizontal; a separable 2-D blur would transpose between passes — confirm
    intended.
    """
    kernel = normalize(make_gaussian_filter(sigma))
    first_pass = convolve(src, kernel)
    return convolve(first_pass, kernel)
30
+
31
+
32
def make_gaussian_filter(sigma):
    """Return the right half of a 1-D Gaussian kernel (centre tap first).

    Args:
        sigma: Standard deviation; clamped to at least 0.01.

    Returns:
        Array of length ceil(4 * sigma) + 1 with
        mask[i] = exp(-i**2 / (2 * sigma**2)); mask[0] is 1.0.
    """
    sigma = max(sigma, 0.01)
    length = int(ceil(sigma * 4.0)) + 1
    mask = np.zeros(shape=length, dtype=float)
    for i in range(length):
        # BUGFIX: the exponent was pow(i / sigma, i / sigma); a Gaussian uses
        # the SQUARE of the normalised distance.
        mask[i] = exp(-0.5 * pow(i / sigma, 2))
    return mask
experiments/felzenszwalb_segmentation/utils/utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from math import sqrt
3
+ from random import randint
4
+
5
+
6
def difference(red_band, green_band, blue_band, x1, y1, x2, y2):
    """Euclidean distance in RGB space between pixels (x1, y1) and (x2, y2)."""
    dr = red_band[y1, x1] - red_band[y2, x2]
    dg = green_band[y1, x1] - green_band[y2, x2]
    db = blue_band[y1, x1] - blue_band[y2, x2]
    return sqrt(dr ** 2 + dg ** 2 + db ** 2)
12
+
13
+
14
def get_random_rgb_image():
    """Return a random RGB colour as an int array of shape (3,)."""
    return np.array([randint(0, 255) for _ in range(3)], dtype=int)
20
+
21
+
22
def get_random_gray_image():
    """Return a random gray level as an int array of shape (1,)."""
    return np.array([randint(0, 255)], dtype=int)
experiments/kmeans_segmenter.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ import cv2
4
+ from PIL import Image
5
+ import io
6
+
7
def initialize_centroids(data, K):
    """Pick K distinct rows of ``data`` uniformly at random as seed centroids."""
    n_samples = data.shape[0]
    chosen = np.random.choice(n_samples, K, replace=False)
    return data[chosen]
11
+
12
def compute_distances(data, centroids):
    """Return the (n_points, n_centroids) matrix of Euclidean distances."""
    # Broadcast to (n_points, n_centroids, dim), then reduce over dim.
    diffs = data[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.sqrt((diffs ** 2).sum(axis=2))
15
+
16
def update_centroids(data, labels, K):
    """Recompute each centroid as the mean of its cluster's members.

    A cluster with no assigned points keeps the zero vector (same as the
    original behavior).
    """
    centroids = np.zeros((K, data.shape[1]))
    for idx in range(K):
        members = data[labels == idx]
        if len(members):
            centroids[idx] = members.mean(axis=0)
    return centroids
24
+
25
def kmeans_from_scratch(image, K=4, max_iters=100, tol=1e-4):
    """Segment ``image`` by K-means clustering of its RGB pixels.

    Iterates assign/update steps until the centroid shift falls below
    ``tol`` or ``max_iters`` is reached.

    Returns:
        (segmented uint8 image, per-pixel label map, uint8 centroids).
    """
    pixels = image.reshape((-1, 3)).astype(np.float32)
    centroids = initialize_centroids(pixels, K)

    for _ in range(max_iters):
        # Assignment step: nearest centroid per pixel.
        labels = np.argmin(compute_distances(pixels, centroids), axis=1)

        # Update step; stop once centroids have essentially converged.
        updated = update_centroids(pixels, labels, K)
        if np.linalg.norm(updated - centroids) < tol:
            break
        centroids = updated

    # Paint each pixel with its centroid's color.
    quantized = centroids[labels].astype(np.uint8)
    segmented_image = quantized.reshape(image.shape)

    return segmented_image, labels.reshape(image.shape[:2]), centroids.astype(np.uint8)
46
+
47
def generate_kmeans_segmented_image(image_path, k=3):
    """Run K-means segmentation on an image file for the Gradio app.

    Args:
        image_path: path to the input image.
        k: number of clusters.

    Returns:
        (original PIL image, segmented PIL image, comparison figure as a
        PIL image, status text).
    """
    image = Image.open(image_path)
    image_np = np.array(image)

    # Normalize every input to a 3-channel RGB array. The previous code
    # did an RGB->BGR->RGB round trip (a no-op) and crashed on 4-channel
    # RGBA inputs.
    if image_np.ndim == 2:
        image_rgb = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)
    elif image_np.shape[2] == 4:
        image_rgb = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)
    else:
        image_rgb = image_np

    seg_img, labels, centers = kmeans_from_scratch(image_rgb, K=k)

    # Vertical swatch strip: one 50px band per cluster color.
    colors_image = np.zeros((50 * k, 100, 3), dtype=np.uint8)
    for i, color in enumerate(centers):
        colors_image[i * 50:(i + 1) * 50, :] = color

    fig, axes = plt.subplots(1, 3, figsize=(12, 4))

    axes[0].imshow(image_rgb)
    axes[0].set_title("Original Image")
    axes[0].axis('off')

    axes[1].imshow(seg_img)
    axes[1].set_title(f"K-Means (K={k})")
    axes[1].axis('off')

    axes[2].imshow(colors_image)
    axes[2].set_title("Cluster Colors")
    axes[2].axis('off')

    plt.tight_layout()

    # Render the figure to an in-memory PNG and hand it back as PIL.
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    comparison_image = Image.open(buf)
    plt.close(fig)

    return image, Image.fromarray(seg_img), comparison_image, f"K-Means clustering with K={k}"
87
+
88
if __name__ == "__main__":
    # Example usage with a hard-coded local image path; adjust as needed.
    image_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
    original, segmented, comparison, text = generate_kmeans_segmented_image(image_path, k=3)

    # Save output images instead of displaying them
    segmented.save("kmeans_segmented.png")
    comparison.save("kmeans_comparison.png")
    print(text)
experiments/otsu_segmenter.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import matplotlib.pyplot as plt
4
+ from PIL import Image
5
+ import io
6
+
7
def otsu_threshold(image):
    """Binarize ``image`` with Otsu's method, implemented from scratch.

    Exhaustively searches thresholds 1..255 for the one maximizing the
    between-class variance of the grayscale histogram.

    Returns:
        (optimal threshold, binary image: 255 where pixel >= threshold).
    """
    counts, _ = np.histogram(image.flatten(), bins=256, range=[0, 256])
    prob = counts.astype(float) / image.size

    best_variance = 0
    best_threshold = 0
    levels = np.arange(256)

    for t in range(1, 256):
        # Class weights: background below t, foreground at/above t.
        w0 = np.sum(prob[:t])
        w1 = np.sum(prob[t:])
        if w0 == 0 or w1 == 0:
            continue  # one class is empty; variance undefined

        mu0 = np.sum(levels[:t] * prob[:t]) / w0
        mu1 = np.sum(levels[t:] * prob[t:]) / w1
        between = w0 * w1 * (mu0 - mu1) ** 2

        if between > best_variance:
            best_variance = between
            best_threshold = t

    binary = np.zeros_like(image)
    binary[image >= best_threshold] = 255

    return best_threshold, binary
36
+
37
def generate_segmented_image(image_path):
    """Compare the from-scratch Otsu threshold with OpenCV's.

    Args:
        image_path: path to the input image.

    Returns:
        (original PIL image, our binary image, OpenCV's binary image,
        histogram figure as a PIL image, summary text).
    """
    print(f"Image path: {image_path}")
    image = Image.open(image_path)
    image_np = np.array(image)

    # Reduce any input to single-channel grayscale. The previous code ran
    # COLOR_RGB2BGR unconditionally, which raises on 2-D grayscale input
    # (making its `else` branch unreachable) and on RGBA input.
    if image_np.ndim == 3:
        if image_np.shape[2] == 4:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)
        gray_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    else:
        gray_image = image_np.copy()

    # Pre-blur to suppress noise before thresholding.
    blurred = cv2.GaussianBlur(gray_image, (5, 5), 0)

    # Our implementation
    our_threshold, our_segmented = otsu_threshold(blurred)

    # OpenCV's implementation
    opencv_threshold, opencv_segmented = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Histogram figure with both thresholds marked.
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(gray_image.ravel(), 256, [0, 256], color='gray')
    ax.axvline(x=our_threshold, color='red', linestyle='--', label=f'Ours: {our_threshold}')
    ax.axvline(x=opencv_threshold, color='green', linestyle='--', label=f'OpenCV: {opencv_threshold}')
    ax.set_title("Histogram with Thresholds")
    ax.legend()

    # Convert Matplotlib figure to a PIL image via an in-memory PNG.
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    hist_image = Image.open(buf)
    plt.close(fig)  # Close the figure to free memory

    return (
        image,
        Image.fromarray(our_segmented),
        Image.fromarray(opencv_segmented),
        hist_image,
        f"Our Threshold: {our_threshold}\nOpenCV Threshold: {opencv_threshold}",
    )
82
if __name__ == "__main__":
    # Example usage; adjust the path to a local test image.
    image_path = '/home/akshat/projects/CSL7360_Project/bird.jpeg'
    # generate_segmented_image expects a file path (it calls Image.open
    # internally); the previous code passed a cv2 ndarray, which crashed.
    generate_segmented_image(image_path)
95
+
experiments/watershed_segmenter.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import heapq
4
+ import matplotlib.pyplot as plt
5
+ from collections import deque
6
+
7
+ # 1. Compute local minima as markers
8
def get_local_minima(gray):
    """Mark pixels equal to the minimum of their 3x3 neighborhood.

    Erosion replaces each pixel with its 3x3 neighborhood minimum, so a
    pixel unchanged by erosion is a local minimum. Returns a uint8 mask
    (1 at minima, 0 elsewhere) used as watershed markers.
    """
    neighborhood_min = cv2.erode(gray, np.ones((3, 3), np.uint8))
    return (gray == neighborhood_min).astype(np.uint8)
13
+
14
+ # 2. Label each connected component (marker)
15
def label_markers(minima):
    """Assign a distinct integer label to each connected minima blob.

    Returns (label map, number of labels including background 0).
    """
    count, labeled = cv2.connectedComponents(minima)
    return labeled, count
18
+
19
+ # 3. Watershed from scratch
20
def watershed_from_scratch(gray, markers):
    """Priority-flood watershed segmentation.

    Pixels are flooded in order of increasing intensity starting from the
    labeled marker regions. A pixel whose flooded neighbors all share one
    label joins that basin; a pixel touching two or more different labels
    becomes a watershed (boundary) pixel.

    Args:
        gray: 2-D grayscale image; intensity drives the flooding order.
        markers: int label map — positive values seed the basins, 0 elsewhere.

    Returns:
        int32 label map: basin labels > 0, -1 on watershed lines.
        (0 can remain on queued-but-unlabeled pixels whose neighborhood
        never acquired a positive label.)
    """
    h, w = gray.shape
    # Constants
    WATERSHED = -1   # final value for boundary pixels
    INIT = -2        # pixel not yet touched by the flood

    # Initialize label and visited map
    label_map = np.full((h, w), INIT, dtype=np.int32)
    label_map[markers > 0] = markers[markers > 0]

    # Priority queue for pixels: (intensity, y, x)
    pq = []

    # Populate queue with boundary of initial markers
    # (8-connected neighbors of every marker pixel).
    for y in range(h):
        for x in range(w):
            if markers[y, x] > 0:
                for dy in [-1, 0, 1]:
                    for dx in [-1, 0, 1]:
                        ny, nx = y + dy, x + dx
                        if 0 <= ny < h and 0 <= nx < w:
                            if markers[ny, nx] == 0 and label_map[ny, nx] == INIT:
                                heapq.heappush(pq, (gray[ny, nx], ny, nx))
                                label_map[ny, nx] = 0  # Mark as in queue

    # Flooding: always expand the lowest-intensity queued pixel first.
    while pq:
        intensity, y, x = heapq.heappop(pq)

        # Collect the distinct basin labels among 8-connected neighbors.
        neighbor_labels = set()
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                ny, nx = y + dy, x + dx
                if 0 <= ny < h and 0 <= nx < w:
                    lbl = label_map[ny, nx]
                    if lbl > 0:
                        neighbor_labels.add(lbl)

        if len(neighbor_labels) == 1:
            # Only one basin reaches this pixel — it joins that basin.
            label_map[y, x] = neighbor_labels.pop()
        elif len(neighbor_labels) > 1:
            # Two or more basins meet here — watershed line.
            label_map[y, x] = WATERSHED

        # Add unvisited neighbors to the queue
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                ny, nx = y + dy, x + dx
                if 0 <= ny < h and 0 <= nx < w:
                    if label_map[ny, nx] == INIT:
                        heapq.heappush(pq, (gray[ny, nx], ny, nx))
                        label_map[ny, nx] = 0  # Mark as in queue

    return label_map
73
+
74
def generate_watershed(iamge_path):
    """Run the from-scratch watershed on the image at the given path.

    NOTE(review): the parameter name `iamge_path` is a typo, kept
    unchanged for backward compatibility with existing callers.

    Args:
        iamge_path: path to the input image (loaded as grayscale).

    Returns:
        (blurred grayscale input, uint8 color visualization: watershed
        lines red, basin interiors green, original minima blue).

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # Load grayscale image
    image = cv2.imread(iamge_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None on a bad path/unreadable file; fail with
        # a clear error instead of a cryptic crash inside GaussianBlur.
        raise FileNotFoundError(f"Could not read image: {iamge_path}")
    image = cv2.GaussianBlur(image, (5, 5), 0)
    minima = get_local_minima(image)
    markers, num_labels = label_markers(minima)
    result = watershed_from_scratch(image, markers)

    # Visualization: color-code the label map.
    output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    output[result == -1] = [255, 0, 0]  # Watershed lines in red
    output[result > 0] = [0, 255, 0]    # Segments in green
    output[markers > 0] = [0, 0, 255]   # Original minima in blue
    return image, output
88
if __name__ == "__main__":
    # Run the process
    # Load grayscale image (hard-coded local path; adjust as needed).
    image = cv2.imread("/home/akshat/projects/CSL7360_Project/bird.jpeg", cv2.IMREAD_GRAYSCALE)
    image = cv2.GaussianBlur(image, (5, 5), 0)
    minima = get_local_minima(image)
    markers, num_labels = label_markers(minima)
    result = watershed_from_scratch(image, markers)

    # Visualization
    output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    output[result == -1] = [255, 0, 0]  # Watershed lines in red
    output[result > 0] = [0, 255, 0]  # Segments in green
    output[markers > 0] = [0, 0, 255]  # Original minima in blue

    # Save the original grayscale and the output image
    cv2.imwrite("original_grayscale.png", image)
    cv2.imwrite("watershed_output.png", output)

    print("Images saved as 'original_grayscale.png' and 'watershed_output.png'")
109
+
110
+
111
+
kmeans_comparison.png ADDED

Git LFS Details

  • SHA256: d4a9201e30341e13019433ac556b9b2a3ffc44dd2d4adeae0d97e4829ab6860b
  • Pointer size: 131 Bytes
  • Size of remote file: 181 kB
kmeans_segmented.png ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.5.1
2
+ torchvision==0.20.1
3
+ gradio==5.23.1
4
+ pillow==10.4.0
5
+ numpy==2.2.2
6
+ opencv-python==4.10.0
7
+ matplotlib==3.10.0
8
+ wandb==0.19.6
9
+ tqdm==4.67.1
segnet_efficientnet_voc.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5225a079173dc4b5b1f786e79a474d64c2d17a9aa8f35bbb0908cfbb0f2b9baa
3
+ size 29583954
watershed_output.png ADDED

Git LFS Details

  • SHA256: 0107d6ecbbe737c32e931bf30b6739d567082c318d16a738080361165ed045c6
  • Pointer size: 131 Bytes
  • Size of remote file: 135 kB