AJain1234 committed on
Commit
4bb934b
·
verified ·
1 Parent(s): e1b65d4

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kmeans_comparison.png filter=lfs diff=lfs merge=lfs -text
37
+ watershed_output.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Akshat Jain
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,7 @@
1
  ---
2
- title: Image Segmentation CV Project
3
- emoji: 📚
4
- colorFrom: purple
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.24.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Image_Segmentation_CV_Project
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 5.23.1
6
  ---
7
+ # CSL7360_Project
 
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from torchvision import transforms
4
+ from experiments.otsu_segmenter import generate_segmented_image
5
+ from experiments.kmeans_segmenter import generate_kmeans_segmented_image
6
+ from experiments.enhanced_kmeans_segmenter import slic_kmeans
7
+ from experiments.watershed_segmenter import generate_watershed
8
+ from experiments.felzenszwalb_segmentation import segment
9
+ from experiments.SegNet.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
10
+ import numpy as np
11
+ from PIL import Image
12
+ from matplotlib import cm
13
+
14
def generate_kmeans(image_path, k):
    """Run K-means segmentation for the UI; return (original, segmented, info text)."""
    original, segmented, _, info_text = generate_kmeans_segmented_image(image_path, k)
    return original, segmented, info_text
17
+
18
def generate_slic(image_path, k, m, max_iter):
    """Run SLIC-style K-means for the UI; return (original image, segmented image)."""
    original, segmented, _labels, _centers = slic_kmeans(image_path, K=k, m=m, max_iter=max_iter)
    return original, segmented
21
+
22
def generate_felzenszwalb(image_path, sigma, k, min_size_factor):
    """Run Felzenszwalb graph segmentation for the UI; return (original, segmented)."""
    pil_image = Image.open(image_path).convert("RGB")
    label_image = segment(np.array(pil_image), sigma=sigma, k=k, min_size=min_size_factor)
    # Cast the float colour output down to uint8 so Gradio can display it.
    return pil_image, label_image.astype(np.uint8)
29
+
30
def SegNet_efficient_b0(image_path):
    """Segment an image with the trained SegNet/EfficientNet-B0 checkpoint.

    Args:
        image_path: Path to the input image file.

    Returns:
        (original_image_resized, mask_pil): the input resized to IMAGE_SIZE and
        the predicted class mask rendered through the 'nipy_spectral' colormap.
    """
    # NOTE(review): the checkpoint is re-loaded on every request; caching the
    # model at module level would make repeated calls much faster.
    model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
    model.load_state_dict(torch.load("segnet_efficientnet_voc.pth", map_location=DEVICE))
    model.eval()

    transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    image = Image.open(image_path).convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        output = model(input_tensor)
        pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

    # Original image resized to the model resolution for side-by-side display.
    original_image_resized = image.resize(IMAGE_SIZE)

    # cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # fall back to the colormap registry when it is gone.
    try:
        colormap = cm.get_cmap('nipy_spectral')
    except AttributeError:
        from matplotlib import colormaps
        colormap = colormaps['nipy_spectral']

    # Guard against an all-background prediction: max() == 0 would divide by zero.
    denom = max(int(pred_mask.max()), 1)
    colored_mask = colormap(pred_mask / denom)                       # normalise to [0, 1]
    colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)   # drop alpha, to uint8
    mask_pil = Image.fromarray(colored_mask)

    return original_image_resized, mask_pil
58
+
59
# Gradio UI: one tab per segmentation method, each wiring a file input and
# parameter sliders to the corresponding segmentation function.
with gr.Blocks() as demo:
    gr.Markdown("# Image Segmentation using Classical CV")

    with gr.Tabs() as tabs:
        with gr.TabItem("Otsu's Method"):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Image File")
                    display_btn = gr.Button("Segment this image")
                    threshold_text = gr.Textbox(label="Threshold Comparison", value="", interactive=False)
                with gr.Column(scale=2):
                    image_output = gr.Image(label="Original Image", container=False)
                    histogram_output = gr.Image(label="Histogram", container=False)
                    segmented_image_output = gr.Image(label="Our Segmented Image", container=False)
                    opencv_segmented_image_output = gr.Image(label="OpenCV Segmented Image", container=False)
            display_btn.click(
                fn=generate_segmented_image,
                inputs=file_input,
                outputs=[image_output, segmented_image_output, opencv_segmented_image_output, histogram_output, threshold_text]
            )

        with gr.TabItem("K-means Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    kmeans_file_input = gr.File(label="Upload Image File")
                    kmeans_k_value = gr.Slider(minimum=2, maximum=10, value=3, step=1, label="Number of Clusters (K)")
                    kmeans_display_btn = gr.Button("Segment this image")
                    kmeans_threshold_text = gr.Textbox(label="K-means Info", value="", interactive=False)
                with gr.Column(scale=2):
                    kmeans_image_output = gr.Image(label="Original Image", container=False)
                    kmeans_segmented_image_output = gr.Image(label="K-means Segmented Image", container=False)
            kmeans_display_btn.click(
                fn=generate_kmeans,
                inputs=[kmeans_file_input, kmeans_k_value],
                outputs=[kmeans_image_output, kmeans_segmented_image_output, kmeans_threshold_text]
            )

        with gr.TabItem("SLIC Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    slic_file_input = gr.File(label="Upload Image File")
                    slic_k_value = gr.Slider(minimum=2, maximum=200, value=3, step=1, label="Number of superpixels")
                    slic_m_value = gr.Slider(minimum=1, maximum=40, value=3, step=1, label="Compactness factor")
                    slic_max_iter_value = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of iterations")
                    slic_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    slic_image_output = gr.Image(label="Original Image", container=False)
                    slic_segmented_image_output = gr.Image(label="SLIC Segmented Image", container=False)
            slic_display_btn.click(
                fn=generate_slic,
                inputs=[slic_file_input, slic_k_value, slic_m_value, slic_max_iter_value],
                outputs=[slic_image_output, slic_segmented_image_output]
            )

        with gr.TabItem("Watershed Algorithm Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    watershed_file_input = gr.File(label="Upload Image File")
                    watershed_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    watershed_image_output = gr.Image(label="Original Image", container=False)
                    watershed_segmented_image_output = gr.Image(label="watershed Segmented Image", container=False)
            watershed_display_btn.click(
                fn=generate_watershed,
                inputs=[watershed_file_input],
                outputs=[watershed_image_output, watershed_segmented_image_output]
            )

        with gr.TabItem("Felzenszwalb Algorithm Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    felzenszwalb_file_input = gr.File(label="Upload Image File")
                    sigma_value = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.1, label="Sigma")
                    K_value = gr.Slider(minimum=2, maximum=1000, value=2, step=1, label="K value")
                    min_size_value = gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Min Size Factor")
                    felzenszwalb_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    felzenszwalb_image_output = gr.Image(label="Original Image", container=False)
                    felzenszwalb_segmented_image_output = gr.Image(label="felzenszwalb Segmented Image", container=False)
            felzenszwalb_display_btn.click(
                fn=generate_felzenszwalb,
                inputs=[felzenszwalb_file_input, sigma_value, K_value, min_size_value],
                outputs=[felzenszwalb_image_output, felzenszwalb_segmented_image_output]
            )

        with gr.TabItem("SegNet EfficientNet B0 Segmentation"):
            with gr.Row():
                with gr.Column(scale=1):
                    segnet_file_input = gr.File(label="Upload Image File")
                    segnet_display_btn = gr.Button("Segment this image")
                with gr.Column(scale=2):
                    segnet_image_output = gr.Image(label="Original Image", container=False)
                    segnet_segmented_image_output = gr.Image(label="SegNet Segmented Image", container=False)
            segnet_display_btn.click(
                fn=SegNet_efficient_b0,
                inputs=[segnet_file_input],
                outputs=[segnet_image_output, segnet_segmented_image_output]
            )

if __name__ == "__main__":
    # BUGFIX: the original hard-coded a private LAN address (172.31.100.127),
    # which fails anywhere else; bind all interfaces so the app is reachable
    # when deployed (e.g. on Hugging Face Spaces).
    demo.launch(server_name="0.0.0.0")
166
+
bird.jpeg ADDED
enhaned_kmeans_segmented.png ADDED
experiments/SegNet/architecture.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import models, transforms
5
+ from torchvision.datasets import VOCSegmentation
6
+ from torch.utils.data import DataLoader
7
+ from PIL import Image
8
+ import numpy as np
9
+ import wandb
10
+ import os
11
+ import matplotlib.pyplot as plt
12
+
13
# Fix RNG seeds so training runs are reproducible.
torch.manual_seed(42)
np.random.seed(42)

# wandb.login(key="your_wandb_api_key_here")

# Training hyper-parameters and global configuration.
EPOCHS = 25
BATCH_SIZE = 8
LR = 1e-3
NUM_CLASSES = 21  # Pascal VOC has 21 classes including background
IMAGE_SIZE = (256, 256)  # fixed (H, W) used to resize both images and masks
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# wandb.init(project="segnet-efficientnet-voc", config={
#     "epochs": EPOCHS,
#     "batch_size": BATCH_SIZE,
#     "learning_rate": LR,
#     "architecture": "SegNet-EfficientNet",
#     "dataset": "PascalVOC2012"
# })
32
+
33
class SegNetEfficientNet(nn.Module):
    """SegNet-style encoder/decoder with an EfficientNet-B0 encoder.

    The encoder is the pretrained EfficientNet-B0 feature extractor (1280
    output channels); the decoder is a stack of transposed convolutions that
    upsamples back to a per-pixel map with `num_classes` channels.
    """

    def __init__(self, num_classes):
        super(SegNetEfficientNet, self).__init__()
        # BUGFIX: `pretrained=True` was deprecated in torchvision 0.13 and
        # later removed; the weights enum is the supported way to request
        # ImageNet weights. Keep a fallback for very old torchvision.
        try:
            base_model = models.efficientnet_b0(
                weights=models.EfficientNet_B0_Weights.DEFAULT)
        except AttributeError:
            base_model = models.efficientnet_b0(pretrained=True)
        features = list(base_model.features.children())

        # Encoder: the EfficientNet feature blocks.
        self.encoder = nn.Sequential(*features)

        # Decoder: four 2x upsampling steps, then a 1x1 classifier conv.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1280, 512, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, num_classes, kernel_size=1)
        )

    def forward(self, x):
        """Return logits of shape (N, num_classes, *IMAGE_SIZE)."""
        x = self.encoder(x)
        x = self.decoder(x)
        # The decoder does not exactly invert the encoder's downsampling, so
        # interpolate to the fixed training resolution.
        x = F.interpolate(x, size=IMAGE_SIZE, mode='bilinear', align_corners=False)
        return x
60
+
61
class VOCSegmentationDataset(VOCSegmentation):
    """Pascal VOC 2012 segmentation dataset with separate image/mask transforms.

    The stock `VOCSegmentation` applies a joint transform; this wrapper keeps
    the image and mask transforms independent and converts the mask to a long
    tensor of per-pixel class indices.
    """

    def __init__(self, root, image_set='train', transform=None,
                 target_transform=None, download=True):
        # `download` defaults to True for backward compatibility (the original
        # always downloaded), but callers with local data can now skip the
        # network round-trip.
        super().__init__(root=root, year='2012', image_set=image_set, download=download)
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        img, target = super().__getitem__(index)
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            target = self.target_transform(target)
        # Masks are palette PNGs: np.array yields per-pixel class indices;
        # 255 marks "ignore" boundaries, matching CrossEntropyLoss(ignore_index=255).
        target = torch.as_tensor(np.array(target), dtype=torch.long)
        return img, target
75
if __name__ == "__main__":
    # Input images: resize, tensorise, and apply ImageNet normalisation
    # (matching the pretrained EfficientNet encoder statistics).
    image_transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    # Masks must be resized with nearest-neighbour so class ids stay intact.
    mask_transform = transforms.Resize(IMAGE_SIZE, interpolation=Image.NEAREST)

    # NOTE(review): train.py imports train_loader/val_loader from this module,
    # but they only exist under this __main__ guard — confirm intended.
    train_dataset = VOCSegmentationDataset("voc_data", 'train', image_transform, mask_transform)
    val_dataset = VOCSegmentationDataset("voc_data", 'val', image_transform, mask_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
experiments/SegNet/train.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import models, transforms
5
+ from torchvision.datasets import VOCSegmentation
6
+ from torch.utils.data import DataLoader
7
+ from PIL import Image
8
+ import numpy as np
9
+ import wandb
10
+ import os
11
+ import matplotlib.pyplot as plt
12
+ from .architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, LR, EPOCHS, train_loader, val_loader, IMAGE_SIZE
13
+ from tqdm import tqdm
14
+
15
# Model, optimiser and loss. 255 is the VOC boundary ("ignore") label, so it
# is excluded from the cross-entropy loss.
model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss(ignore_index=255)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # NOTE(review): unused — DEVICE (imported) is used everywhere below
19
+
20
def pixel_accuracy(preds, labels, ignore_index=None):
    """Fraction of pixels whose argmax prediction matches the label.

    Args:
        preds: Logits of shape (N, C, H, W).
        labels: Ground-truth class indices of shape (N, H, W).
        ignore_index: Optional label value to exclude from the accuracy
            (e.g. 255, the VOC boundary label that the loss also ignores).
            Default None keeps the original behaviour of counting every pixel.

    Returns:
        A 0-dim float tensor with the accuracy in [0, 1].
    """
    _, preds = torch.max(preds, 1)
    correct = (preds == labels).float()
    if ignore_index is None:
        return correct.sum() / correct.numel()
    valid = (labels != ignore_index).float()
    # Clamp avoids 0/0 when every pixel carries the ignore label.
    return (correct * valid).sum() / valid.sum().clamp(min=1)
25
+
26
+ # def mean_iou(preds, labels, num_classes=NUM_CLASSES):
27
+ # _, preds = torch.max(preds, 1)
28
+ # ious = []
29
+ # for cls in range(num_classes):
30
+ # intersection = ((preds == cls) & (labels == cls)).float().sum()
31
+ # union = ((preds == cls) | (labels == cls)).float().sum()
32
+ # if union > 0:
33
+ # ious.append(intersection / union)
34
+ # return sum(ious) / len(ious) if ious else 0
35
+
36
# Main training loop: one training pass and one validation pass per epoch.
# NOTE(review): train_loader/val_loader are imported from .architecture, but
# that module only builds them under `if __name__ == "__main__":`, so this
# import fails at runtime — they need to be module-level (or built here).
for epoch in tqdm(range(EPOCHS)):
    # ---- training pass ----
    model.train()
    running_loss, running_acc = 0.0, 0.0
    for images, masks in train_loader:
        images, masks = images.to(DEVICE), masks.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        running_acc += pixel_accuracy(outputs, masks).item()
    train_loss = running_loss / len(train_loader)
    train_acc = running_acc / len(train_loader)

    # ---- validation pass ----
    model.eval()
    running_loss, running_acc = 0.0, 0.0
    with torch.no_grad():
        for images, masks in val_loader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)
            outputs = model(images)
            running_loss += criterion(outputs, masks).item()
            running_acc += pixel_accuracy(outputs, masks).item()
    val_loss = running_loss / len(val_loader)
    val_acc = running_acc / len(val_loader)

    # wandb.log({
    #     "epoch": epoch + 1,
    #     "train_loss": train_loss,
    #     "train_accuracy": train_acc,
    #     "val_loss": val_loss,
    #     "val_accuracy": val_acc
    # })

    print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

torch.save(model.state_dict(), "segnet_efficientnet_voc.pth")
# wandb.finish()
81
+
experiments/enhanced_kmeans_segmenter.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import matplotlib.pyplot as plt
4
+ from tqdm import tqdm
5
+ from PIL import Image
6
+
7
def slic_kmeans(image_path, K=100, m=10, max_iter=10):
    """
    Perform superpixel segmentation using enhanced K-means with LAB+XY.

    SLIC-style algorithm: each pixel is a 5-D feature [L, a, b, x, y]; a
    cluster only competes for pixels inside a 2S x 2S window around its
    centre, where S is the grid interval derived from K.

    Args:
        image_path (str): Path to the input image file.
        K (int): Target number of superpixels.
        m (float): Compactness factor (larger = more spatially compact clusters).
        max_iter (int): Number of assignment/update iterations.
    Returns:
        jpg_image: The original PIL image, unmodified.
        segmented_img: PIL image recoloured with each cluster's mean LAB colour.
        labels: (h, w) int32 array with the cluster index of each pixel.
        centers: Final cluster centres in [L, a, b, x, y] feature space.
    """
    jpg_image = Image.open(image_path)
    # assumes a 3-channel RGB file — TODO confirm behaviour for grayscale/RGBA inputs
    image = np.array(jpg_image)
    h, w = image.shape[:2]
    S = int(np.sqrt(h * w / K))  # grid interval between initial centres

    # Convert to LAB color space (perceptually more uniform than RGB).
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB).astype(np.float32)

    # Create 5D feature vector [L, a, b, x, y] per pixel, flattened row-major.
    X, Y = np.meshgrid(np.arange(w), np.arange(h))
    features = np.dstack((lab, X, Y)).reshape((-1, 5))

    # Initialize cluster centers on a regular S x S grid.
    centers = []
    for y in range(S // 2, h, S):
        for x in range(S // 2, w, S):
            center = features[y * w + x]
            centers.append(center)
    centers = np.array(centers)

    # NOTE(review): distances persist across iterations, so an assignment can
    # only ever be replaced by a strictly better one; pixels never covered by
    # any centre window keep label -1 and would index centers[-1] below.
    labels = np.full((h * w,), -1, dtype=np.int32)
    distances = np.full((h * w,), np.inf)

    for iteration in tqdm(range(max_iter)):
        # Assignment step: each centre claims pixels in its 2S x 2S window.
        for idx, center in enumerate(centers):
            l, a, b, cx, cy = center
            x_start, x_end = max(0, int(cx - S)), min(w, int(cx + S))
            y_start, y_end = max(0, int(cy - S)), min(h, int(cy + S))

            for y in range(y_start, y_end):
                for x in range(x_start, x_end):
                    i = y * w + x
                    fp = features[i]
                    dc = np.linalg.norm(fp[:3] - center[:3])  # LAB distance
                    ds = np.linalg.norm(fp[3:] - center[3:])  # XY distance
                    # Combined SLIC distance: colour plus spatially-normalised
                    # position weighted by the compactness factor m.
                    D = np.sqrt(dc**2 + (ds / S)**2 * m**2)

                    if D < distances[i]:
                        distances[i] = D
                        labels[i] = idx

        # Update step: move each centre to the mean feature of its members.
        new_centers = np.zeros_like(centers)
        count = np.zeros(len(centers))
        for i in range(h * w):
            lbl = labels[i]
            new_centers[lbl] += features[i]
            count[lbl] += 1
        for i in range(len(centers)):
            # Empty clusters keep a zero centre rather than dividing by zero.
            if count[i] > 0:
                new_centers[i] /= count[i]
        centers = new_centers

    # Recolor each pixel with its cluster's mean LAB colour, converted to RGB.
    segmented_img = np.zeros((h, w, 3), dtype=np.uint8)
    for i in range(h * w):
        lbl = labels[i]
        lab_val = centers[lbl][:3]
        lab_pixel = np.uint8([[lab_val]])
        rgb_pixel = cv2.cvtColor(lab_pixel, cv2.COLOR_LAB2RGB)[0][0]
        segmented_img[i // w, i % w] = rgb_pixel

    return jpg_image, Image.fromarray(segmented_img), labels.reshape((h, w)), centers
82
+
83
+ # img_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
84
+ # image = cv2.imread(img_path)
85
+ # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
86
+
87
+ # _,seg_img, labels, centers = slic_kmeans(image, K=2, m=20)
88
+ # seg_img.save("enhaned_kmeans_segmented.png")
89
+ # plt.figure(figsize=(10, 5))
90
+ # plt.subplot(1, 2, 1)
91
+ # plt.imshow(image)
92
+ # plt.title("Original Image")
93
+ # plt.axis("off")
94
+
95
+ # plt.subplot(1, 2, 2)
96
+ # plt.imshow(seg_img)
97
+ # plt.title("SLIC-like K-Means Segmentation")
98
+ # plt.axis("off")
99
+ # plt.tight_layout()
100
+ # plt.show()
experiments/felzenszwalb_segmentation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .segmentation import segment
experiments/felzenszwalb_segmentation/disjoint_set.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class DisjointSet:
    """Union-find over n elements with union by rank and size tracking.

    Each element row stores [rank, component size, parent pointer].
    """

    def __init__(self, n_elements):
        self.num = n_elements
        self.elements = np.empty(shape=(n_elements, 3), dtype=int)
        # Every element starts as its own singleton component.
        self.elements[:, 0] = 0                      # rank
        self.elements[:, 1] = 1                      # size
        self.elements[:, 2] = np.arange(n_elements)  # parent = self

    def size(self, x):
        """Size of the component whose representative is x."""
        return self.elements[x, 1]

    def num_sets(self):
        """Current number of disjoint components."""
        return self.num

    def find(self, x):
        """Representative of x's component (with one-step path shortening)."""
        root = int(x)
        while root != self.elements[root, 2]:
            root = self.elements[root, 2]
        self.elements[x, 2] = root
        return root

    def join(self, x, y):
        """Union the components rooted at x and y (expects representatives)."""
        if self.elements[x, 0] > self.elements[y, 0]:
            # x has the higher rank: y hangs under x.
            self.elements[y, 2] = x
            self.elements[x, 1] += self.elements[y, 1]
        else:
            self.elements[x, 2] = y
            self.elements[y, 1] += self.elements[x, 1]
            if self.elements[x, 0] == self.elements[y, 0]:
                self.elements[y, 0] += 1
        self.num -= 1
experiments/felzenszwalb_segmentation/segmentation.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from .disjoint_set import DisjointSet
3
+ from .utils import smoothen, difference, get_random_rgb_image
4
+
5
+
6
def segment_graph(num_vertices, num_edges, edges, c):
    """Merge vertices using Felzenszwalb's predicate; return the DisjointSet.

    Edges are (v0, v1, weight) rows; c is the threshold constant k controlling
    how aggressively components merge.
    """
    # Process edges in non-decreasing weight order.
    edges[0 : num_edges, :] = edges[edges[0 : num_edges, 2].argsort()]
    components = DisjointSet(num_vertices)
    # Per-component merge threshold, initialised to c for singletons.
    threshold = np.full(num_vertices, float(c))
    for edge_idx in range(num_edges):
        v0, v1, weight = edges[edge_idx, :]
        root_a = components.find(v0)
        root_b = components.find(v1)
        if root_a == root_b:
            continue
        # Merge only when the edge is no heavier than both internal thresholds.
        if (weight <= threshold[root_a]) and (weight <= threshold[root_b]):
            components.join(root_a, root_b)
            root_a = components.find(root_a)
            threshold[root_a] = weight + (c / components.size(root_a))
    return components
22
+
23
+
24
def segment(in_image, sigma, k, min_size):
    """Felzenszwalb-Huttenlocher graph-based segmentation.

    Args:
        in_image: (H, W, 3) RGB array.
        sigma: Gaussian smoothing applied to each colour band first.
        k: Threshold constant controlling component granularity.
        min_size: Minimum component size enforced by a post-merge pass.

    Returns:
        (H, W, 3) float array in which every component is painted one
        random colour.
    """
    height, width, _ = in_image.shape
    smooth_red_band = smoothen(in_image[:, :, 0], sigma)
    smooth_green_band = smoothen(in_image[:, :, 1], sigma)
    smooth_blue_band = smoothen(in_image[:, :, 2], sigma)

    # Build the 8-connected pixel graph: right, down, down-right, up-right —
    # at most 4 edges per pixel.
    edges_size = width * height * 4
    edges = np.zeros(shape=(edges_size, 3), dtype=object)
    num = 0
    for y in range(height):
        for x in range(width):
            if x < width - 1:
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int(y * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y
                )
                num += 1
            if y < height - 1:
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y + 1) * width + x)
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x, y + 1
                )
                num += 1
            # BUGFIX: this condition was `y < height - 2`, which dropped the
            # down-right diagonal edges of the second-to-last row; the
            # reference implementation uses height - 1, symmetric with the
            # other neighbour directions.
            if (x < width - 1) and (y < height - 1):
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y + 1) * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y + 1
                )
                num += 1
            if (x < width - 1) and (y > 0):
                edges[num, 0] = int(y * width + x)
                edges[num, 1] = int((y - 1) * width + (x + 1))
                edges[num, 2] = difference(
                    smooth_red_band, smooth_green_band,
                    smooth_blue_band, x, y, x + 1, y - 1
                )
                num += 1

    u = segment_graph(width * height, num, edges, k)

    # Post-process: merge any component smaller than min_size into a neighbour.
    for i in range(num):
        a = u.find(edges[i, 0])
        b = u.find(edges[i, 1])
        if (a != b) and ((u.size(a) < min_size) or (u.size(b) < min_size)):
            u.join(a, b)

    # Paint each component with a random colour keyed by its representative.
    output = np.zeros(shape=(height, width, 3))
    colors = np.zeros(shape=(height * width, 3))
    for i in range(height * width):
        colors[i, :] = get_random_rgb_image()
    for y in range(height):
        for x in range(width):
            comp = u.find(y * width + x)
            output[y, x, :] = colors[comp, :]
    return output
experiments/felzenszwalb_segmentation/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .utils import *
2
+ from .filter_utils import *
experiments/felzenszwalb_segmentation/utils/filter_utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from math import ceil, exp, pow
3
+
4
+
5
def convolve(src, mask):
    """Horizontal 1-D convolution with a mirror-symmetric mask, clamped at edges.

    mask[0] is the centre tap; mask[i] is applied to the pixels i columns to
    the left and to the right (indices clamped to the row bounds).
    """
    height, width = src.shape
    taps = len(mask)
    output = np.zeros(shape=src.shape, dtype=float)
    for row in range(height):
        for col in range(width):
            acc = float(mask[0] * src[row, col])
            for i in range(1, taps):
                left = src[row, max(col - i, 0)]
                right = src[row, min(col + i, width - 1)]
                acc += mask[i] * (left + right)
            output[row, col] = acc
    return output
17
+
18
+
19
def normalize(mask):
    """Scale the half-kernel by twice its absolute sum plus the centre tap.

    NOTE(review): the denominator includes mask[0] in the doubled sum and then
    adds it once more (three copies in total); the classic reference
    normalisation doubles only mask[1:]. Confirm this deviation is intended.
    """
    total = 2 * np.sum(np.absolute(mask)) + abs(mask[0])
    return np.divide(mask, total)
22
+
23
+
24
def smoothen(src, sigma):
    """Smooth src with a normalised Gaussian kernel via two convolve passes.

    NOTE(review): convolve() filters along the x axis only, so both passes are
    horizontal; a separable 2-D blur would transpose between passes — confirm
    intended.
    """
    kernel = normalize(make_gaussian_filter(sigma))
    first_pass = convolve(src, kernel)
    return convolve(first_pass, kernel)
30
+
31
+
32
def make_gaussian_filter(sigma):
    """Return the right half of a 1-D Gaussian kernel (centre tap first).

    Args:
        sigma: Standard deviation; clamped to at least 0.01.

    Returns:
        Array of length ceil(4 * sigma) + 1 with
        mask[i] = exp(-i**2 / (2 * sigma**2)); mask[0] is 1.0.
    """
    sigma = max(sigma, 0.01)
    length = int(ceil(sigma * 4.0)) + 1
    mask = np.zeros(shape=length, dtype=float)
    for i in range(length):
        # BUGFIX: the exponent was pow(i / sigma, i / sigma); a Gaussian uses
        # the SQUARE of the normalised distance.
        mask[i] = exp(-0.5 * pow(i / sigma, 2))
    return mask
experiments/felzenszwalb_segmentation/utils/utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from math import sqrt
3
+ from random import randint
4
+
5
+
6
def difference(red_band, green_band, blue_band, x1, y1, x2, y2):
    """Euclidean distance in RGB space between pixels (x1, y1) and (x2, y2)."""
    dr = red_band[y1, x1] - red_band[y2, x2]
    dg = green_band[y1, x1] - green_band[y2, x2]
    db = blue_band[y1, x1] - blue_band[y2, x2]
    return sqrt(dr ** 2 + dg ** 2 + db ** 2)
12
+
13
+
14
def get_random_rgb_image():
    """Return a random RGB colour as an int array of shape (3,)."""
    return np.array([randint(0, 255) for _ in range(3)], dtype=int)
20
+
21
+
22
def get_random_gray_image():
    """Return a random gray level as an int array of shape (1,)."""
    return np.array([randint(0, 255)], dtype=int)
experiments/kmeans_segmenter.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ import cv2
4
+ from PIL import Image
5
+ import io
6
+
7
def initialize_centroids(data, K):
    """Pick K distinct rows of ``data`` uniformly at random as seed centroids."""
    n_samples = data.shape[0]
    chosen = np.random.choice(n_samples, K, replace=False)
    return data[chosen]
11
+
12
def compute_distances(data, centroids):
    """Return the (n_points, n_centroids) matrix of Euclidean distances."""
    # Broadcast to (n_points, n_centroids, dim), then reduce over dim.
    diffs = data[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.sqrt((diffs ** 2).sum(axis=2))
15
+
16
def update_centroids(data, labels, K):
    """Recompute each centroid as the mean of its cluster's members.

    A cluster with no assigned points keeps the zero vector (same as the
    original behavior).
    """
    centroids = np.zeros((K, data.shape[1]))
    for idx in range(K):
        members = data[labels == idx]
        if len(members):
            centroids[idx] = members.mean(axis=0)
    return centroids
24
+
25
def kmeans_from_scratch(image, K=4, max_iters=100, tol=1e-4):
    """Segment ``image`` by K-means clustering of its RGB pixels.

    Iterates assign/update steps until the centroid shift falls below
    ``tol`` or ``max_iters`` is reached.

    Returns:
        (segmented uint8 image, per-pixel label map, uint8 centroids).
    """
    pixels = image.reshape((-1, 3)).astype(np.float32)
    centroids = initialize_centroids(pixels, K)

    for _ in range(max_iters):
        # Assignment step: nearest centroid per pixel.
        labels = np.argmin(compute_distances(pixels, centroids), axis=1)

        # Update step; stop once centroids have essentially converged.
        updated = update_centroids(pixels, labels, K)
        if np.linalg.norm(updated - centroids) < tol:
            break
        centroids = updated

    # Paint each pixel with its centroid's color.
    quantized = centroids[labels].astype(np.uint8)
    segmented_image = quantized.reshape(image.shape)

    return segmented_image, labels.reshape(image.shape[:2]), centroids.astype(np.uint8)
46
+
47
def generate_kmeans_segmented_image(image_path, k=3):
    """Run K-means segmentation on an image file for the Gradio app.

    Args:
        image_path: path to the input image.
        k: number of clusters.

    Returns:
        (original PIL image, segmented PIL image, comparison figure as a
        PIL image, status text).
    """
    image = Image.open(image_path)
    image_np = np.array(image)

    # Normalize every input to a 3-channel RGB array. The previous code
    # did an RGB->BGR->RGB round trip (a no-op) and crashed on 4-channel
    # RGBA inputs.
    if image_np.ndim == 2:
        image_rgb = cv2.cvtColor(image_np, cv2.COLOR_GRAY2RGB)
    elif image_np.shape[2] == 4:
        image_rgb = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)
    else:
        image_rgb = image_np

    seg_img, labels, centers = kmeans_from_scratch(image_rgb, K=k)

    # Vertical swatch strip: one 50px band per cluster color.
    colors_image = np.zeros((50 * k, 100, 3), dtype=np.uint8)
    for i, color in enumerate(centers):
        colors_image[i * 50:(i + 1) * 50, :] = color

    fig, axes = plt.subplots(1, 3, figsize=(12, 4))

    axes[0].imshow(image_rgb)
    axes[0].set_title("Original Image")
    axes[0].axis('off')

    axes[1].imshow(seg_img)
    axes[1].set_title(f"K-Means (K={k})")
    axes[1].axis('off')

    axes[2].imshow(colors_image)
    axes[2].set_title("Cluster Colors")
    axes[2].axis('off')

    plt.tight_layout()

    # Render the figure to an in-memory PNG and hand it back as PIL.
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    comparison_image = Image.open(buf)
    plt.close(fig)

    return image, Image.fromarray(seg_img), comparison_image, f"K-Means clustering with K={k}"
87
+
88
if __name__ == "__main__":
    # Example usage with a hard-coded local image path; adjust as needed.
    image_path = "/home/akshat/projects/CSL7360_Project/bird.jpeg"
    original, segmented, comparison, text = generate_kmeans_segmented_image(image_path, k=3)

    # Save output images instead of displaying them
    segmented.save("kmeans_segmented.png")
    comparison.save("kmeans_comparison.png")
    print(text)
experiments/otsu_segmenter.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import matplotlib.pyplot as plt
4
+ from PIL import Image
5
+ import io
6
+
7
def otsu_threshold(image):
    """Binarize ``image`` with Otsu's method, implemented from scratch.

    Exhaustively searches thresholds 1..255 for the one maximizing the
    between-class variance of the grayscale histogram.

    Returns:
        (optimal threshold, binary image: 255 where pixel >= threshold).
    """
    counts, _ = np.histogram(image.flatten(), bins=256, range=[0, 256])
    prob = counts.astype(float) / image.size

    best_variance = 0
    best_threshold = 0
    levels = np.arange(256)

    for t in range(1, 256):
        # Class weights: background below t, foreground at/above t.
        w0 = np.sum(prob[:t])
        w1 = np.sum(prob[t:])
        if w0 == 0 or w1 == 0:
            continue  # one class is empty; variance undefined

        mu0 = np.sum(levels[:t] * prob[:t]) / w0
        mu1 = np.sum(levels[t:] * prob[t:]) / w1
        between = w0 * w1 * (mu0 - mu1) ** 2

        if between > best_variance:
            best_variance = between
            best_threshold = t

    binary = np.zeros_like(image)
    binary[image >= best_threshold] = 255

    return best_threshold, binary
36
+
37
def generate_segmented_image(image_path):
    """Compare the from-scratch Otsu threshold with OpenCV's.

    Args:
        image_path: path to the input image.

    Returns:
        (original PIL image, our binary image, OpenCV's binary image,
        histogram figure as a PIL image, summary text).
    """
    print(f"Image path: {image_path}")
    image = Image.open(image_path)
    image_np = np.array(image)

    # Reduce any input to single-channel grayscale. The previous code ran
    # COLOR_RGB2BGR unconditionally, which raises on 2-D grayscale input
    # (making its `else` branch unreachable) and on RGBA input.
    if image_np.ndim == 3:
        if image_np.shape[2] == 4:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_RGBA2RGB)
        gray_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    else:
        gray_image = image_np.copy()

    # Pre-blur to suppress noise before thresholding.
    blurred = cv2.GaussianBlur(gray_image, (5, 5), 0)

    # Our implementation
    our_threshold, our_segmented = otsu_threshold(blurred)

    # OpenCV's implementation
    opencv_threshold, opencv_segmented = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Histogram figure with both thresholds marked.
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(gray_image.ravel(), 256, [0, 256], color='gray')
    ax.axvline(x=our_threshold, color='red', linestyle='--', label=f'Ours: {our_threshold}')
    ax.axvline(x=opencv_threshold, color='green', linestyle='--', label=f'OpenCV: {opencv_threshold}')
    ax.set_title("Histogram with Thresholds")
    ax.legend()

    # Convert Matplotlib figure to a PIL image via an in-memory PNG.
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    hist_image = Image.open(buf)
    plt.close(fig)  # Close the figure to free memory

    return (
        image,
        Image.fromarray(our_segmented),
        Image.fromarray(opencv_segmented),
        hist_image,
        f"Our Threshold: {our_threshold}\nOpenCV Threshold: {opencv_threshold}",
    )
82
if __name__ == "__main__":
    # Example usage; adjust the path to a local test image.
    image_path = '/home/akshat/projects/CSL7360_Project/bird.jpeg'
    # generate_segmented_image expects a file path (it calls Image.open
    # internally); the previous code passed a cv2 ndarray, which crashed.
    generate_segmented_image(image_path)
95
+
experiments/watershed_segmenter.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import heapq
4
+ import matplotlib.pyplot as plt
5
+ from collections import deque
6
+
7
+ # 1. Compute local minima as markers
8
def get_local_minima(gray):
    """Mark pixels equal to the minimum of their 3x3 neighborhood.

    Erosion replaces each pixel with its 3x3 neighborhood minimum, so a
    pixel unchanged by erosion is a local minimum. Returns a uint8 mask
    (1 at minima, 0 elsewhere) used as watershed markers.
    """
    neighborhood_min = cv2.erode(gray, np.ones((3, 3), np.uint8))
    return (gray == neighborhood_min).astype(np.uint8)
13
+
14
+ # 2. Label each connected component (marker)
15
def label_markers(minima):
    """Assign a distinct integer label to each connected minima blob.

    Returns (label map, number of labels including background 0).
    """
    count, labeled = cv2.connectedComponents(minima)
    return labeled, count
18
+
19
+ # 3. Watershed from scratch
20
def watershed_from_scratch(gray, markers):
    """Priority-flood watershed segmentation.

    Pixels are flooded in order of increasing intensity starting from the
    labeled marker regions. A pixel whose flooded neighbors all share one
    label joins that basin; a pixel touching two or more different labels
    becomes a watershed (boundary) pixel.

    Args:
        gray: 2-D grayscale image; intensity drives the flooding order.
        markers: int label map — positive values seed the basins, 0 elsewhere.

    Returns:
        int32 label map: basin labels > 0, -1 on watershed lines.
        (0 can remain on queued-but-unlabeled pixels whose neighborhood
        never acquired a positive label.)
    """
    h, w = gray.shape
    # Constants
    WATERSHED = -1   # final value for boundary pixels
    INIT = -2        # pixel not yet touched by the flood

    # Initialize label and visited map
    label_map = np.full((h, w), INIT, dtype=np.int32)
    label_map[markers > 0] = markers[markers > 0]

    # Priority queue for pixels: (intensity, y, x)
    pq = []

    # Populate queue with boundary of initial markers
    # (8-connected neighbors of every marker pixel).
    for y in range(h):
        for x in range(w):
            if markers[y, x] > 0:
                for dy in [-1, 0, 1]:
                    for dx in [-1, 0, 1]:
                        ny, nx = y + dy, x + dx
                        if 0 <= ny < h and 0 <= nx < w:
                            if markers[ny, nx] == 0 and label_map[ny, nx] == INIT:
                                heapq.heappush(pq, (gray[ny, nx], ny, nx))
                                label_map[ny, nx] = 0  # Mark as in queue

    # Flooding: always expand the lowest-intensity queued pixel first.
    while pq:
        intensity, y, x = heapq.heappop(pq)

        # Collect the distinct basin labels among 8-connected neighbors.
        neighbor_labels = set()
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                ny, nx = y + dy, x + dx
                if 0 <= ny < h and 0 <= nx < w:
                    lbl = label_map[ny, nx]
                    if lbl > 0:
                        neighbor_labels.add(lbl)

        if len(neighbor_labels) == 1:
            # Only one basin reaches this pixel — it joins that basin.
            label_map[y, x] = neighbor_labels.pop()
        elif len(neighbor_labels) > 1:
            # Two or more basins meet here — watershed line.
            label_map[y, x] = WATERSHED

        # Add unvisited neighbors to the queue
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                ny, nx = y + dy, x + dx
                if 0 <= ny < h and 0 <= nx < w:
                    if label_map[ny, nx] == INIT:
                        heapq.heappush(pq, (gray[ny, nx], ny, nx))
                        label_map[ny, nx] = 0  # Mark as in queue

    return label_map
73
+
74
def generate_watershed(iamge_path):
    """Run the from-scratch watershed on the image at the given path.

    NOTE(review): the parameter name `iamge_path` is a typo, kept
    unchanged for backward compatibility with existing callers.

    Args:
        iamge_path: path to the input image (loaded as grayscale).

    Returns:
        (blurred grayscale input, uint8 color visualization: watershed
        lines red, basin interiors green, original minima blue).

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # Load grayscale image
    image = cv2.imread(iamge_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None on a bad path/unreadable file; fail with
        # a clear error instead of a cryptic crash inside GaussianBlur.
        raise FileNotFoundError(f"Could not read image: {iamge_path}")
    image = cv2.GaussianBlur(image, (5, 5), 0)
    minima = get_local_minima(image)
    markers, num_labels = label_markers(minima)
    result = watershed_from_scratch(image, markers)

    # Visualization: color-code the label map.
    output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    output[result == -1] = [255, 0, 0]  # Watershed lines in red
    output[result > 0] = [0, 255, 0]    # Segments in green
    output[markers > 0] = [0, 0, 255]   # Original minima in blue
    return image, output
88
if __name__ == "__main__":
    # Run the process
    # Load grayscale image (hard-coded local path; adjust as needed).
    image = cv2.imread("/home/akshat/projects/CSL7360_Project/bird.jpeg", cv2.IMREAD_GRAYSCALE)
    image = cv2.GaussianBlur(image, (5, 5), 0)
    minima = get_local_minima(image)
    markers, num_labels = label_markers(minima)
    result = watershed_from_scratch(image, markers)

    # Visualization
    output = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    output[result == -1] = [255, 0, 0]  # Watershed lines in red
    output[result > 0] = [0, 255, 0]  # Segments in green
    output[markers > 0] = [0, 0, 255]  # Original minima in blue

    # Save the original grayscale and the output image
    cv2.imwrite("original_grayscale.png", image)
    cv2.imwrite("watershed_output.png", output)

    print("Images saved as 'original_grayscale.png' and 'watershed_output.png'")
109
+
110
+
111
+
kmeans_comparison.png ADDED

Git LFS Details

  • SHA256: d4a9201e30341e13019433ac556b9b2a3ffc44dd2d4adeae0d97e4829ab6860b
  • Pointer size: 131 Bytes
  • Size of remote file: 181 kB
kmeans_segmented.png ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.5.1
2
+ torchvision==0.20.1
3
+ gradio==5.23.1
4
+ pillow==10.4.0
5
+ numpy==2.2.2
6
+ opencv-python==4.10.0
7
+ matplotlib==3.10.0
8
+ wandb==0.19.6
9
+ tqdm==4.67.1
segnet_efficientnet_voc.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5225a079173dc4b5b1f786e79a474d64c2d17a9aa8f35bbb0908cfbb0f2b9baa
3
+ size 29583954
watershed_output.png ADDED

Git LFS Details

  • SHA256: 0107d6ecbbe737c32e931bf30b6739d567082c318d16a738080361165ed045c6
  • Pointer size: 131 Bytes
  • Size of remote file: 135 kB