Spaces:

AJain1234
/

Image_Segmentation_CV_Project

Build error

App Files Files Community

AJain1234 committed on Apr 10, 2025

Commit

43c8d65

verified ·

1 Parent(s): a393bdc

Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

.gitignore +2 -0
app.py +68 -1
experiments/SegNet/efficient_b0_backbone/architecture.py +87 -0
experiments/SegNet/efficient_b0_backbone/train.py +81 -0
experiments/SegNet/vgg_backbone/SegNet_with_VGG16_backbone.ipynb +0 -0
experiments/SegNet/vgg_backbone/model.py +48 -0
requirements.txt +1 -0

.gitignore CHANGED Viewed

@@ -172,3 +172,5 @@ cython_debug/
 # PyPI configuration file
 .pypirc

 # PyPI configuration file
 .pypirc
+# Model weights downloaded at startup by app.py. gitignore patterns are not
+# shell paths: a leading "./" never matches, so anchor with "/" instead.
+/saved_models/

app.py CHANGED Viewed

@@ -6,10 +6,62 @@ from experiments.kmeans_segmenter import generate_kmeans_segmented_image
 from experiments.enhanced_kmeans_segmenter import slic_kmeans
 from experiments.watershed_segmenter import generate_watershed
 from experiments.felzenszwalb_segmentation import segment
-from experiments.SegNet.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
 import numpy as np
 from PIL import Image
 from matplotlib import cm
 def generate_kmeans(image_path,k):
     kmeans_image_output, kmeans_segmented_image_output,_,kmeans_threshold_text=generate_kmeans_segmented_image(image_path, k)
@@ -161,6 +213,21 @@ with gr.Blocks() as demo:
                 inputs=[segnet_file_input],
                 outputs=[segnet_image_output,segnet_segmented_image_output]
         )
 if __name__ == "__main__":
     demo.launch()

 from experiments.enhanced_kmeans_segmenter import slic_kmeans
 from experiments.watershed_segmenter import generate_watershed
 from experiments.felzenszwalb_segmentation import segment
+from experiments.SegNet.efficient_b0_backbone.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
+from experiments.SegNet.vgg_backbone.model import SegNet
 import numpy as np
 from PIL import Image
 from matplotlib import cm
+import gdown
+import os
+# Check if the saved_models directory exists, if not create it
+if not os.path.exists("saved_models"):
+    os.makedirs("saved_models")
+# Check if the model file already exists before downloading
+if not os.path.exists("saved_models/segnet_vgg.pth"):
+    print("Downloading SegNet VGG weights...")
+    segnet_vgg_weights = "https://drive.google.com/file/d/1EFXKQ_3bDW9FbZCqOLdrE0DOI0V4W82o/view?usp=sharing"
+    gdown.download(segnet_vgg_weights, "saved_models/segnet_vgg.pth", fuzzy=True)
+    print("Download complete!")
+else:
+    print("SegNet VGG weights already exist, skipping download.")
+def generate_segnet_vgg(image_path):
+    model = SegNet(32).to(DEVICE)
+    model.load_state_dict(torch.load("saved_models/segnet_vgg.pth", map_location=DEVICE))
+    # Set model to evaluation mode
+    model.eval()
+    # Load and preprocess the image
+    image = Image.open(image_path).convert('RGB')
+    original_image = image.copy()
+    # Apply same preprocessing as during training
+    transform = transforms.Compose([
+        transforms.Resize((224, 224)),  # Adjust size to match your model's expected input
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+    ])
+    input_tensor = transform(image).unsqueeze(0).to(DEVICE)
+    # Get prediction
+    with torch.no_grad():
+        output = model(input_tensor)
+        pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
+    # Convert prediction to visualization
+    # Option 1: Use a colormap for visualization
+    colormap = cm.get_cmap('nipy_spectral')
+    colored_mask = colormap(pred_mask / (pred_mask.max() or 1))  # Normalize, handle case where max is 0
+    colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)  # Drop alpha and convert to uint8
+    segmented_image = Image.fromarray(colored_mask)
+    # Resize segmented image to match original image size
+    segmented_image = segmented_image.resize(original_image.size, Image.NEAREST)
+    return original_image, segmented_image
 def generate_kmeans(image_path,k):
     kmeans_image_output, kmeans_segmented_image_output,_,kmeans_threshold_text=generate_kmeans_segmented_image(image_path, k)
                 inputs=[segnet_file_input],
                 outputs=[segnet_image_output,segnet_segmented_image_output]
         )
+        with gr.TabItem("SegNet VGG Segmentation"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    segnet_file_input = gr.File(label="Upload Image File")
+                    segnet_display_btn = gr.Button("Segment this image")
+                with gr.Column(scale=2):
+                    segnet_image_output = gr.Image(label="Original Image", container=False)
+                    segnet_segmented_image_output = gr.Image(label="SegNet VGG Segmented Image", container=False)
+            segnet_display_btn.click(
+                fn=generate_segnet_vgg,
+                inputs=[segnet_file_input],
+                outputs=[segnet_image_output,segnet_segmented_image_output]
+        )
 if __name__ == "__main__":
     demo.launch()

experiments/SegNet/efficient_b0_backbone/architecture.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision import models, transforms
+from torchvision.datasets import VOCSegmentation
+from torch.utils.data import DataLoader
+from PIL import Image
+import numpy as np
+import wandb
+import os
+import matplotlib.pyplot as plt
+# Seed the torch and NumPy RNGs; this runs whenever the module is imported.
+torch.manual_seed(42)
+np.random.seed(42)
+# wandb.login(key="your_wandb_api_key_here")
+# Training hyper-parameters.
+EPOCHS = 25
+BATCH_SIZE = 8
+LR = 1e-3
+NUM_CLASSES = 21  # Pascal VOC has 21 classes including background
+# Spatial size used by the resize transforms below and by the final
+# interpolation in SegNetEfficientNet.forward.
+IMAGE_SIZE = (256, 256)
+# First CUDA device when available, otherwise the CPU.
+DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# wandb.init(project="segnet-efficientnet-voc", config={
+#     "epochs": EPOCHS,
+#     "batch_size": BATCH_SIZE,
+#     "learning_rate": LR,
+#     "architecture": "SegNet-EfficientNet",
+#     "dataset": "PascalVOC2012"
+# })
+class SegNetEfficientNet(nn.Module):
+    """Encoder-decoder segmentation network with an EfficientNet-B0 feature encoder.
+
+    The decoder is a stack of transposed convolutions. The output logits are
+    always resized to the module-level IMAGE_SIZE, whatever the input size is.
+    """
+    def __init__(self, num_classes):
+        """Build the encoder and decoder.
+
+        Args:
+            num_classes: Number of output channels produced by the last decoder layer.
+        """
+        super(SegNetEfficientNet, self).__init__()
+        # pretrained=True asks torchvision for pretrained backbone weights.
+        base_model = models.efficientnet_b0(pretrained=True)
+        features = list(base_model.features.children())
+        # Encoder: Use EfficientNet blocks
+        self.encoder = nn.Sequential(*features)
+        # Decoder: Up-convolutions
+        # Four kernel-2/stride-2 transposed convolutions, each doubling the
+        # spatial size; the first layer expects 1280 input channels.
+        self.decoder = nn.Sequential(
+            nn.ConvTranspose2d(1280, 512, kernel_size=2, stride=2),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
+            nn.ReLU(inplace=True),
+            # 1x1 projection to num_classes channels; no activation follows.
+            nn.ConvTranspose2d(64, num_classes, kernel_size=1)
+        )
+    def forward(self, x):
+        """Return per-class logits for ``x``, resized to IMAGE_SIZE.
+
+        NOTE(review): presumably ``x`` is a (batch, 3, H, W) float tensor — confirm.
+        """
+        x = self.encoder(x)
+        x = self.decoder(x)
+        # Bilinear resize to the fixed IMAGE_SIZE rather than to the input's size.
+        x = F.interpolate(x, size=IMAGE_SIZE, mode='bilinear', align_corners=False)
+        return x
+class VOCSegmentationDataset(VOCSegmentation):
+    def __init__(self, root, image_set='train', transform=None, target_transform=None):
+        super().__init__(root=root, year='2012', image_set=image_set, download=True)
+        self.transform = transform
+        self.target_transform = target_transform
+    def __getitem__(self, index):
+        img, target = super().__getitem__(index)
+        if self.transform:
+            img = self.transform(img)
+        if self.target_transform:
+            target = self.target_transform(target)
+        target = torch.as_tensor(np.array(target), dtype=torch.long)
+        return img, target
+# Script entry point: builds the Pascal VOC datasets and data loaders.
+# NOTE: every name below is defined only when this file is run directly, so
+# modules that import this file cannot import train_loader / val_loader.
+if __name__ == "__main__":
+    # Image preprocessing: resize to IMAGE_SIZE, convert to tensor, normalize per channel.
+    image_transform = transforms.Compose([
+        transforms.Resize(IMAGE_SIZE),
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406],
+                            [0.229, 0.224, 0.225])
+    ])
+    # Masks are resized with nearest-neighbour interpolation.
+    mask_transform = transforms.Resize(IMAGE_SIZE, interpolation=Image.NEAREST)
+    train_dataset = VOCSegmentationDataset("voc_data", 'train', image_transform, mask_transform)
+    val_dataset = VOCSegmentationDataset("voc_data", 'val', image_transform, mask_transform)
+    # Only the training loader shuffles.
+    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
+    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

experiments/SegNet/efficient_b0_backbone/train.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision import models, transforms
+from torchvision.datasets import VOCSegmentation
+from torch.utils.data import DataLoader
+from PIL import Image
+import numpy as np
+import wandb
+import os
+import matplotlib.pyplot as plt
+from .architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, LR, EPOCHS, train_loader, val_loader, IMAGE_SIZE
+from tqdm import tqdm
+model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
+optimizer = torch.optim.Adam(model.parameters(), lr=LR)
+criterion = nn.CrossEntropyLoss(ignore_index=255)
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+def pixel_accuracy(preds, labels):
+    _, preds = torch.max(preds, 1)
+    correct = (preds == labels).float()
+    acc = correct.sum() / correct.numel()
+    return acc
+# def mean_iou(preds, labels, num_classes=NUM_CLASSES):
+#     _, preds = torch.max(preds, 1)
+#     ious = []
+#     for cls in range(num_classes):
+#         intersection = ((preds == cls) & (labels == cls)).float().sum()
+#         union = ((preds == cls) | (labels == cls)).float().sum()
+#         if union > 0:
+#             ious.append(intersection / union)
+#     return sum(ious) / len(ious) if ious else 0
+# Main training loop: one training pass and one validation pass per epoch.
+for epoch in tqdm(range(EPOCHS)):
+    model.train()
+    # Running sums of the per-batch loss and accuracy; averaged after the pass.
+    train_loss, train_acc = 0.0, 0.0
+    for images, masks in train_loader:
+        images, masks = images.to(DEVICE), masks.to(DEVICE)
+        # Standard step: clear gradients, forward, loss, backward, update.
+        optimizer.zero_grad()
+        outputs = model(images)
+        loss = criterion(outputs, masks)
+        loss.backward()
+        optimizer.step()
+        train_loss += loss.item()
+        train_acc += pixel_accuracy(outputs, masks).item()
+    # Mean over batches (not weighted by batch size).
+    train_loss /= len(train_loader)
+    train_acc /= len(train_loader)
+    # Validation
+    model.eval()
+    val_loss, val_acc = 0.0, 0.0
+    # No gradients are tracked during validation.
+    with torch.no_grad():
+        for images, masks in val_loader:
+            images, masks = images.to(DEVICE), masks.to(DEVICE)
+            outputs = model(images)
+            loss = criterion(outputs, masks)
+            val_loss += loss.item()
+            val_acc += pixel_accuracy(outputs, masks).item()
+    val_loss /= len(val_loader)
+    val_acc /= len(val_loader)
+    # wandb.log({
+    #     "epoch": epoch + 1,
+    #     "train_loss": train_loss,
+    #     "train_accuracy": train_acc,
+    #     "val_loss": val_loss,
+    #     "val_accuracy": val_acc
+    # })
+    # The "Acc" printed here is the validation accuracy; train_acc is computed but not shown.
+    print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")
+# Save only the weights (state_dict) into the current working directory.
+torch.save(model.state_dict(), "segnet_efficientnet_voc.pth")
+# wandb.finish()

experiments/SegNet/vgg_backbone/SegNet_with_VGG16_backbone.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/SegNet/vgg_backbone/model.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import torch
+import torch.nn as nn
+import torchvision.models as models
+class SegNet(nn.Module):
+    """SegNet-style encoder-decoder built on slices of torchvision's ``vgg16_bn`` features.
+
+    Max-pooling indices recorded in the encoder are reused by ``MaxUnpool2d``
+    in the decoder, and a 1x1 convolution produces the per-class outputs.
+    """
+    def __init__(self, num_classes=32):
+        """Build the encoder stages, decoder blocks and classifier.
+
+        Args:
+            num_classes: Number of output channels of the final 1x1 convolution.
+        """
+        super(SegNet, self).__init__()
+        # pretrained=True asks torchvision for pretrained VGG16-BN weights.
+        vgg16 = models.vgg16_bn(pretrained=True)
+        # One shared pool/unpool pair; return_indices=True exposes the arg-max
+        # locations that unpooling needs.
+        self.pool = nn.MaxPool2d(2, 2, return_indices=True)
+        self.unpool = nn.MaxUnpool2d(2, 2)
+        # The slices skip feature indices 6, 13 and 23 — presumably VGG's own
+        # max-pool layers, replaced here by self.pool (TODO confirm). Features
+        # from index 33 onward are not used.
+        self.enc1 = nn.Sequential(*vgg16.features[:6])
+        self.enc2 = nn.Sequential(*vgg16.features[7:13])
+        self.enc3 = nn.Sequential(*vgg16.features[14:23])
+        self.enc4 = nn.Sequential(*vgg16.features[24:33])
+        # Decoder blocks reduce the channel count step by step: 512 -> 256 -> 128 -> 64 -> 64.
+        self.dec4 = self.decoder_block(512, 256)
+        self.dec3 = self.decoder_block(256, 128)
+        self.dec2 = self.decoder_block(128, 64)
+        self.dec1 = self.decoder_block(64, 64)
+        self.classifier = nn.Conv2d(64, num_classes, kernel_size=1)
+    def decoder_block(self, in_channels, out_channels):
+        """Return two 3x3 conv + batch-norm + ReLU layers mapping ``in_channels`` to ``out_channels``."""
+        return nn.Sequential(
+            nn.Conv2d(in_channels, in_channels, 3, padding=1),
+            nn.BatchNorm2d(in_channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(in_channels, out_channels, 3, padding=1),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True)
+        )
+    def forward(self, x):
+        """Run the encoder, then unpool and decode stage by stage; return the classifier output."""
+        # Encoder: keep each pre-pool activation (for its size) and the pooling indices.
+        x1 = self.enc1(x)
+        x1p, ind1 = self.pool(x1)
+        x2 = self.enc2(x1p)
+        x2p, ind2 = self.pool(x2)
+        x3 = self.enc3(x2p)
+        x3p, ind3 = self.pool(x3)
+        x4 = self.enc4(x3p)
+        x4p, ind4 = self.pool(x4)
+        # Decoder: unpool with the matching indices back to the recorded size,
+        # then apply that stage's decoder block.
+        d4 = self.unpool(x4p, ind4, output_size=x4.size())
+        d4 = self.dec4(d4)
+        d3 = self.unpool(d4, ind3, output_size=x3.size())
+        d3 = self.dec3(d3)
+        d2 = self.unpool(d3, ind2, output_size=x2.size())
+        d2 = self.dec2(d2)
+        d1 = self.unpool(d2, ind1, output_size=x1.size())
+        d1 = self.dec1(d1)
+        return self.classifier(d1)

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ opencv-python==4.10.0.84
 matplotlib==3.10.0
 wandb==0.19.6
 tqdm==4.67.1

 matplotlib==3.10.0
 wandb==0.19.6
 tqdm==4.67.1
+gdown==5.2.0