AJain1234 committed on
Commit
43c8d65
·
verified ·
1 Parent(s): a393bdc

Upload folder using huggingface_hub

Browse files
.gitignore CHANGED
@@ -172,3 +172,5 @@ cython_debug/
172
 
173
  # PyPI configuration file
174
  .pypirc
 
 
 
172
 
173
  # PyPI configuration file
174
  .pypirc
175
+
176
+ /saved_models/
app.py CHANGED
@@ -6,10 +6,62 @@ from experiments.kmeans_segmenter import generate_kmeans_segmented_image
6
  from experiments.enhanced_kmeans_segmenter import slic_kmeans
7
  from experiments.watershed_segmenter import generate_watershed
8
  from experiments.felzenszwalb_segmentation import segment
9
- from experiments.SegNet.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
 
10
  import numpy as np
11
  from PIL import Image
12
  from matplotlib import cm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def generate_kmeans(image_path,k):
15
  kmeans_image_output, kmeans_segmented_image_output,_,kmeans_threshold_text=generate_kmeans_segmented_image(image_path, k)
@@ -161,6 +213,21 @@ with gr.Blocks() as demo:
161
  inputs=[segnet_file_input],
162
  outputs=[segnet_image_output,segnet_segmented_image_output]
163
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  if __name__ == "__main__":
165
  demo.launch()
166
 
 
6
  from experiments.enhanced_kmeans_segmenter import slic_kmeans
7
  from experiments.watershed_segmenter import generate_watershed
8
  from experiments.felzenszwalb_segmentation import segment
9
+ from experiments.SegNet.efficient_b0_backbone.architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, IMAGE_SIZE
10
+ from experiments.SegNet.vgg_backbone.model import SegNet
11
  import numpy as np
12
  from PIL import Image
13
  from matplotlib import cm
14
+ import gdown
15
+ import os
16
+
17
# Make sure the weights directory exists. exist_ok=True replaces the separate
# "exists?" check, so there is no window between checking and creating.
os.makedirs("saved_models", exist_ok=True)

# Fetch the SegNet VGG checkpoint only when no local copy is present.
if not os.path.exists("saved_models/segnet_vgg.pth"):
    print("Downloading SegNet VGG weights...")
    segnet_vgg_weights = "https://drive.google.com/file/d/1EFXKQ_3bDW9FbZCqOLdrE0DOI0V4W82o/view?usp=sharing"
    # fuzzy=True lets gdown extract the file id from a ".../view" sharing link.
    downloaded_path = gdown.download(segnet_vgg_weights, "saved_models/segnet_vgg.pth", fuzzy=True)
    # Only report success when a file actually landed on disk; the app keeps
    # starting either way so the other segmentation tabs remain usable.
    if downloaded_path is None or not os.path.exists("saved_models/segnet_vgg.pth"):
        print("SegNet VGG weights download failed; SegNet VGG segmentation will be unavailable.")
    else:
        print("Download complete!")
else:
    print("SegNet VGG weights already exist, skipping download.")
29
+
30
def generate_segnet_vgg(image_path):
    """Segment an image with the VGG-backbone SegNet and colorize the result.

    Args:
        image_path: Value handed directly to ``PIL.Image.open``.

    Returns:
        A ``(original_image, segmented_image)`` tuple of PIL images. The first
        is the input converted to RGB; the second is the predicted class mask
        rendered with the ``nipy_spectral`` colormap and resized (nearest
        neighbour) to the input's size.
    """
    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; the colormap
    # registry is the supported lookup and exists since matplotlib 3.5.
    from matplotlib import colormaps

    num_classes = 32

    # The checkpoint is loaded on every call, onto whichever device DEVICE names.
    model = SegNet(num_classes).to(DEVICE)
    model.load_state_dict(torch.load("saved_models/segnet_vgg.pth", map_location=DEVICE))
    model.eval()

    # convert() returns an independent, fully loaded image, so the file handle
    # can be released as soon as the with-block ends.
    with Image.open(image_path) as source:
        original_image = source.convert('RGB')

    # Resize to 224x224 and normalize with the ImageNet mean/std.
    # NOTE(review): presumably mirrors the training preprocessing - confirm
    # against the training notebook.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    input_tensor = transform(original_image).unsqueeze(0).to(DEVICE)

    # Per-pixel class ids: argmax over the class dimension of the model output.
    with torch.no_grad():
        output = model(input_tensor)
        pred_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

    # Scale by the fixed number of classes rather than the largest id present
    # in this particular mask, so a class keeps the same colour in every image.
    colormap = colormaps['nipy_spectral']
    colored_mask = colormap(pred_mask / (num_classes - 1))
    colored_mask = (colored_mask[:, :, :3] * 255).astype(np.uint8)  # drop alpha
    segmented_image = Image.fromarray(colored_mask)

    # Nearest-neighbour keeps class colours crisp when scaling back up.
    segmented_image = segmented_image.resize(original_image.size, Image.NEAREST)

    return original_image, segmented_image
65
 
66
  def generate_kmeans(image_path,k):
67
  kmeans_image_output, kmeans_segmented_image_output,_,kmeans_threshold_text=generate_kmeans_segmented_image(image_path, k)
 
213
  inputs=[segnet_file_input],
214
  outputs=[segnet_image_output,segnet_segmented_image_output]
215
  )
216
+ with gr.TabItem("SegNet VGG Segmentation"):
217
+ with gr.Row():
218
+ with gr.Column(scale=1):
219
+ segnet_file_input = gr.File(label="Upload Image File")
220
+ segnet_display_btn = gr.Button("Segment this image")
221
+
222
+ with gr.Column(scale=2):
223
+ segnet_image_output = gr.Image(label="Original Image", container=False)
224
+ segnet_segmented_image_output = gr.Image(label="SegNet VGG Segmented Image", container=False)
225
+
226
+ segnet_display_btn.click(
227
+ fn=generate_segnet_vgg,
228
+ inputs=[segnet_file_input],
229
+ outputs=[segnet_image_output,segnet_segmented_image_output]
230
+ )
231
  if __name__ == "__main__":
232
  demo.launch()
233
 
experiments/SegNet/efficient_b0_backbone/architecture.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import models, transforms
5
+ from torchvision.datasets import VOCSegmentation
6
+ from torch.utils.data import DataLoader
7
+ from PIL import Image
8
+ import numpy as np
9
+ import wandb
10
+ import os
11
+ import matplotlib.pyplot as plt
12
+
13
+ torch.manual_seed(42)
14
+ np.random.seed(42)
15
+
16
+ # wandb.login(key="your_wandb_api_key_here")
17
+
18
+ EPOCHS = 25
19
+ BATCH_SIZE = 8
20
+ LR = 1e-3
21
+ NUM_CLASSES = 21 # Pascal VOC has 21 classes including background
22
+ IMAGE_SIZE = (256, 256)
23
+ DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24
+
25
+ # wandb.init(project="segnet-efficientnet-voc", config={
26
+ # "epochs": EPOCHS,
27
+ # "batch_size": BATCH_SIZE,
28
+ # "learning_rate": LR,
29
+ # "architecture": "SegNet-EfficientNet",
30
+ # "dataset": "PascalVOC2012"
31
+ # })
32
+
33
class SegNetEfficientNet(nn.Module):
    """Segmentation network: EfficientNet-B0 feature extractor plus an up-convolution decoder.

    Args:
        num_classes: Number of output channels (one score map per class).
        pretrained: Forwarded to ``models.efficientnet_b0``. Defaults to
            ``True`` (the previous hard-coded behaviour). Pass ``False`` when
            a checkpoint is loaded right after construction, so no pretrained
            weights have to be fetched first.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        base_model = models.efficientnet_b0(pretrained=pretrained)

        # Encoder: all children of the EfficientNet ``features`` module, in order.
        self.encoder = nn.Sequential(*base_model.features.children())

        # Decoder: four stride-2 transposed convolutions (each doubles the
        # spatial size), starting from the 1280 channels the first layer
        # expects, followed by a 1x1 projection to ``num_classes`` channels.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1280, 512, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, num_classes, kernel_size=1)
        )

    def forward(self, x):
        """Return per-class score maps bilinearly resized to the module-level ``IMAGE_SIZE``.

        The output size is always ``IMAGE_SIZE``, independent of the spatial
        size of ``x``.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        x = F.interpolate(x, size=IMAGE_SIZE, mode='bilinear', align_corners=False)
        return x
60
+
61
class VOCSegmentationDataset(VOCSegmentation):
    """Pascal VOC 2012 segmentation split that yields ``(image, long-tensor mask)`` pairs.

    The parent dataset is created with ``download=True`` and without any
    transforms; the optional ``transform`` / ``target_transform`` callables
    are stored afterwards and applied here, in ``__getitem__``.
    """

    def __init__(self, root, image_set='train', transform=None, target_transform=None):
        super().__init__(root=root, year='2012', image_set=image_set, download=True)
        self.transform, self.target_transform = transform, target_transform

    def __getitem__(self, index):
        """Return the sample at ``index`` with the stored transforms applied."""
        sample, mask = super().__getitem__(index)
        sample = self.transform(sample) if self.transform else sample
        mask = self.target_transform(mask) if self.target_transform else mask
        # The mask is converted to an integer (long) tensor via a NumPy array.
        return sample, torch.as_tensor(np.array(mask), dtype=torch.long)
75
def _build_dataloaders():
    """Create the Pascal VOC train/val data loaders.

    Returns:
        ``(train_loader, val_loader)``; the training loader shuffles, the
        validation loader does not. Both use ``BATCH_SIZE`` and two workers.
    """
    image_transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    # Nearest-neighbour resizing so mask values are never blended together.
    mask_transform = transforms.Resize(
        IMAGE_SIZE, interpolation=transforms.InterpolationMode.NEAREST
    )

    train_dataset = VOCSegmentationDataset("voc_data", 'train', image_transform, mask_transform)
    val_dataset = VOCSegmentationDataset("voc_data", 'val', image_transform, mask_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
    return train_loader, val_loader


def __getattr__(name):
    """Build ``train_loader`` / ``val_loader`` lazily on first access (PEP 562).

    The loaders used to exist only when this file was executed as a script,
    so ``from .architecture import train_loader, val_loader`` failed with
    ImportError. Creating them on demand makes that import work while keeping
    a plain import of this module (e.g. for the model class) free of dataset
    construction.
    """
    if name in ("train_loader", "val_loader"):
        loaders = dict(zip(("train_loader", "val_loader"), _build_dataloaders()))
        # Cache in the module namespace so both names share one dataset build.
        globals().update(loaders)
        return loaders[name]
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


if __name__ == "__main__":
    train_loader, val_loader = _build_dataloaders()
experiments/SegNet/efficient_b0_backbone/train.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import models, transforms
5
+ from torchvision.datasets import VOCSegmentation
6
+ from torch.utils.data import DataLoader
7
+ from PIL import Image
8
+ import numpy as np
9
+ import wandb
10
+ import os
11
+ import matplotlib.pyplot as plt
12
+ from .architecture import SegNetEfficientNet, NUM_CLASSES, DEVICE, LR, EPOCHS, train_loader, val_loader, IMAGE_SIZE
13
+ from tqdm import tqdm
14
+
15
+ model = SegNetEfficientNet(NUM_CLASSES).to(DEVICE)
16
+ optimizer = torch.optim.Adam(model.parameters(), lr=LR)
17
+ criterion = nn.CrossEntropyLoss(ignore_index=255)
18
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
19
+
20
def pixel_accuracy(preds, labels, ignore_index=255):
    """Return the fraction of correctly classified pixels as a scalar tensor.

    Args:
        preds: Class scores with the class dimension at index 1.
        labels: Integer class ids, shaped like ``preds`` without dimension 1.
        ignore_index: Label value excluded from the statistic. Defaults to 255
            to match the ``ignore_index`` of the loss used in this script.
            Pass ``None`` to count every pixel.

    Returns:
        A 0-dim float tensor in ``[0, 1]``; ``0`` when no pixel is counted.
    """
    predicted = preds.argmax(dim=1)
    if ignore_index is None:
        valid = torch.ones_like(labels, dtype=torch.bool)
    else:
        valid = labels != ignore_index

    counted = valid.sum()
    if counted == 0:
        # Nothing to score (empty batch or every pixel ignored): avoid 0/0 -> NaN.
        return torch.zeros((), dtype=torch.float32, device=labels.device)

    hits = ((predicted == labels) & valid).sum()
    return hits.float() / counted.float()
25
+
26
+ # def mean_iou(preds, labels, num_classes=NUM_CLASSES):
27
+ # _, preds = torch.max(preds, 1)
28
+ # ious = []
29
+ # for cls in range(num_classes):
30
+ # intersection = ((preds == cls) & (labels == cls)).float().sum()
31
+ # union = ((preds == cls) | (labels == cls)).float().sum()
32
+ # if union > 0:
33
+ # ious.append(intersection / union)
34
+ # return sum(ious) / len(ious) if ious else 0
35
+
36
def _run_epoch(loader, training):
    """Run one full pass over ``loader``.

    With ``training=True`` the model is put in train mode and the optimizer is
    stepped on every batch; otherwise the model is put in eval mode and
    gradients are disabled.

    Returns:
        ``(mean_loss, mean_pixel_accuracy)``, both averaged over batches.
    """
    if training:
        model.train()
    else:
        model.eval()

    loss_total, acc_total = 0.0, 0.0
    with torch.set_grad_enabled(training):
        for images, masks in loader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            if training:
                loss.backward()
                optimizer.step()

            loss_total += loss.item()
            acc_total += pixel_accuracy(outputs, masks).item()

    return loss_total / len(loader), acc_total / len(loader)


for epoch in tqdm(range(EPOCHS)):
    train_loss, train_acc = _run_epoch(train_loader, training=True)

    # Validation
    val_loss, val_acc = _run_epoch(val_loader, training=False)

    # wandb.log({
    #     "epoch": epoch + 1,
    #     "train_loss": train_loss,
    #     "train_accuracy": train_acc,
    #     "val_loss": val_loss,
    #     "val_accuracy": val_acc
    # })

    print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

# Persist the final weights once all epochs have finished.
torch.save(model.state_dict(), "segnet_efficientnet_voc.pth")
80
+ # wandb.finish()
81
+
experiments/SegNet/vgg_backbone/SegNet_with_VGG16_backbone.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
experiments/SegNet/vgg_backbone/model.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torchvision.models as models
4
+
5
class SegNet(nn.Module):
    """SegNet-style encoder-decoder built on slices of ``vgg16_bn``.

    The encoder uses four slices of the VGG16-BN feature stack, each followed
    by a 2x2 max-pool that records its indices. The decoder un-pools with
    those indices in reverse order, refining with two conv/BN/ReLU layers per
    stage, and ends in a 1x1 classifier.

    Args:
        num_classes: Number of output channels of the final 1x1 convolution.
        pretrained: Forwarded to ``models.vgg16_bn``. Defaults to ``True``
            (the previous hard-coded behaviour). Pass ``False`` when a
            checkpoint is loaded right after construction, so no pretrained
            weights have to be fetched first.
    """

    def __init__(self, num_classes=32, pretrained=True):
        super().__init__()
        vgg16 = models.vgg16_bn(pretrained=pretrained)

        # One shared pool/unpool pair; return_indices feeds MaxUnpool2d later.
        self.pool = nn.MaxPool2d(2, 2, return_indices=True)
        self.unpool = nn.MaxUnpool2d(2, 2)

        # The slices skip indices 6, 13, 23 and 33 of ``vgg16.features``;
        # pooling is done by ``self.pool`` instead so the indices are kept.
        self.enc1 = nn.Sequential(*vgg16.features[:6])
        self.enc2 = nn.Sequential(*vgg16.features[7:13])
        self.enc3 = nn.Sequential(*vgg16.features[14:23])
        self.enc4 = nn.Sequential(*vgg16.features[24:33])

        # Decoder channel counts: 512 -> 256 -> 128 -> 64 -> 64.
        self.dec4 = self.decoder_block(512, 256)
        self.dec3 = self.decoder_block(256, 128)
        self.dec2 = self.decoder_block(128, 64)
        self.dec1 = self.decoder_block(64, 64)
        self.classifier = nn.Conv2d(64, num_classes, kernel_size=1)

    def decoder_block(self, in_channels, out_channels):
        """Return two 3x3 conv + BatchNorm + ReLU layers; the second changes the channel count."""
        return nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, padding=1),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Return class score maps with the same spatial size as ``x``."""
        # Encoder: keep each pre-pool activation (for its size) and the pool indices.
        x1 = self.enc1(x)
        x1p, ind1 = self.pool(x1)
        x2 = self.enc2(x1p)
        x2p, ind2 = self.pool(x2)
        x3 = self.enc3(x2p)
        x3p, ind3 = self.pool(x3)
        x4 = self.enc4(x3p)
        x4p, ind4 = self.pool(x4)

        # Decoder: output_size restores the exact pre-pool size, which also
        # covers odd spatial dimensions that pooling rounded down.
        d4 = self.dec4(self.unpool(x4p, ind4, output_size=x4.size()))
        d3 = self.dec3(self.unpool(d4, ind3, output_size=x3.size()))
        d2 = self.dec2(self.unpool(d3, ind2, output_size=x2.size()))
        d1 = self.dec1(self.unpool(d2, ind1, output_size=x1.size()))
        return self.classifier(d1)
requirements.txt CHANGED
@@ -7,3 +7,4 @@ opencv-python==4.10.0.84
7
  matplotlib==3.10.0
8
  wandb==0.19.6
9
  tqdm==4.67.1
 
 
7
  matplotlib==3.10.0
8
  wandb==0.19.6
9
  tqdm==4.67.1
10
+ gdown==5.2.0