{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "d305511a", "metadata": {}, "outputs": [], "source": [ "# This Python 3 environment comes with many helpful analytics libraries installed\n", "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n", "# For example, here's several helpful packages to load\n", "\n", "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "\n", "# Input data files are available in the read-only \"../input/\" directory\n", "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", "\n", "import os\n", "for dirname, _, filenames in os.walk('/kaggle/input'):\n", " for filename in filenames:\n", " print(os.path.join(dirname, filename))\n", "\n", "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n", "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session\n", "\n", "\n", "get_ipython().getoutput(\"pip install -q segmentation-models-pytorch albumentations\")\n", "\n", "\n", "import os\n", "import cv2\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "from torch.utils.data import Dataset, DataLoader\n", "import segmentation_models_pytorch as smp\n", "import albumentations as A\n", "from albumentations.pytorch import ToTensorV2\n", "import matplotlib.pyplot as plt\n", "from tqdm import tqdm\n", "\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "print(\"Using:\", device)\n", "\n", "\n", "BASE_PATH = \"/kaggle/input/datasets/balraj98/massachusetts-buildings-dataset\"\n", "\n", "TRAIN_IMG_PATH = os.path.join(BASE_PATH, \"tiff/train\")\n", "TRAIN_MASK_PATH = os.path.join(BASE_PATH, \"tiff/train_labels\")\n", "\n", "VAL_IMG_PATH = os.path.join(BASE_PATH, \"tiff/val\")\n", "VAL_MASK_PATH = os.path.join(BASE_PATH, \"tiff/val_labels\")\n", "\n", "\n", "train_transform = A.Compose([\n", " A.HorizontalFlip(p=0.5),\n", " A.VerticalFlip(p=0.5),\n", " A.RandomRotate90(p=0.5),\n", " A.RandomBrightnessContrast(p=0.3),\n", " A.Normalize(mean=(0.485, 0.456, 0.406),\n", " std=(0.229, 0.224, 0.225)),\n", " ToTensorV2()\n", "])\n", "\n", "val_transform = A.Compose([\n", " A.Normalize(mean=(0.485, 0.456, 0.406),\n", " std=(0.229, 0.224, 0.225)),\n", " ToTensorV2()\n", "])\n", "\n", "\n", "def extract_patches(img, mask, patch_size=256):\n", " img_patches = []\n", " mask_patches = []\n", "\n", " h, w = img.shape[:2]\n", "\n", " for i in range(0, h - patch_size + 1, patch_size):\n", " for j in range(0, w - patch_size + 1, patch_size):\n", " img_patch = img[i:i+patch_size, j:j+patch_size]\n", " mask_patch = mask[i:i+patch_size, j:j+patch_size]\n", "\n", " img_patches.append(img_patch)\n", " mask_patches.append(mask_patch)\n", "\n", " return img_patches, mask_patches\n", "\n", "\n", "class BuildingDataset(Dataset):\n", " def __init__(self, img_dir, mask_dir, transform=None, patch_size=256):\n", " self.transform = transform\n", " self.patch_size = patch_size\n", "\n", " self.img_patches = []\n", " self.mask_patches = []\n", "\n", " images = sorted(os.listdir(img_dir))\n", " masks = sorted(os.listdir(mask_dir))\n", "\n", " for img_name, mask_name in zip(images, masks):\n", " img = cv2.imread(os.path.join(img_dir, img_name))\n", " img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", "\n", " mask = cv2.imread(os.path.join(mask_dir, mask_name), 0)\n", " mask = (mask > 0).astype(np.float32)\n", "\n", " img_p, mask_p = extract_patches(img, mask, self.patch_size)\n", "\n", " self.img_patches.extend(img_p)\n", " self.mask_patches.extend(mask_p)\n", "\n", " def __len__(self):\n", " return len(self.img_patches)\n", "\n", " def __getitem__(self, idx):\n", " img = self.img_patches[idx]\n", " mask = self.mask_patches[idx]\n", "\n", " if self.transform:\n", " augmented = self.transform(image=img, mask=mask)\n", " img = augmented[\"image\"]\n", " mask = augmented[\"mask\"].unsqueeze(0)\n", " else:\n", " img = torch.tensor(img).permute(2,0,1).float() / 255.0\n", " mask = torch.tensor(mask).unsqueeze(0)\n", "\n", " return img, mask.float()\n", "\n", "\n", "train_dataset = BuildingDataset(\n", " TRAIN_IMG_PATH,\n", " TRAIN_MASK_PATH,\n", " transform=train_transform,\n", " patch_size=256 \n", ")\n", "\n", "val_dataset = BuildingDataset(\n", " VAL_IMG_PATH,\n", " VAL_MASK_PATH,\n", " transform=val_transform,\n", " patch_size=256 \n", ")\n", "\n", "\n", "train_dataset = BuildingDataset(TRAIN_IMG_PATH, TRAIN_MASK_PATH, patch_size=256)\n", "val_dataset = BuildingDataset(VAL_IMG_PATH, VAL_MASK_PATH, patch_size=256)\n", "\n", "train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)\n", "val_loader = DataLoader(val_dataset, batch_size=8)\n", "\n", "\n", "model = smp.Unet(\n", " encoder_name=\"efficientnet-b3\",\n", " encoder_weights=\"imagenet\",\n", " in_channels=3,\n", " classes=1,\n", " activation=None\n", ")\n", "\n", "model.to(device)\n", "\n", "\n", "loss_fn = smp.losses.DiceLoss(mode='binary')\n", "optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)\n", "\n", "\n", "loss_fn = smp.losses.DiceLoss(mode='binary', from_logits=True)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)\n", "\n", "scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(\n", " optimizer, mode='max', patience=3, factor=0.5\n", ")\n", "\n", "\n", "def iou_score(pred, mask):\n", " pred = torch.sigmoid(pred)\n", " pred = (pred > 0.5).float()\n", " intersection = (pred * mask).sum()\n", " union = pred.sum() + mask.sum() - intersection\n", " return (intersection + 1e-6) / (union + 1e-6)\n", "\n", "epochs = 30\n", "\n", "best_iou = 0\n", "train_losses = []\n", "val_losses = []\n", "ious = []\n", "\n", "for epoch in range(epochs):\n", "\n", " # ---- TRAIN ----\n", " model.train()\n", " train_loss = 0\n", "\n", " for imgs, masks in tqdm(train_loader):\n", " imgs = imgs.to(device)\n", " masks = masks.to(device)\n", "\n", " preds = model(imgs)\n", " loss = loss_fn(preds, masks)\n", "\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " train_loss += loss.item()\n", "\n", " avg_train_loss = train_loss / len(train_loader)\n", " train_losses.append(avg_train_loss)\n", "\n", " # ---- VALIDATION ----\n", " model.eval()\n", " val_loss = 0\n", " iou_total = 0\n", "\n", " with torch.no_grad():\n", " for imgs, masks in val_loader:\n", " imgs = imgs.to(device)\n", " masks = masks.to(device)\n", "\n", " preds = model(imgs)\n", " loss = loss_fn(preds, masks)\n", "\n", " val_loss += loss.item()\n", " iou_total += iou_score(preds, masks).item()\n", "\n", " avg_val_loss = val_loss / len(val_loader)\n", " avg_iou = iou_total / len(val_loader)\n", "\n", " val_losses.append(avg_val_loss)\n", " ious.append(avg_iou)\n", "\n", " scheduler.step(avg_iou)\n", "\n", " print(f\"\\nEpoch {epoch+1}\")\n", " print(f\"Train Loss: {avg_train_loss:.4f}\")\n", " print(f\"Val Loss: {avg_val_loss:.4f}\")\n", " print(f\"Val IoU: {avg_iou:.4f}\")\n", "\n", " if avg_iou > best_iou:\n", " best_iou = avg_iou\n", " torch.save(model.state_dict(), \"best_model.pth\")\n", " print(\"Best model saved!\")\n", "\n", "\n", "plt.figure(figsize=(12,5))\n", "\n", "plt.subplot(1,2,1)\n", "plt.plot(train_losses, label=\"Train\")\n", "plt.plot(val_losses, label=\"Val\")\n", "plt.title(\"Loss Curve\")\n", "plt.legend()\n", "\n", "plt.subplot(1,2,2)\n", "plt.plot(ious)\n", "plt.title(\"IoU Curve\")\n", "\n", "plt.show()\n", "\n", "\n", "model.load_state_dict(torch.load(\"best_model.pth\"))\n", "model.eval()\n", "\n", "imgs, masks = next(iter(val_loader))\n", "imgs = imgs.to(device)\n", "\n", "with torch.no_grad():\n", " preds = model(imgs)\n", "\n", "pred = torch.sigmoid(preds[0]).cpu().numpy().squeeze()\n", "pred = (pred > 0.5)\n", "\n", "img = imgs[0].cpu().permute(1,2,0).numpy()\n", "\n", "plt.figure(figsize=(10,4))\n", "\n", "plt.subplot(1,2,1)\n", "plt.imshow(img)\n", "plt.title(\"Input\")\n", "\n", "plt.subplot(1,2,2)\n", "plt.imshow(pred, cmap='gray')\n", "plt.title(\"Prediction\")\n", "\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "4bcc68d4", "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'cv2'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcv2\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcreate_zoning_mask\u001b[39m(shape):\n", "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'cv2'" ] } ], "source": [ "import cv2\n", "import numpy as np\n", "\n", "\n", "def create_zoning_mask(shape):\n", " h, w = shape\n", " zoning = np.zeros((h, w), dtype=np.uint8)\n", " zoning[:, w//2:] = 1\n", " return zoning\n", "\n", "\n", "def get_building_components(binary_mask):\n", " num_labels, labels = cv2.connectedComponents(binary_mask.astype(np.uint8))\n", " return num_labels, labels\n", "\n", "\n", "def detect_illegal_buildings(building_mask, zoning_mask):\n", "\n", " num_labels, labels = get_building_components(building_mask)\n", "\n", " illegal_buildings = []\n", " legal_buildings = []\n", "\n", " for label in range(1, num_labels): # skip background (0)\n", " building_pixels = (labels == label)\n", "\n", " # Check overlap with restricted zone\n", " overlap = building_pixels & (zoning_mask == 1)\n", "\n", " if overlap.any():\n", " illegal_buildings.append(label)\n", " else:\n", " legal_buildings.append(label)\n", "\n", " return illegal_buildings, legal_buildings, labels\n", "\n", "\n", "def visualize_illegal(image, labels, illegal_buildings):\n", "\n", " output = image.copy()\n", "\n", " for label in illegal_buildings:\n", " output[labels == label] = [255, 0, 0] # red\n", "\n", " return output\n", "\n", "\n", "plt.figure(figsize=(12,4))\n", "\n", "plt.subplot(1,3,1)\n", "plt.title(\"Building Mask\")\n", "plt.imshow(pred_mask, cmap='gray')\n", "\n", "plt.subplot(1,3,2)\n", "plt.title(\"Zoning Mask\")\n", "plt.imshow(zoning_mask, cmap='gray')\n", "\n", "plt.subplot(1,3,3)\n", "plt.title(\"Overlay Result\")\n", "plt.imshow(overlay)\n", "\n", "plt.show()\n", "\n", "\n", "pred_mask = (pred > 0.5).astype(np.uint8)\n", "\n", "zoning_mask = create_zoning_mask(pred_mask.shape)\n", "\n", "illegal_buildings, legal_buildings, labels = detect_illegal_buildings(\n", " pred_mask,\n", " zoning_mask\n", ")\n", "\n", "overlay = visualize_illegal(img.astype(np.uint8), labels, illegal_buildings)\n", "\n", "print(\"Total Buildings:\", len(illegal_buildings) + len(legal_buildings))\n", "print(\"Illegal Buildings:\", len(illegal_buildings))\n", "print(\"Legal Buildings:\", len(legal_buildings))\n", "\n", "plt.figure(figsize=(8,6))\n", "plt.imshow(overlay)\n", "plt.title(\"Illegal Buildings Highlighted in Red\")\n", "plt.show()\n", "\n", "\n", "get_ipython().getoutput(\"ls /kaggle/working\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.9" } }, "nbformat": 4, "nbformat_minor": 5 }