# NOTE(review): the "Spaces: Sleeping" lines here were Hugging Face Spaces
# page-scrape residue, not code — replaced with this comment so the file parses.
"""
Semantic Segmentation — Pixel-level classification with DeepLabV3
Courses: 100 ch3, 360 ch4
"""
import numpy as np
import torch
import torchvision.models.segmentation as seg_models
import torchvision.transforms as T
import gradio as gr
from PIL import Image

# CPU-only inference — the demo assumes no GPU is available.
device = torch.device("cpu")

# Load DeepLabV3 with MobileNetV3 backbone (lightweight), pretrained weights,
# in eval mode (disables dropout/batch-norm updates for inference).
model = seg_models.deeplabv3_mobilenet_v3_large(
    weights=seg_models.DeepLabV3_MobileNet_V3_Large_Weights.DEFAULT
).eval().to(device)

# Standard ImageNet mean/std normalization expected by torchvision
# pretrained segmentation weights.
preprocess = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# PASCAL VOC class names (21 classes; index 0 is background).
CLASS_NAMES = [
    "background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair",
    "cow", "dining table", "dog", "horse", "motorbike",
    "person", "potted plant", "sheep", "sofa", "train",
    "tv/monitor",
]

# RGB color per class id, indexed in lockstep with CLASS_NAMES:
# PALETTE[i] colors pixels predicted as class i. uint8 so that fancy
# indexing (PALETTE[pred]) directly yields a displayable RGB image.
PALETTE = np.array([
    [0, 0, 0],        # background
    [128, 0, 0],      # aeroplane
    [0, 128, 0],      # bicycle
    [128, 128, 0],    # bird
    [0, 0, 128],      # boat
    [128, 0, 128],    # bottle
    [0, 128, 128],    # bus
    [128, 128, 128],  # car
    [64, 0, 0],       # cat
    [192, 0, 0],      # chair
    [64, 128, 0],     # cow
    [192, 128, 0],    # dining table
    [64, 0, 128],     # dog
    [192, 0, 128],    # horse
    [64, 128, 128],   # motorbike
    [192, 128, 128],  # person
    [0, 64, 0],       # potted plant
    [128, 64, 0],     # sheep
    [0, 192, 0],      # sofa
    [128, 192, 0],    # train
    [0, 64, 128],     # tv/monitor
], dtype=np.uint8)
def segment(image: Image.Image, display_mode: str):
    """Run semantic segmentation on *image* and render it per *display_mode*.

    Args:
        image: Input PIL image (any mode; converted to RGB internally).
            May be None when the user clicks before uploading.
        display_mode: One of "Overlay", "Segmentation Only", "Side by Side".
            Unknown values fall back to "Overlay".

    Returns:
        Tuple (main_image, seg_map, legend_markdown) where the images are
        uint8 RGB numpy arrays and legend_markdown is a string, or
        (None, None, "") when no image was provided.
    """
    if image is None:
        return None, None, ""

    img = image.convert("RGB")
    w, h = img.size

    # Inference (model / preprocess / device are module-level globals).
    inp = preprocess(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(inp)["out"]
    # squeeze(0) (not squeeze()) so a degenerate 1-pixel spatial dim is
    # never accidentally removed along with the batch dim.
    pred = output.argmax(1).squeeze(0).cpu().numpy()

    # Resize the class-index map back to the original size. NEAREST keeps
    # labels discrete — interpolation would invent meaningless class ids.
    pred_resized = np.array(
        Image.fromarray(pred.astype(np.uint8)).resize((w, h), Image.NEAREST)
    )

    # Color segmentation map via palette lookup, then 50/50 alpha blend.
    seg_color = PALETTE[pred_resized]
    img_np = np.array(img)
    overlay = (img_np * 0.5 + seg_color * 0.5).astype(np.uint8)

    # Build the markdown legend: one entry per non-background class,
    # with its palette color swatch and pixel coverage percentage.
    unique_classes = np.unique(pred_resized)
    legend = "**Detected classes:**\n\n"
    detected = False
    for c in unique_classes:
        if c == 0:  # skip background
            continue
        detected = True
        color = PALETTE[c]
        pixel_pct = np.sum(pred_resized == c) / pred_resized.size * 100
        color_hex = f"#{color[0]:02x}{color[1]:02x}{color[2]:02x}"
        legend += f"- <span style='color:{color_hex};font-weight:bold;'>██</span> {CLASS_NAMES[c]} ({pixel_pct:.1f}%)\n"
    if not detected:
        legend += "- No objects detected (background only)"

    if display_mode == "Segmentation Only":
        return seg_color, seg_color, legend
    if display_mode == "Side by Side":
        # BUG FIX: this branch previously returned the same images as
        # "Overlay", so the mode had no effect. Now it actually shows the
        # original photo and the colored map concatenated horizontally.
        side_by_side = np.concatenate([img_np, seg_color], axis=1)
        return side_by_side, seg_color, legend
    # Default: "Overlay".
    return overlay, seg_color, legend
# --- Gradio UI: wires the segment() function to an interactive demo. ---
with gr.Blocks(title="Semantic Segmentation") as demo:
    gr.Markdown(
        "# Semantic Segmentation\n"
        "Upload an image to see pixel-level classification (21 PASCAL VOC classes).\n"
        "*Courses: 100 Deep Learning ch3, 360 Autonomous Driving ch4*"
    )

    with gr.Row():
        # Left column: image upload, display-mode selector, run button.
        with gr.Column(scale=1):
            image_in = gr.Image(type="pil", label="Upload Image")
            display_mode = gr.Radio(
                choices=["Overlay", "Segmentation Only", "Side by Side"],
                value="Overlay",
                label="Display Mode",
            )
            run_btn = gr.Button("Segment", variant="primary")
        # Right column: rendered result, raw segmentation map, class legend.
        with gr.Column(scale=2):
            with gr.Row():
                result_img = gr.Image(label="Result")
                segmap_img = gr.Image(label="Segmentation Map")
            legend_out = gr.Markdown()

    run_btn.click(
        segment,
        inputs=[image_in, display_mode],
        outputs=[result_img, segmap_img, legend_out],
    )

    gr.Examples(
        examples=[
            ["examples/street.jpg", "Overlay"],
            ["examples/room.jpg", "Side by Side"],
        ],
        inputs=[image_in, display_mode],
    )

if __name__ == "__main__":
    demo.launch()