File size: 3,876 Bytes
bec0074
 
 
 
 
 
 
 
 
 
 
 
 
 
e451a47
bec0074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc909e1
 
 
bec0074
 
 
 
 
 
 
dc909e1
 
 
bec0074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e451a47
bec0074
 
dc909e1
 
bec0074
e451a47
 
 
 
bec0074
 
 
 
 
 
 
e451a47
 
 
 
 
 
bec0074
 
 
 
 
e451a47
 
 
 
bec0074
dc909e1
bec0074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc909e1
bec0074
 
 
 
 
 
dc909e1
e451a47
 
 
 
 
 
 
bec0074
dc909e1
bec0074
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import cv2
import torch
import numpy as np
import gradio as gr
import segmentation_models_pytorch as smp
from albumentations import Normalize
from albumentations.pytorch import ToTensorV2

# ================================
# CONFIG
# ================================
MODEL_PATH = "s2ds_deeplabv3plus.pth"  # path to the trained DeepLabV3+ checkpoint
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
NUM_CLASSES = 7  # background + the 6 non-background classes listed below
INFER_SIZE = 512  # 🔥 reduce for speed (important for live feed)

# Class id -> human-readable label; id 0 is background.
CLASS_NAMES = {
    0: "Background",
    1: "Crack",
    2: "Spalling",
    3: "Corrosion",
    4: "Efflorescence",
    5: "Vegetation",
    6: "Control Point"
}

# Class id -> RGB color used to render the segmentation overlay.
ID_TO_COLOR = {
    0: (0, 0, 0),
    1: (255, 255, 255),
    2: (255, 0, 0),
    3: (255, 255, 0),
    4: (0, 255, 255),
    5: (0, 255, 0),
    6: (0, 0, 255)
}

# ================================
# LOAD MODEL
# ================================
# DeepLabV3+ with a ResNet-50 encoder. encoder_weights=None because the
# trained weights come from the checkpoint below, so ImageNet
# pretraining would be overwritten anyway.
model = smp.DeepLabV3Plus(
    encoder_name="resnet50",
    encoder_weights=None,
    in_channels=3,
    classes=NUM_CLASSES
)

# Accept both checkpoint layouts: a training checkpoint that wraps the
# weights under "model_state_dict", or a raw state_dict saved directly.
checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(
    checkpoint["model_state_dict"] if "model_state_dict" in checkpoint else checkpoint
)

model.to(DEVICE)
model.eval()  # inference mode: disables dropout, freezes batch-norm statistics

# Albumentations preprocessing: Normalize() with library defaults
# (presumably ImageNet mean/std — confirm this matches training), then
# HWC float -> CHW torch tensor.
normalize = Normalize()
to_tensor = ToTensorV2()

# ================================
# HELPERS
# ================================
def pad_to_16(img):
    """Reflect-pad *img* on the bottom/right so both spatial dims are
    multiples of 16.

    Returns a tuple ``(padded, original_height, original_width)`` so the
    caller can crop predictions back to the unpadded size.
    """
    height, width = img.shape[:2]
    # (-x) % 16 is the amount needed to reach the next multiple of 16
    # (0 when already aligned).
    pad_bottom = (-height) % 16
    pad_right = (-width) % 16
    padded = cv2.copyMakeBorder(
        img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT
    )
    return padded, height, width

def colorize_mask(mask, palette=None):
    """Convert a 2-D integer class-id mask into an RGB visualization.

    Parameters
    ----------
    mask : np.ndarray
        2-D array of integer class ids, shape (H, W).
    palette : dict[int, tuple[int, int, int]] | None
        Mapping of class id -> RGB color. Defaults to the module-level
        ID_TO_COLOR table (backward compatible with the previous
        zero-argument-style call). Ids absent from the palette render
        as black.

    Returns
    -------
    np.ndarray
        uint8 array of shape (H, W, 3).
    """
    if palette is None:
        palette = ID_TO_COLOR
    h, w = mask.shape
    color_mask = np.zeros((h, w, 3), dtype=np.uint8)
    for cls, color in palette.items():
        color_mask[mask == cls] = color
    return color_mask

# ================================
# FAST INFERENCE FUNCTION
# ================================
def segment_image(image):
    """Run segmentation on one RGB frame and summarize the dominant defect.

    Parameters
    ----------
    image : np.ndarray | None
        HWC uint8 RGB image from Gradio (None when no input yet).

    Returns
    -------
    (np.ndarray | None, str)
        Overlay image at the original resolution (prediction blended over
        the frame), and a "Detected: <label>" summary string. Returns
        (None, "") for a None input.
    """
    if image is None:
        return None, ""

    # 🔥 Downscale to a fixed square for speed; remember the original for
    # the final upsample.
    original = image.copy()
    image = cv2.resize(image, (INFER_SIZE, INFER_SIZE))

    padded, orig_h, orig_w = pad_to_16(image)

    # Normalize, convert HWC -> CHW tensor, add batch dim, move to device.
    img = normalize(image=padded)["image"]
    img = to_tensor(image=img)["image"]
    img = img.unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        if DEVICE == "cuda":
            # Mixed precision on GPU for throughput.
            with torch.cuda.amp.autocast():
                pred = model(img)
        else:
            pred = model(img)

        # Per-pixel argmax over class logits -> (H, W) id mask.
        pred_mask = torch.argmax(pred, dim=1)[0].cpu().numpy()

    # Drop the reflect padding added by pad_to_16.
    pred_mask = pred_mask[:orig_h, :orig_w]

    color_mask = colorize_mask(pred_mask)
    overlay_small = cv2.addWeighted(image, 0.6, color_mask, 0.4, 0)

    # 🔥 Resize the blended overlay back to the input resolution.
    overlay = cv2.resize(overlay_small, (original.shape[1], original.shape[0]))

    # Image-level classification: the most frequent non-background class.
    vals, counts = np.unique(pred_mask, return_counts=True)
    # BUG FIX: filter values and counts with the SAME mask. The previous
    # code filtered vals with `vals > 0` but sliced `counts[1:]`, silently
    # assuming class 0 was always present at index 0; when the prediction
    # contained no background pixels the two arrays were misaligned and the
    # wrong class label was reported.
    foreground = vals > 0
    vals, counts = vals[foreground], counts[foreground]

    if vals.size > 0:
        img_class = int(vals[np.argmax(counts)])
        label = CLASS_NAMES[img_class]
    else:
        label = "Background"

    return overlay, f"Detected: {label}"

# ================================
# GRADIO UI
# ================================
with gr.Blocks() as demo:
    gr.Markdown("# 🏗 Structural Defect Segmentation")

    # Tab 1: one-shot segmentation of an uploaded image. Shows both the
    # overlay and the "Detected: <class>" text returned by segment_image.
    with gr.Tab("Image Upload"):
        input_img = gr.Image(type="numpy")  # delivers an HWC numpy array
        output_img = gr.Image()             # blended segmentation overlay
        output_text = gr.Textbox()          # image-level classification summary
        btn = gr.Button("Run Segmentation")
        btn.click(segment_image, inputs=input_img, outputs=[output_img, output_text])

    # Tab 2: continuous webcam streaming. Only the overlay is displayed;
    # the lambda drops the text label from segment_image's return tuple.
    with gr.Tab("Live Camera (Fast Mode)"):
        cam = gr.Image(
            sources=["webcam"],
            streaming=True,
            type="numpy",
            webcam_options={"facingMode": "environment"}  # 🔥 force back camera
        )
        cam_out = gr.Image()
        cam.stream(lambda x: segment_image(x)[0], inputs=cam, outputs=cam_out)

# NOTE(review): launch() runs unconditionally at import time; consider an
# `if __name__ == "__main__":` guard if this module is ever imported.
demo.launch()