S-4-G-4-R committed on
Commit
b5122c5
Β·
verified Β·
1 Parent(s): 437b12c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +215 -0
  2. requirements .txt +7 -0
app.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py β€” Gradio demo for Prompted Segmentation for Drywall QA
3
+ Model : CLIPSeg (CIDAS/clipseg-rd64-refined), fine-tuned on drywall datasets
4
+ Weights: best_model.pt (upload this file to your HuggingFace Space)
5
+ """
6
+
7
+ import os
8
+ import time
9
+ import numpy as np
10
+ import torch
11
+ import gradio as gr
12
+ from PIL import Image
13
+ from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
14
+
15
+ # ── Config ────────────────────────────────────────────────────────────────────
16
+ MODEL_NAME = "CIDAS/clipseg-rd64-refined"
17
+ CKPT_PATH = "best_model.pt" # must be in the Space root directory
18
+ IMG_SIZE = 352
19
+ THRESHOLD = 0.5
20
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
+
22
+ # Supported prompts (trained)
23
+ PROMPT_CHOICES = [
24
+ "segment crack",
25
+ "segment taping area",
26
+ ]
27
+
28
+ # ── Load model (once at startup) ──────────────────────────────────────────────
29
+ print(f"Loading CLIPSeg processor from {MODEL_NAME} ...")
30
+ processor = CLIPSegProcessor.from_pretrained(MODEL_NAME)
31
+
32
+ print(f"Loading CLIPSeg model from {MODEL_NAME} ...")
33
+ model = CLIPSegForImageSegmentation.from_pretrained(MODEL_NAME)
34
+
35
+ if os.path.exists(CKPT_PATH):
36
+ print(f"Loading fine-tuned weights from {CKPT_PATH} ...")
37
+ state_dict = torch.load(CKPT_PATH, map_location=DEVICE)
38
+ model.load_state_dict(state_dict)
39
+ print("Fine-tuned weights loaded successfully.")
40
+ else:
41
+ print(f"WARNING: {CKPT_PATH} not found β€” running with base CLIPSeg weights.")
42
+
43
+ model = model.to(DEVICE)
44
+ model.eval()
45
+ print(f"Model ready on {DEVICE}.")
46
+
47
+
48
+ # ── Inference function ────────────────────────────────────────────────────────
49
+ def predict(image: Image.Image, prompt: str, threshold: float) -> tuple:
50
+ """
51
+ Runs CLIPSeg inference and returns:
52
+ - overlay : original image blended with coloured mask
53
+ - mask_img : pure binary mask (grayscale)
54
+ - info_str : prompt used + inference time
55
+ """
56
+ if image is None:
57
+ return None, None, "Please upload an image."
58
+
59
+ original_size = image.size # (W, H) β€” to resize mask back
60
+ image_rgb = image.convert("RGB")
61
+
62
+ # Preprocess
63
+ encoding = processor(
64
+ text = [prompt],
65
+ images = [image_rgb],
66
+ return_tensors = "pt",
67
+ padding = "max_length",
68
+ truncation = True,
69
+ )
70
+ pixel_values = encoding["pixel_values"].to(DEVICE)
71
+ input_ids = encoding["input_ids"].to(DEVICE)
72
+ attention_mask = encoding["attention_mask"].to(DEVICE)
73
+
74
+ # Inference
75
+ t0 = time.time()
76
+ with torch.no_grad():
77
+ outputs = model(
78
+ pixel_values = pixel_values,
79
+ input_ids = input_ids,
80
+ attention_mask = attention_mask,
81
+ )
82
+ inf_ms = (time.time() - t0) * 1000
83
+
84
+ # Post-process logits β†’ binary mask
85
+ prob = torch.sigmoid(outputs.logits[0]).cpu().numpy() # (H, W) at 352Γ—352
86
+ pred_bin = (prob > threshold).astype(np.uint8) # 0 or 1
87
+
88
+ # Resize mask back to original image size
89
+ mask_pil = Image.fromarray((pred_bin * 255).astype(np.uint8), mode="L")
90
+ mask_pil = mask_pil.resize(original_size, Image.NEAREST)
91
+ mask_arr = np.array(mask_pil) # 0 or 255
92
+
93
+ # ── Build overlay (original + coloured mask) ──────────────────────────────
94
+ img_arr = np.array(image_rgb).astype(np.float32) # (H, W, 3)
95
+ overlay = img_arr.copy()
96
+
97
+ # Colour: teal for crack, orange for taping area
98
+ if "crack" in prompt.lower():
99
+ colour = np.array([0, 200, 220], dtype=np.float32) # teal
100
+ else:
101
+ colour = np.array([255, 160, 50], dtype=np.float32) # orange
102
+
103
+ fg = mask_arr > 0
104
+ overlay[fg] = overlay[fg] * 0.45 + colour * 0.55
105
+ overlay = np.clip(overlay, 0, 255).astype(np.uint8)
106
+
107
+ # Coverage stat
108
+ coverage = fg.sum() / fg.size * 100
109
+
110
+ info = (
111
+ f"Prompt : \"{prompt}\"\n"
112
+ f"Threshold : {threshold:.2f}\n"
113
+ f"Inference : {inf_ms:.1f} ms\n"
114
+ f"Coverage : {coverage:.2f} % of image\n"
115
+ f"Device : {DEVICE}"
116
+ )
117
+
118
+ return Image.fromarray(overlay), mask_pil, info
119
+
120
+
121
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
122
+ TITLE = "🧱 Drywall QA β€” Prompted Segmentation"
123
+
124
+ DESCRIPTION = """
125
+ Fine-tuned **CLIPSeg** for text-conditioned binary segmentation of drywall defects.
126
+
127
+ Upload a drywall image, pick a prompt, and the model highlights the defective region.
128
+
129
+ | Prompt | Target | Val mIoU | Val Dice |
130
+ |---|---|---|---|
131
+ | `segment crack` | Wall cracks | **0.735** | **0.834** |
132
+ | `segment taping area` | Joint / tape seam | **0.499** | **0.626** |
133
+
134
+ *Model: CIDAS/clipseg-rd64-refined fine-tuned for 20 epochs Β· Seed 42*
135
+ """
136
+
137
+ ARTICLE = """
138
+ ### How it works
139
+ CLIPSeg extends CLIP with a lightweight decoder that turns any text prompt into a segmentation mask.
140
+ The model was fine-tuned end-to-end on two Roboflow drywall datasets using a combined BCE + Dice loss.
141
+
142
+ **Datasets:** [Drywall-Join-Detect](https://universe.roboflow.com/objectdetect-pu6rn/drywall-join-detect) Β· [Cracks](https://universe.roboflow.com/fyp-ny1jt/cracks-3ii36)
143
+ """
144
+
145
+ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
146
+
147
+ gr.Markdown(f"# {TITLE}")
148
+ gr.Markdown(DESCRIPTION)
149
+
150
+ with gr.Row():
151
+
152
+ # ── Left column: inputs ───────────────────────────────────────────────
153
+ with gr.Column(scale=1):
154
+ image_input = gr.Image(
155
+ type = "pil",
156
+ label = "Upload Drywall Image",
157
+ height = 320,
158
+ )
159
+ prompt_input = gr.Radio(
160
+ choices = PROMPT_CHOICES,
161
+ value = PROMPT_CHOICES[0],
162
+ label = "Segmentation Prompt",
163
+ )
164
+ threshold_slider = gr.Slider(
165
+ minimum = 0.1,
166
+ maximum = 0.9,
167
+ value = THRESHOLD,
168
+ step = 0.05,
169
+ label = "Threshold (lower β†’ more detections, higher β†’ stricter)",
170
+ )
171
+ run_btn = gr.Button("πŸ” Run Segmentation", variant="primary")
172
+
173
+ # ── Right column: outputs ─────────────────────────────────────────────
174
+ with gr.Column(scale=1):
175
+ overlay_out = gr.Image(
176
+ type = "pil",
177
+ label = "Overlay (original + mask)",
178
+ height= 320,
179
+ )
180
+ mask_out = gr.Image(
181
+ type = "pil",
182
+ label = "Binary Mask (white = detected region)",
183
+ height= 160,
184
+ )
185
+ info_out = gr.Textbox(
186
+ label = "Run Info",
187
+ lines = 5,
188
+ )
189
+
190
+ run_btn.click(
191
+ fn = predict,
192
+ inputs = [image_input, prompt_input, threshold_slider],
193
+ outputs = [overlay_out, mask_out, info_out],
194
+ )
195
+
196
+ # Also run on image upload (convenience)
197
+ image_input.change(
198
+ fn = predict,
199
+ inputs = [image_input, prompt_input, threshold_slider],
200
+ outputs = [overlay_out, mask_out, info_out],
201
+ )
202
+
203
+ gr.Markdown(ARTICLE)
204
+
205
+ gr.Examples(
206
+ examples = [], # add example image paths here if you have them
207
+ inputs = [image_input, prompt_input, threshold_slider],
208
+ outputs = [overlay_out, mask_out, info_out],
209
+ fn = predict,
210
+ cache_examples = False,
211
+ )
212
+
213
+
214
+ if __name__ == "__main__":
215
+ demo.launch()
requirements .txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio==4.44.0
2
+ torch==2.3.1
3
+ torchvision==0.18.1
4
+ transformers==4.44.2
5
+ Pillow==10.4.0
6
+ numpy==1.26.4
7
+ matplotlib==3.9.2