Spaces:

S-4-G-4-R
/

Drywall_image_segmentation

Sleeping

App Files Community

S-4-G-4-R committed on 5 days ago

Commit

2e0b11e

verified ·

1 Parent(s): fe08322

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -92

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """
 app.py — Gradio demo for Prompted Segmentation for Drywall QA
-Model  : CLIPSeg (CIDAS/clipseg-rd64-refined), fine-tuned on drywall datasets
-Weights: best_model.pt  (upload this file to your HuggingFace Space)
 """
 import os
@@ -13,50 +12,32 @@ from PIL import Image
 from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
 # ── Config ────────────────────────────────────────────────────────────────────
-MODEL_NAME  = "CIDAS/clipseg-rd64-refined"
-CKPT_PATH   = "best_model.pt"          # must be in the Space root directory
-IMG_SIZE    = 352
-THRESHOLD   = 0.5
-DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Supported prompts (trained)
 PROMPT_CHOICES = [
     "segment crack",
     "segment taping area",
 ]
 # ── Load model (once at startup) ──────────────────────────────────────────────
-print(f"Loading CLIPSeg processor from {MODEL_NAME} ...")
-processor = CLIPSegProcessor.from_pretrained(MODEL_NAME)
-print(f"Loading CLIPSeg model from {MODEL_NAME} ...")
-model = CLIPSegForImageSegmentation.from_pretrained(MODEL_NAME)
-if os.path.exists(CKPT_PATH):
-    print(f"Loading fine-tuned weights from {CKPT_PATH} ...")
-    state_dict = torch.load(CKPT_PATH, map_location=DEVICE)
-    model.load_state_dict(state_dict)
-    print("Fine-tuned weights loaded successfully.")
-else:
-    print(f"WARNING: {CKPT_PATH} not found — running with base CLIPSeg weights.")
 model = model.to(DEVICE)
 model.eval()
 print(f"Model ready on {DEVICE}.")
-# ── Inference function ────────────────────────────────────────────────────────
 def predict(image: Image.Image, prompt: str, threshold: float) -> tuple:
-    """
-    Runs CLIPSeg inference and returns:
-      - overlay  : original image blended with coloured mask
-      - mask_img : pure binary mask (grayscale)
-      - info_str : prompt used + inference time
-    """
     if image is None:
         return None, None, "Please upload an image."
-    original_size = image.size          # (W, H) — to resize mask back
     image_rgb     = image.convert("RGB")
     # Preprocess
@@ -81,32 +62,23 @@ def predict(image: Image.Image, prompt: str, threshold: float) -> tuple:
         )
     inf_ms = (time.time() - t0) * 1000
-    # Post-process logits → binary mask
-    prob     = torch.sigmoid(outputs.logits[0]).cpu().numpy()   # (H, W) at 352×352
-    pred_bin = (prob > threshold).astype(np.uint8)              # 0 or 1
-    # Resize mask back to original image size
     mask_pil = Image.fromarray((pred_bin * 255).astype(np.uint8), mode="L")
     mask_pil = mask_pil.resize(original_size, Image.NEAREST)
-    mask_arr = np.array(mask_pil)                               # 0 or 255
-    # ── Build overlay (original + coloured mask) ──────────────────────────────
-    img_arr  = np.array(image_rgb).astype(np.float32)           # (H, W, 3)
-    overlay  = img_arr.copy()
-    # Colour: teal for crack, orange for taping area
-    if "crack" in prompt.lower():
-        colour = np.array([0, 200, 220], dtype=np.float32)      # teal
-    else:
-        colour = np.array([255, 160, 50], dtype=np.float32)     # orange
     fg = mask_arr > 0
     overlay[fg] = overlay[fg] * 0.45 + colour * 0.55
     overlay = np.clip(overlay, 0, 255).astype(np.uint8)
-    # Coverage stat
     coverage = fg.sum() / fg.size * 100
     info = (
         f"Prompt      : \"{prompt}\"\n"
         f"Threshold   : {threshold:.2f}\n"
@@ -118,12 +90,11 @@ def predict(image: Image.Image, prompt: str, threshold: float) -> tuple:
     return Image.fromarray(overlay), mask_pil, info
-# ── Gradio UI ─────────────────────────────────────────────────────────────────
 TITLE = "🧱 Drywall QA — Prompted Segmentation"
 DESCRIPTION = """
 Fine-tuned **CLIPSeg** for text-conditioned binary segmentation of drywall defects.
 Upload a drywall image, pick a prompt, and the model highlights the defective region.
 | Prompt | Target | Val mIoU | Val Dice |
@@ -131,13 +102,13 @@ Upload a drywall image, pick a prompt, and the model highlights the defective re
 | `segment crack` | Wall cracks | **0.735** | **0.834** |
 | `segment taping area` | Joint / tape seam | **0.499** | **0.626** |
-*Model: CIDAS/clipseg-rd64-refined fine-tuned for 20 epochs · Seed 42*
 """
 ARTICLE = """
 ### How it works
 CLIPSeg extends CLIP with a lightweight decoder that turns any text prompt into a segmentation mask.
-The model was fine-tuned end-to-end on two Roboflow drywall datasets using a combined BCE + Dice loss.
 **Datasets:** [Drywall-Join-Detect](https://universe.roboflow.com/objectdetect-pu6rn/drywall-join-detect) · [Cracks](https://universe.roboflow.com/fyp-ny1jt/cracks-3ii36)
 """
@@ -148,52 +119,29 @@ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Row():
-        # ── Left column: inputs ───────────────────────────────────────────────
         with gr.Column(scale=1):
-            image_input = gr.Image(
-                type    = "pil",
-                label   = "Upload Drywall Image",
-                height  = 320,
-            )
             prompt_input = gr.Radio(
-                choices = PROMPT_CHOICES,
-                value   = PROMPT_CHOICES[0],
-                label   = "Segmentation Prompt",
             )
             threshold_slider = gr.Slider(
-                minimum = 0.1,
-                maximum = 0.9,
-                value   = THRESHOLD,
-                step    = 0.05,
-                label   = "Threshold  (lower → more detections, higher → stricter)",
             )
             run_btn = gr.Button("🔍 Run Segmentation", variant="primary")
-        # ── Right column: outputs ─────────────────────────────────────────────
         with gr.Column(scale=1):
-            overlay_out = gr.Image(
-                type  = "pil",
-                label = "Overlay  (original + mask)",
-                height= 320,
-            )
-            mask_out = gr.Image(
-                type  = "pil",
-                label = "Binary Mask  (white = detected region)",
-                height= 160,
-            )
-            info_out = gr.Textbox(
-                label = "Run Info",
-                lines = 5,
-            )
     run_btn.click(
         fn      = predict,
         inputs  = [image_input, prompt_input, threshold_slider],
         outputs = [overlay_out, mask_out, info_out],
     )
-    # Also run on image upload (convenience)
     image_input.change(
         fn      = predict,
         inputs  = [image_input, prompt_input, threshold_slider],
@@ -202,14 +150,6 @@ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
     gr.Markdown(ARTICLE)
-    gr.Examples(
-        examples        = [],          # add example image paths here if you have them
-        inputs          = [image_input, prompt_input, threshold_slider],
-        outputs         = [overlay_out, mask_out, info_out],
-        fn              = predict,
-        cache_examples  = False,
-    )
 if __name__ == "__main__":
-    demo.launch()

 """
 app.py — Gradio demo for Prompted Segmentation for Drywall QA
+Model  : S-4-G-4-R/clipseg-drywall-qa (loaded directly from HuggingFace)
 """
 import os
 from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
 # ── Config ────────────────────────────────────────────────────────────────────
+REPO_ID    = "S-4-G-4-R/clipseg-drywall-qa"
+THRESHOLD  = 0.5
+DEVICE     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PROMPT_CHOICES = [
     "segment crack",
     "segment taping area",
 ]
 # ── Load model (once at startup) ──────────────────────────────────────────────
+print(f"Loading processor from {REPO_ID} ...")
+processor = CLIPSegProcessor.from_pretrained(REPO_ID)
+print(f"Loading model from {REPO_ID} ...")
+model = CLIPSegForImageSegmentation.from_pretrained(REPO_ID)
 model = model.to(DEVICE)
 model.eval()
 print(f"Model ready on {DEVICE}.")
+# ── Inference ─────────────────────────────────────────────────────────────────
 def predict(image: Image.Image, prompt: str, threshold: float) -> tuple:
     if image is None:
         return None, None, "Please upload an image."
+    original_size = image.size
     image_rgb     = image.convert("RGB")
     # Preprocess
         )
     inf_ms = (time.time() - t0) * 1000
+    # Logits → binary mask → resize back to original
+    prob     = torch.sigmoid(outputs.logits[0]).cpu().numpy()
+    pred_bin = (prob > threshold).astype(np.uint8)
     mask_pil = Image.fromarray((pred_bin * 255).astype(np.uint8), mode="L")
     mask_pil = mask_pil.resize(original_size, Image.NEAREST)
+    mask_arr = np.array(mask_pil)
+    # Overlay — teal for crack, orange for taping area
+    img_arr = np.array(image_rgb).astype(np.float32)
+    overlay = img_arr.copy()
+    colour  = np.array([0, 200, 220], dtype=np.float32) if "crack" in prompt.lower() \
+              else np.array([255, 160, 50], dtype=np.float32)
     fg = mask_arr > 0
     overlay[fg] = overlay[fg] * 0.45 + colour * 0.55
     overlay = np.clip(overlay, 0, 255).astype(np.uint8)
     coverage = fg.sum() / fg.size * 100
     info = (
         f"Prompt      : \"{prompt}\"\n"
         f"Threshold   : {threshold:.2f}\n"
     return Image.fromarray(overlay), mask_pil, info
+# ── UI ────────────────────────────────────────────────────────────────────────
 TITLE = "🧱 Drywall QA — Prompted Segmentation"
 DESCRIPTION = """
 Fine-tuned **CLIPSeg** for text-conditioned binary segmentation of drywall defects.
 Upload a drywall image, pick a prompt, and the model highlights the defective region.
 | Prompt | Target | Val mIoU | Val Dice |
 | `segment crack` | Wall cracks | **0.735** | **0.834** |
 | `segment taping area` | Joint / tape seam | **0.499** | **0.626** |
+*Model: [S-4-G-4-R/clipseg-drywall-qa](https://huggingface.co/S-4-G-4-R/clipseg-drywall-qa) · Fine-tuned 20 epochs · Seed 42*
 """
 ARTICLE = """
 ### How it works
 CLIPSeg extends CLIP with a lightweight decoder that turns any text prompt into a segmentation mask.
+Fine-tuned end-to-end on two Roboflow drywall datasets using combined BCE + Dice loss.
 **Datasets:** [Drywall-Join-Detect](https://universe.roboflow.com/objectdetect-pu6rn/drywall-join-detect) · [Cracks](https://universe.roboflow.com/fyp-ny1jt/cracks-3ii36)
 """
     gr.Markdown(DESCRIPTION)
     with gr.Row():
         with gr.Column(scale=1):
+            image_input = gr.Image(type="pil", label="Upload Drywall Image", height=320)
             prompt_input = gr.Radio(
+                choices=PROMPT_CHOICES,
+                value=PROMPT_CHOICES[0],
+                label="Segmentation Prompt",
             )
             threshold_slider = gr.Slider(
+                minimum=0.1, maximum=0.9, value=THRESHOLD, step=0.05,
+                label="Threshold  (lower → more detections, higher → stricter)",
             )
             run_btn = gr.Button("🔍 Run Segmentation", variant="primary")
         with gr.Column(scale=1):
+            overlay_out = gr.Image(type="pil", label="Overlay  (original + mask)", height=320)
+            mask_out    = gr.Image(type="pil", label="Binary Mask  (white = detected region)", height=160)
+            info_out    = gr.Textbox(label="Run Info", lines=5)
     run_btn.click(
         fn      = predict,
         inputs  = [image_input, prompt_input, threshold_slider],
         outputs = [overlay_out, mask_out, info_out],
     )
     image_input.change(
         fn      = predict,
         inputs  = [image_input, prompt_input, threshold_slider],
     gr.Markdown(ARTICLE)
 if __name__ == "__main__":
+    demo.launch()