darthvader2603 committed on
Commit
d0bfb7a
·
verified ·
1 Parent(s): 8ef0245

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -37
app.py CHANGED
@@ -5,7 +5,6 @@ import torchvision.transforms as T
5
  import gradio as gr
6
  import numpy as np
7
  import cv2
8
- from PIL import Image
9
 
10
  # ==========================================
11
  # 1. Model Architecture
@@ -34,7 +33,7 @@ class SimpleUNet(nn.Module):
34
  return x
35
 
36
  # ==========================================
37
- # 2. Load Model
38
  # ==========================================
39
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
  model = SimpleUNet().to(device)
@@ -45,78 +44,126 @@ try:
45
  except FileNotFoundError:
46
  print("WARNING: 'iris_segmentation_model.pth' not found.")
47
 
 
 
 
48
  model.eval()
49
 
50
  # ==========================================
51
- # 3. Preprocessing & Logic
52
  # ==========================================
53
- transform = T.Compose([
54
- T.Resize((224, 224)),
55
- T.ToTensor(),
56
- T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
57
- ])
58
 
59
- def process_frame(frame):
 
 
 
 
60
  """
61
- Standard processing function.
62
- Used by both the Live Stream and the Snapshot button.
 
 
63
  """
64
  if frame is None:
65
- return None
66
 
67
  original_h, original_w = frame.shape[:2]
68
 
69
- # Enhance
 
 
70
  gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
71
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
72
  enhanced = clahe.apply(gray)
73
  enhanced_rgb = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2RGB)
74
- pil_img = Image.fromarray(enhanced_rgb)
 
 
 
 
 
 
75
 
76
- # Predict
77
- input_tensor = transform(pil_img).unsqueeze(0).to(device)
 
 
 
 
 
 
 
 
78
  with torch.no_grad():
79
  pred = model(input_tensor)
80
- pred_mask = pred.squeeze().cpu().numpy()
 
81
 
82
- # Mask
83
  binary_mask = (pred_mask > 0.5).astype(np.uint8)
84
  binary_mask_resized = cv2.resize(binary_mask, (original_w, original_h), interpolation=cv2.INTER_NEAREST)
85
 
86
- # Colorize
87
  color_mask_bgr = cv2.applyColorMap(binary_mask_resized * 255, cv2.COLORMAP_JET)
88
  color_mask_rgb = cv2.cvtColor(color_mask_bgr, cv2.COLOR_BGR2RGB)
89
 
90
- # Blend
91
  blended = cv2.addWeighted(frame, 0.7, color_mask_rgb, 0.3, 0)
92
 
93
- # Flip for mirror effect (optional)
94
- return cv2.flip(blended, 1)
95
 
96
  # ==========================================
97
- # 4. Gradio Interface
98
  # ==========================================
 
 
 
99
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
100
- gr.Markdown("## 👁️ Live Iris Segmentation + Snapshot")
101
 
102
- # --- Live Stream Section ---
 
 
103
  with gr.Row():
104
- input_stream = gr.Image(sources=["webcam"], streaming=True, label="Live Webcam", mirror_webcam=True)
 
 
105
  output_stream = gr.Image(label="Live Segmentation", interactive=False)
 
 
106
 
107
- # --- Capture Controls ---
108
- # This button allows you to "Stop and See" the current result
109
- btn_snapshot = gr.Button("📸 Freeze Current Frame", variant="primary")
110
-
111
- # --- Static Result Section ---
112
- # This shows the single frozen frame
113
- static_output = gr.Image(label="Frozen Snapshot (Inspection View)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- # 1. Start the Live Stream
116
- input_stream.stream(fn=process_frame, inputs=input_stream, outputs=output_stream)
117
 
118
- # 2. Button Logic: Take current stream frame -> Process -> Show in Static Box
119
- btn_snapshot.click(fn=process_frame, inputs=input_stream, outputs=static_output)
120
 
121
  if __name__ == "__main__":
122
  demo.launch()
 
5
  import gradio as gr
6
  import numpy as np
7
  import cv2
 
8
 
9
  # ==========================================
10
  # 1. Model Architecture
 
33
  return x
34
 
35
  # ==========================================
36
+ # 2. Load Model & Optimize (FP16)
37
  # ==========================================
38
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
39
  model = SimpleUNet().to(device)
 
44
  except FileNotFoundError:
45
  print("WARNING: 'iris_segmentation_model.pth' not found.")
46
 
47
+ # OPTIMIZATION: Convert to Half Precision (FP16) for speed
48
+ if device.type == 'cuda':
49
+ model.half()
50
  model.eval()
51
 
52
  # ==========================================
53
+ # 3. High-Speed Processing Logic
54
  # ==========================================
55
+ # Pre-calculate normalization constants for speed
56
+ mean = torch.tensor([0.5, 0.5, 0.5], device=device).view(1, 3, 1, 1)
57
+ std = torch.tensor([0.5, 0.5, 0.5], device=device).view(1, 3, 1, 1)
 
 
58
 
59
+ if device.type == 'cuda':
60
+ mean = mean.half()
61
+ std = std.half()
62
+
63
+ def process_frame_fast(frame):
64
  """
65
+ Optimized pipeline:
66
+ 1. Direct Numpy -> Tensor (No PIL)
67
+ 2. GPU Resize
68
+ 3. FP16 Inference
69
  """
70
  if frame is None:
71
+ return None, None
72
 
73
  original_h, original_w = frame.shape[:2]
74
 
75
+ # 1. Preprocessing (OpenCV is faster than PIL for basic ops)
76
+ # CLAHE (CPU side is usually fine, but moving to GPU tensor first is an option if CPU is bottleneck)
77
+ # Keeping CLAHE on CPU for stability with OpenCV
78
  gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
79
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
80
  enhanced = clahe.apply(gray)
81
  enhanced_rgb = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2RGB)
82
+
83
+ # 2. To Tensor (Directly to GPU)
84
+ input_tensor = torch.from_numpy(enhanced_rgb).permute(2, 0, 1).to(device)
85
+
86
+ # 3. Normalize & Resize on GPU (Faster than CPU resize)
87
+ # We resize to 224x224
88
+ input_tensor = T.functional.resize(input_tensor, [224, 224], antialias=True)
89
 
90
+ # Convert to float (or half) and normalize
91
+ if device.type == 'cuda':
92
+ input_tensor = input_tensor.half()
93
+ else:
94
+ input_tensor = input_tensor.float()
95
+
96
+ input_tensor = input_tensor.div(255.0).unsqueeze(0)
97
+ input_tensor = (input_tensor - mean) / std
98
+
99
+ # 4. Inference
100
  with torch.no_grad():
101
  pred = model(input_tensor)
102
+ # Squeeze and bring mask back to CPU as float32 for OpenCV
103
+ pred_mask = pred.squeeze().float().cpu().numpy()
104
 
105
+ # 5. Post-Processing (Mask & Blend)
106
  binary_mask = (pred_mask > 0.5).astype(np.uint8)
107
  binary_mask_resized = cv2.resize(binary_mask, (original_w, original_h), interpolation=cv2.INTER_NEAREST)
108
 
 
109
  color_mask_bgr = cv2.applyColorMap(binary_mask_resized * 255, cv2.COLORMAP_JET)
110
  color_mask_rgb = cv2.cvtColor(color_mask_bgr, cv2.COLOR_BGR2RGB)
111
 
 
112
  blended = cv2.addWeighted(frame, 0.7, color_mask_rgb, 0.3, 0)
113
 
114
+ # Return twice: one for display, one for 'latest_frame' state
115
+ return blended, blended
116
 
117
  # ==========================================
118
+ # 4. Gradio Interface (Stream + State + Cancel Pattern)
119
  # ==========================================
120
+ def capture_logic(image):
121
+ return image
122
+
123
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
124
+ gr.Markdown("## Fast Iris Segmentation")
125
 
126
+ # State to hold the latest image for capturing
127
+ latest_frame_state = gr.State()
128
+
129
  with gr.Row():
130
+ # Input: Webcam (mirroring enabled)
131
+ input_stream = gr.Image(sources=["webcam"], streaming=True, label="Webcam", mirror_webcam=True)
132
+ # Output: Live Result
133
  output_stream = gr.Image(label="Live Segmentation", interactive=False)
134
+ # Output: Snapshot
135
+ snapshot_output = gr.Image(label="Snapshot")
136
 
137
+ with gr.Row():
138
+ # Three minimal buttons
139
+ btn_start = gr.Button("▶️ Start / Restart", variant="primary")
140
+ btn_stop = gr.Button("⏹️ Stop", variant="stop")
141
+ btn_capture = gr.Button("📸 Capture", variant="secondary")
142
+
143
+ # --- Event Logic ---
144
+
145
+ # 1. START: distinct event that triggers the stream
146
+ # using input_stream.stream allows Gradio to handle the webcam loop efficiently
147
+ stream_event = input_stream.stream(
148
+ fn=process_frame_fast,
149
+ inputs=input_stream,
150
+ outputs=[output_stream, latest_frame_state],
151
+ show_progress=False
152
+ )
153
+
154
+ # 2. RESTART: Clicking start simply re-triggers the stream event
155
+ btn_start.click(
156
+ fn=process_frame_fast,
157
+ inputs=input_stream,
158
+ outputs=[output_stream, latest_frame_state],
159
+ show_progress=False
160
+ )
161
 
162
+ # 3. STOP: Cancels the stream event. This kills the process cleanly.
163
+ btn_stop.click(fn=None, inputs=None, outputs=None, cancels=[stream_event])
164
 
165
+ # 4. CAPTURE: Grabs the last frame from State
166
+ btn_capture.click(fn=capture_logic, inputs=latest_frame_state, outputs=snapshot_output)
167
 
168
  if __name__ == "__main__":
169
  demo.launch()