JLtan1024 committed on
Commit
8dfa70e
·
verified ·
1 Parent(s): 03082e3

gradio-webrtc

Browse files
Files changed (1) hide show
  1. app.py +127 -125
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import numpy as np
3
  from PIL import Image, ImageDraw, ImageFont
4
  from collections import Counter
@@ -6,6 +7,8 @@ import time
6
  import tempfile
7
  from ultralytics import YOLO
8
  import cv2
 
 
9
 
10
  # Constants
11
  COIN_CLASS_ID = 11 # 10sen coin
@@ -105,99 +108,133 @@ def non_max_suppression(detections, iou_threshold):
105
 
106
  return [detections[i] for i in keep_indices]
107
 
108
- def process_frame(frame, iou_threshold, confidence_threshold, show_detections, px_to_mm_ratio=None):
109
- """Process a single frame and return annotated image and detection data"""
110
- results = model(frame, conf=confidence_threshold)
 
 
 
 
 
 
111
 
112
- if not results:
113
- return frame, [], px_to_mm_ratio
 
 
 
 
114
 
115
- result = results[0]
116
- filtered_detections = non_max_suppression(result.obb, iou_threshold)
117
-
118
- pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
119
- draw = ImageDraw.Draw(pil_image)
 
 
 
 
 
120
 
121
- try:
122
- font = ImageFont.truetype("arial.ttf", LABEL_FONT_SIZE)
123
- except:
124
- font = ImageFont.load_default()
125
- if hasattr(font, 'size'):
126
- font.size = LABEL_FONT_SIZE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- detected_objects = []
129
- current_px_to_mm_ratio = px_to_mm_ratio
130
-
131
- # Find coin for scaling
132
- if current_px_to_mm_ratio is None:
 
 
 
 
 
 
 
 
 
 
133
  for detection in filtered_detections:
134
- if len(detection.cls) > 0 and int(detection.cls[0]) == COIN_CLASS_ID and len(detection.xywhr) > 0:
135
- coin_xywhr = detection.xywhr[0]
136
- width_px = coin_xywhr[2]
137
- height_px = coin_xywhr[3]
138
- avg_px_diameter = (width_px + height_px) / 2
139
- if avg_px_diameter > 0:
140
- current_px_to_mm_ratio = COIN_DIAMETER_MM / avg_px_diameter
141
- break
142
 
143
- # Draw detections
144
- for detection in filtered_detections:
145
- if len(detection.cls) > 0 and len(detection.xywhr) > 0 and len(detection.xyxy) > 0:
146
- class_id = int(detection.cls[0])
147
- confidence = detection.conf[0]
148
- x1, y1, x2, y2 = map(int, detection.xyxy[0])
149
- class_name = CLASS_NAMES.get(class_id, f"Class {int(class_id)}")
150
- color = CATEGORY_COLORS.get(class_name, (0, 255, 0))
151
 
152
- label_text = f"{class_name}"
153
- if class_id != COIN_CLASS_ID:
154
- detected_objects.append(class_name)
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- if class_id == COIN_CLASS_ID and current_px_to_mm_ratio:
157
- diameter_px = (x2 - x1 + y2 - y1) / 2
158
- diameter_mm = diameter_px * current_px_to_mm_ratio
159
- label_text += f", Dia: {diameter_mm:.2f}mm"
160
- elif class_id != COIN_CLASS_ID and current_px_to_mm_ratio:
161
- xywhr = detection.xywhr[0]
162
- width_px = xywhr[2]
163
- height_px = xywhr[3]
164
- length_px = max(width_px, height_px)
165
- length_mm = length_px * current_px_to_mm_ratio
166
- label_text += f", Length: {length_mm:.2f}mm"
167
- elif class_id != COIN_CLASS_ID:
168
- label_text += ", Length: N/A (No Coin)"
169
- elif class_id == COIN_CLASS_ID:
170
- label_text += ", Dia: N/A (No Ratio)"
171
 
172
- if show_detections:
173
- draw.rectangle([(x1, y1), (x2, y2)], outline=color, width=BORDER_WIDTH)
174
- text_width, text_height = get_text_size(draw, label_text, font)
175
- draw.rectangle([(x1, y1 - text_height - 5), (x1 + text_width + 5, y1)], fill=color)
176
- draw.text((x1 + 2, y1 - text_height - 3), label_text, fill=(255, 255, 255), font=font)
177
 
178
- return np.array(pil_image), detected_objects, current_px_to_mm_ratio
 
 
179
 
180
  def process_image(input_image, iou_threshold, confidence_threshold, show_detections, show_summary):
181
  frame = np.array(input_image)
182
- processed_frame, detected_objects, _ = process_frame(frame, iou_threshold, confidence_threshold, show_detections)
 
 
 
 
183
 
184
  output_image = Image.fromarray(processed_frame)
185
 
186
- summary = ""
187
- if show_summary and detected_objects:
188
- screw_counts = Counter(detected_objects)
189
- summary = "Detection Summary:\n"
190
- for name, count in screw_counts.items():
191
- summary += f"- {name}: {count}\n"
192
- elif show_summary:
193
- summary = "No screws or nuts detected."
194
 
195
  return output_image, summary
196
 
197
  def process_video(video_path, iou_threshold, confidence_threshold, show_detections, show_summary):
198
  cap = cv2.VideoCapture(video_path)
199
- px_to_mm_ratio = None
200
- all_detected_objects = []
 
 
201
 
202
  frames = []
203
  while cap.isOpened():
@@ -205,60 +242,15 @@ def process_video(video_path, iou_threshold, confidence_threshold, show_detectio
205
  if not ret:
206
  break
207
 
208
- processed_frame, detected_objects, px_to_mm_ratio = process_frame(
209
- frame, iou_threshold, confidence_threshold, show_detections, px_to_mm_ratio
210
- )
211
-
212
- if detected_objects:
213
- all_detected_objects.extend(detected_objects)
214
-
215
  frames.append(processed_frame)
216
 
217
  cap.release()
218
 
219
- summary = ""
220
- if show_summary and all_detected_objects:
221
- screw_counts = Counter(all_detected_objects)
222
- summary = "Detection Summary:\n"
223
- for name, count in screw_counts.items():
224
- summary += f"- {name}: {count}\n"
225
- elif show_summary:
226
- summary = "No screws or nuts detected."
227
 
228
  return frames, summary
229
 
230
- def webcam_capture(iou_threshold, confidence_threshold, show_detections, show_summary):
231
- cap = cv2.VideoCapture(0)
232
- px_to_mm_ratio = None
233
- all_detected_objects = []
234
-
235
- while True:
236
- ret, frame = cap.read()
237
- if not ret:
238
- break
239
-
240
- processed_frame, detected_objects, px_to_mm_ratio = process_frame(
241
- frame, iou_threshold, confidence_threshold, show_detections, px_to_mm_ratio
242
- )
243
-
244
- if detected_objects:
245
- all_detected_objects.extend(detected_objects)
246
-
247
- yield cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
248
-
249
- cap.release()
250
-
251
- summary = ""
252
- if show_summary and all_detected_objects:
253
- screw_counts = Counter(all_detected_objects)
254
- summary = "Detection Summary:\n"
255
- for name, count in screw_counts.items():
256
- summary += f"- {name}: {count}\n"
257
- elif show_summary:
258
- summary = "No screws or nuts detected."
259
-
260
- yield None, summary
261
-
262
  # Gradio Interface
263
  with gr.Blocks(title="Screw Detection and Measurement") as demo:
264
  gr.Markdown("# 🔍 Screw Detection and Measurement (YOLOv11 OBB)")
@@ -308,15 +300,25 @@ with gr.Blocks(title="Screw Detection and Measurement") as demo:
308
  webcam_conf = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05)
309
  webcam_show_det = gr.Checkbox(label="Show Detections", value=True)
310
  webcam_show_sum = gr.Checkbox(label="Show Summary", value=True)
311
- webcam_button = gr.Button("Start Webcam")
312
  with gr.Column():
313
- webcam_output = gr.Image(label="Live Detection", streaming=True)
 
 
 
 
 
314
  webcam_summary = gr.Textbox(label="Summary", interactive=False)
 
 
 
 
 
 
315
 
316
- webcam_button.click(
317
- webcam_capture,
318
- inputs=[webcam_iou, webcam_conf, webcam_show_det, webcam_show_sum],
319
- outputs=[webcam_output, webcam_summary]
320
  )
321
 
322
  demo.launch()
 
1
  import gradio as gr
2
+ from gradio_webrtc import WebRTC
3
  import numpy as np
4
  from PIL import Image, ImageDraw, ImageFont
5
  from collections import Counter
 
7
  import tempfile
8
  from ultralytics import YOLO
9
  import cv2
10
+ import av
11
+ import threading
12
 
13
  # Constants
14
  COIN_CLASS_ID = 11 # 10sen coin
 
108
 
109
  return [detections[i] for i in keep_indices]
110
 
111
class VideoProcessor:
    """Stateful per-stream detector for the WebRTC webcam feed.

    Runs the YOLO OBB model on each incoming frame, draws detections,
    and accumulates per-class counts for the summary box. The pixel-to-mm
    ratio is calibrated once from the first detected 10sen coin and then
    reused for every later frame of the same stream.
    """

    def __init__(self):
        self.px_to_mm_ratio = None    # set once a reference coin is seen
        self.detected_objects = []    # running list of non-coin class names
        self.lock = threading.Lock()  # guards settings and detected_objects
        self.show_detections = True
        self.show_summary = True
        self.iou_threshold = 0.7
        self.confidence_threshold = 0.5

    def update_settings(self, iou_threshold, confidence_threshold, show_detections, show_summary):
        """Atomically replace the detection settings used by later frames."""
        with self.lock:
            self.iou_threshold = iou_threshold
            self.confidence_threshold = confidence_threshold
            self.show_detections = show_detections
            self.show_summary = show_summary

    def get_summary(self):
        """Return a human-readable count of everything detected so far."""
        with self.lock:
            if not self.show_summary or not self.detected_objects:
                return "No screws or nuts detected yet."

            screw_counts = Counter(self.detected_objects)
            summary_text = "Detection Summary:\n"
            for name, count in screw_counts.items():
                summary_text += f"- {name}: {count}\n"
            return summary_text

    def process_frame(self, frame):
        """Run detection on one av.VideoFrame and return an annotated RGB ndarray.

        Always returns a single RGB numpy array — including on the
        no-results path (the original returned a `(frame, [])` tuple there,
        which crashed every caller expecting a single array).
        """
        frame = frame.to_ndarray(format="bgr24")

        # Snapshot settings once so a concurrent update_settings() call
        # cannot change thresholds halfway through a frame.
        with self.lock:
            conf_thr = self.confidence_threshold
            iou_thr = self.iou_threshold
            show_detections = self.show_detections

        results = model(frame, conf=conf_thr)
        if not results:
            # Keep the RGB-array contract on the empty path too.
            return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        result = results[0]
        filtered_detections = non_max_suppression(result.obb, iou_thr)

        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(pil_image)

        try:
            font = ImageFont.truetype("arial.ttf", LABEL_FONT_SIZE)
        except OSError:
            # NOTE(review): the original assigned font.size on the bitmap
            # fallback, which does not resize it; the default font is used
            # as-is here. Pillow >= 10.1 supports load_default(size=...).
            font = ImageFont.load_default()

        frame_detected_objects = []

        # Calibrate the px-to-mm ratio from the first coin ever seen.
        if self.px_to_mm_ratio is None:
            for detection in filtered_detections:
                if len(detection.cls) > 0 and int(detection.cls[0]) == COIN_CLASS_ID and len(detection.xywhr) > 0:
                    coin_xywhr = detection.xywhr[0]
                    avg_px_diameter = (coin_xywhr[2] + coin_xywhr[3]) / 2
                    if avg_px_diameter > 0:
                        self.px_to_mm_ratio = COIN_DIAMETER_MM / avg_px_diameter
                        break

        # Draw detections and build measurement labels.
        for detection in filtered_detections:
            if len(detection.cls) > 0 and len(detection.xywhr) > 0 and len(detection.xyxy) > 0:
                class_id = int(detection.cls[0])
                x1, y1, x2, y2 = map(int, detection.xyxy[0])
                class_name = CLASS_NAMES.get(class_id, f"Class {int(class_id)}")
                color = CATEGORY_COLORS.get(class_name, (0, 255, 0))

                label_text = f"{class_name}"
                if class_id != COIN_CLASS_ID:
                    frame_detected_objects.append(class_name)

                if class_id == COIN_CLASS_ID and self.px_to_mm_ratio:
                    # Mean of box width/height approximates the coin diameter.
                    diameter_px = (x2 - x1 + y2 - y1) / 2
                    diameter_mm = diameter_px * self.px_to_mm_ratio
                    label_text += f", Dia: {diameter_mm:.2f}mm"
                elif class_id != COIN_CLASS_ID and self.px_to_mm_ratio:
                    # Longest OBB side is the screw/nut length in pixels.
                    xywhr = detection.xywhr[0]
                    length_px = max(xywhr[2], xywhr[3])
                    length_mm = length_px * self.px_to_mm_ratio
                    label_text += f", Length: {length_mm:.2f}mm"
                elif class_id != COIN_CLASS_ID:
                    label_text += ", Length: N/A (No Coin)"
                elif class_id == COIN_CLASS_ID:
                    label_text += ", Dia: N/A (No Ratio)"

                if show_detections:
                    draw.rectangle([(x1, y1), (x2, y2)], outline=color, width=BORDER_WIDTH)
                    text_width, text_height = get_text_size(draw, label_text, font)
                    draw.rectangle([(x1, y1 - text_height - 5), (x1 + text_width + 5, y1)], fill=color)
                    draw.text((x1 + 2, y1 - text_height - 3), label_text, fill=(255, 255, 255), font=font)

        with self.lock:
            self.detected_objects.extend(frame_detected_objects)

        return np.array(pil_image)

    def recv(self, frame):
        """WebRTC callback: annotate the frame and hand it back as bgr24."""
        processed_rgb = self.process_frame(frame)
        # process_frame returns RGB; convert back so the declared bgr24
        # format matches the actual channel order (the original labelled
        # the RGB buffer bgr24, swapping red and blue in the stream).
        return av.VideoFrame.from_ndarray(
            cv2.cvtColor(processed_rgb, cv2.COLOR_RGB2BGR), format="bgr24"
        )
217
 
218
def process_image(input_image, iou_threshold, confidence_threshold, show_detections, show_summary):
    """Detect and measure screws/nuts in a single still image.

    Args:
        input_image: PIL image from the Gradio image component (RGB).
        iou_threshold: NMS IoU threshold passed to the processor.
        confidence_threshold: YOLO confidence cutoff.
        show_detections: whether boxes/labels are drawn on the output.
        show_summary: whether a textual count summary is produced.

    Returns:
        (annotated PIL image, summary text) tuple.
    """
    # Gradio hands over an RGB PIL image, but VideoProcessor.process_frame
    # works in BGR (OpenCV channel order). The original wrapped the RGB
    # buffer as bgr24 unconverted, which swapped red/blue in the output.
    # .convert("RGB") also guards against RGBA/paletted uploads.
    rgb = np.array(input_image.convert("RGB"))
    frame = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Fresh processor so each image is calibrated and counted independently.
    processor = VideoProcessor()
    processor.update_settings(iou_threshold, confidence_threshold, show_detections, show_summary)
    processed_frame = processor.process_frame(av.VideoFrame.from_ndarray(frame, format="bgr24"))

    output_image = Image.fromarray(processed_frame)
    summary = processor.get_summary()

    return output_image, summary
231
 
232
def process_video(video_path, iou_threshold, confidence_threshold, show_detections, show_summary):
    """Run detection over every frame of a video file.

    Args:
        video_path: path to a video file readable by OpenCV.
        iou_threshold / confidence_threshold / show_detections / show_summary:
            detection settings forwarded to the processor.

    Returns:
        (list of annotated RGB frame arrays, summary text) tuple.
    """
    cap = cv2.VideoCapture(video_path)

    # One processor for the whole clip so the coin calibration found in an
    # early frame carries over, and detection counts accumulate across frames.
    processor = VideoProcessor()
    processor.update_settings(iou_threshold, confidence_threshold, show_detections, show_summary)

    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(
                processor.process_frame(av.VideoFrame.from_ndarray(frame, format="bgr24"))
            )
    finally:
        # Release the capture even if decoding or inference raises mid-stream
        # (the original leaked the handle on any exception in the loop).
        cap.release()

    return frames, processor.get_summary()
253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  # Gradio Interface
255
  with gr.Blocks(title="Screw Detection and Measurement") as demo:
256
  gr.Markdown("# 🔍 Screw Detection and Measurement (YOLOv11 OBB)")
 
300
  webcam_conf = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05)
301
  webcam_show_det = gr.Checkbox(label="Show Detections", value=True)
302
  webcam_show_sum = gr.Checkbox(label="Show Summary", value=True)
303
+ settings_button = gr.Button("Update Settings")
304
  with gr.Column():
305
+ webrtc_ctx = WebRTC(
306
+ mode="sendonly",
307
+ audio=False,
308
+ video_processor_factory=VideoProcessor,
309
+ key="webcam-detection"
310
+ )
311
  webcam_summary = gr.Textbox(label="Summary", interactive=False)
312
+ refresh_button = gr.Button("Refresh Summary")
313
+
314
+ settings_button.click(
315
+ fn=lambda iou, conf, det, summ: webrtc_ctx.video_processor.update_settings(iou, conf, det, summ),
316
+ inputs=[webcam_iou, webcam_conf, webcam_show_det, webcam_show_sum]
317
+ )
318
 
319
+ refresh_button.click(
320
+ fn=lambda: webrtc_ctx.video_processor.get_summary(),
321
+ outputs=webcam_summary
 
322
  )
323
 
324
  demo.launch()