File size: 6,497 Bytes
006319c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90582e1
 
 
006319c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90582e1
 
006319c
 
 
90582e1
006319c
 
 
 
 
 
 
 
 
 
 
 
 
90582e1
 
006319c
 
 
 
90582e1
 
 
006319c
 
90582e1
006319c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90582e1
 
 
006319c
 
 
 
 
 
 
 
 
 
 
d98304c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import gradio as gr
import cv2
import numpy as np
import torch
from ultralytics import YOLO
import time
from typing import Tuple, Dict, List, Optional, Union

# Default minimum confidence for a detection to be reported; also used as
# the initial value of the UI confidence slider defined below.
DEFAULT_CONF_THRESHOLD = 0.25

# Load YOLO model (using YOLOv8 as it's optimized for CPU).
# NOTE(review): YOLO('yolov8n.pt') presumably downloads the checkpoint on
# first run if it is not cached locally — confirm for offline deployments.
model = YOLO('yolov8n.pt')  # using the nano version for faster CPU inference

# Pin inference to the CPU explicitly so the demo does not require a GPU.
model.to('cpu')

def detect_objects(image, conf_threshold: float = DEFAULT_CONF_THRESHOLD):
    """Run YOLO inference on an RGB image and return an annotated copy.

    Args:
        image: RGB image as a numpy array (as delivered by Gradio), or None.
        conf_threshold: Minimum confidence for a detection to be kept.

    Returns:
        Tuple of (annotated RGB image or None, human-readable summary string).
    """
    if image is None:
        return None, "No image provided"

    # Convert image from RGB (Gradio) to BGR (OpenCV)
    image_cv = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Time only the inference call so the FPS figure reflects the model.
    start_time = time.time()
    results = model(image_cv, conf=conf_threshold)
    elapsed = time.time() - start_time
    # Guard against a zero delta on coarse clocks (avoids ZeroDivisionError).
    fps = 1.0 / elapsed if elapsed > 0 else 0.0

    if not results:
        return image, "No detections found"

    result = results[0]
    output_image = image.copy()

    # Move detections to CPU numpy arrays for plain-Python iteration.
    boxes = result.boxes.cpu().numpy()

    detection_count = {}

    # Draw bounding boxes and labels
    for box in boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        cls_name = result.names[cls_id]

        # Update detection count
        detection_count[cls_name] = detection_count.get(cls_name, 0) + 1

        # Deterministic per-class color. The previous hash()-based scheme
        # changed between interpreter runs (PYTHONHASHSEED randomization),
        # so colors were not actually consistent; derive them from the
        # stable integer class ID instead.
        color = (
            (37 * cls_id + 29) % 256,
            (17 * cls_id + 91) % 256,
            (29 * cls_id + 187) % 256,
        )

        # Draw bounding box
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)

        # Draw label background. Clamp the label baseline so the filled
        # rectangle stays inside the image when a box touches the top edge
        # (otherwise it would be drawn at negative y and clipped away).
        text = f"{cls_name}: {conf:.2f}"
        text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        label_y = max(y1, text_size[1] + 5)
        cv2.rectangle(output_image, (x1, label_y - text_size[1] - 5),
                      (x1 + text_size[0], label_y), color, -1)

        # Draw text
        cv2.putText(output_image, text, (x1, label_y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Create detection summary
    summary = f"FPS: {fps:.1f}\n"
    if detection_count:
        summary += "Detected objects:\n"
        for cls_name, count in sorted(detection_count.items()):
            summary += f"- {cls_name}: {count}\n"
    else:
        summary += "No objects detected"

    return output_image, summary

def process_webcam(image, conf_threshold: float = DEFAULT_CONF_THRESHOLD):
    """Detect objects in a single captured webcam frame.

    Thin adapter over detect_objects so the webcam tab has its own endpoint.
    """
    annotated, summary = detect_objects(image, conf_threshold)
    return annotated, summary

def process_uploaded_image(image, conf_threshold: float = DEFAULT_CONF_THRESHOLD):
    """Detect objects in a user-uploaded still image.

    Thin adapter over detect_objects so the upload tab has its own endpoint.
    """
    outcome = detect_objects(image, conf_threshold)
    return outcome

# Custom CSS injected via gr.HTML below: styles the gradient banner that
# heads the app. Kept as a raw <style> block because it is rendered
# directly into the page together with the header markup.
custom_css = """
<style>
.header-container {
    background: linear-gradient(90deg, #2C3E50, #4CA1AF);
    color: white;
    padding: 1.5rem;
    border-radius: 8px;
    margin-bottom: 1.5rem;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    text-align: center;
}
.header-title {
    font-size: 2.5rem;
    font-weight: bold;
    margin-bottom: 0.5rem;
}
.header-subtitle {
    font-size: 1.2rem;
    opacity: 0.9;
}
</style>
"""

# Full header fragment (CSS + banner markup) passed to gr.HTML() in the UI.
custom_header = f"""
{custom_css}
<div class="header-container">
    <div class="header-title">MyStuff Object Detection Demo</div>
    <div class="header-subtitle">Powered by YOLOv8 and Gradio</div>
</div>
"""

# Create Gradio interface - using Gradio 3.x compatible syntax.
# Layout: banner, a shared confidence slider, two tabs (webcam / upload)
# that both feed detect_objects, and a usage accordion.
with gr.Blocks() as demo:
    gr.HTML(custom_header)
    
    # Global confidence threshold slider — shared by both tabs; its live
    # value is passed into every click handler below.
    with gr.Row():
        conf_threshold = gr.Slider(
            minimum=0.1, 
            maximum=0.9, 
            value=DEFAULT_CONF_THRESHOLD, 
            step=0.05, 
            label="Confidence Threshold", 
            info="Adjust to control detection sensitivity (lower = more detections)"
        )
    
    with gr.Tabs():
        with gr.TabItem("Webcam Detection"):
            with gr.Row():
                with gr.Column():
                    webcam_input = gr.Image(label="Webcam Input")
                    # NOTE(review): this button has no .click() handler wired
                    # up — presumably the camera is enabled via the Image
                    # component itself; confirm whether the button is needed.
                    webcam_button = gr.Button("Enable Webcam", variant="primary")
                with gr.Column():
                    webcam_output = gr.Image(label="Detection Output")
                    webcam_summary = gr.Textbox(label="Detection Summary")
            
            # Process button for webcam: runs detection on the current frame.
            process_webcam_button = gr.Button("Process Webcam Image")
            process_webcam_button.click(
                process_webcam,
                inputs=[webcam_input, conf_threshold],
                outputs=[webcam_output, webcam_summary]
            )
        
        with gr.TabItem("Image Upload Detection"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(label="Upload Image")
                    img_button = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    img_output = gr.Image(label="Detection Output")
                    img_summary = gr.Textbox(label="Detection Summary")
            
            # Wire the upload tab's button to the detection pipeline.
            img_button.click(
                process_uploaded_image,
                inputs=[img_input, conf_threshold],
                outputs=[img_output, img_summary]
            )
    
    # Static help text shown expanded by default.
    with gr.Accordion("Usage Instructions & Notes", open=True):
        gr.Markdown("""
        ### Usage Instructions
        - **Adjust Confidence Threshold**: Use the slider to control detection sensitivity
          - **Higher values** (e.g., 0.7-0.9): Only very confident detections, fewer objects detected
          - **Lower values** (e.g., 0.1-0.3): More objects detected but may include false positives
        - **Webcam Tab:** 
          1. Click "Enable Webcam" to activate your camera
          2. Click "Process Webcam Image" to detect objects in the current frame
        - **Image Upload Tab:** Upload an image and click "Detect Objects"
        
        ### Technical Notes
        - This demo uses YOLOv8 nano model optimized for CPU usage
        - Performance will vary based on your CPU speed
        - Detection runs locally in your browser
        - The FPS counter shows real-time performance metrics
        """)

# Launch the app when run as a script.
# NOTE(review): share=True asks Gradio to create a publicly reachable
# tunnel URL — confirm that exposing this demo publicly is intended.
if __name__ == "__main__":
    demo.launch(share=True)