File size: 5,667 Bytes
16dba1f
 
 
 
 
 
e2b4ce0
 
ee1f310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9ef3f8
e2b4ce0
 
 
 
 
 
 
 
 
 
7d8873a
e2b4ce0
ee1f310
 
 
 
 
 
 
e2b4ce0
 
 
 
 
 
 
 
 
7d8873a
e2b4ce0
16dba1f
8e309ca
ee1f310
8e309ca
ee1f310
 
 
 
 
 
8e309ca
 
 
16dba1f
 
 
 
 
8e309ca
 
 
 
 
 
 
 
 
 
7d8873a
e2b4ce0
16dba1f
 
e2b4ce0
 
16dba1f
7d8873a
16dba1f
 
 
 
 
 
 
7d8873a
 
16dba1f
 
8e309ca
16dba1f
8e309ca
16dba1f
8e309ca
16dba1f
 
 
 
7d8873a
 
 
 
 
 
 
 
16dba1f
 
 
e2b4ce0
16dba1f
 
 
8e309ca
 
 
 
 
 
0d5323f
16dba1f
8e309ca
0d5323f
16dba1f
7d8873a
16dba1f
 
0d5323f
ee1f310
16dba1f
 
 
7d8873a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import gradio as gr
from ultralytics import YOLO
from PIL import Image
import numpy as np
import cv2

# Load the detection weights once at import time; the detection helpers in
# this module all reuse this single model instance.
model = YOLO("yolo26s.pt")

def is_valid_person(x1, y1, x2, y2, img_h, img_w):
    """Return True when a box is plausibly a whole (or half) person.

    Filters out partial detections such as shoes, legs or hands. A box is
    accepted only when it is:

    - at least 4% of the image height tall (i.e. not tiny), and
    - portrait-ish: no more than 1.2x as wide as it is tall.

    Args:
        x1, y1: Top-left corner of the box.
        x2, y2: Bottom-right corner of the box.
        img_h: Height of the image the box is measured against.
        img_w: Width of that image. Currently unused; kept so callers can
            keep passing both dimensions.

    Returns:
        bool: True if the box passes every check, False otherwise.
    """
    box_w = x2 - x1
    box_h = y2 - y1

    # A zero- or negative-height box can never be a person. Rejecting it here
    # also keeps the aspect-ratio division below safe when img_h is 0 and the
    # minimum-height check would otherwise let such a box through.
    if box_h <= 0:
        return False

    # Minimum size — must be at least 4% of image height.
    min_height = img_h * 0.04
    if box_h < min_height:
        return False

    # Aspect ratio — a person box should be taller than wide (portrait).
    # Allow up to a 1.2 width:height ratio to catch slightly leaning people.
    if box_w / box_h > 1.2:
        return False

    return True


def _tile_origins(length, tile_size, step):
    """Return the start offsets of the tiles covering one image axis.

    Offsets advance by ``step``; the final offset is anchored so that the last
    tile ends exactly at ``length``. An axis shorter than ``tile_size`` gets a
    single tile starting at 0, and an empty axis gets no tiles.
    """
    if length <= 0:
        return []
    last = max(length - tile_size, 0)
    origins = list(range(0, last, step))
    origins.append(last)
    return origins


def run_tiled_detection(img_bgr, tile_size=640, overlap=0.2, conf=0.50):
    """Detect people on overlapping tiles and merge the results with NMS.

    The image is cut into ``tile_size`` squares whose origins advance by
    ``tile_size * (1 - overlap)`` pixels. The last tile on each axis is
    anchored to the image edge, so every tile is full-sized whenever the image
    is at least ``tile_size`` along that axis, instead of the grid ending on a
    thin sliver or on a tile that lies entirely inside the previous one.
    Boxes from every tile are shifted into full-image coordinates, filtered
    with ``is_valid_person`` and de-duplicated with non-maximum suppression.

    Args:
        img_bgr: Image array indexed as ``[row, column, ...]``.
        tile_size: Side length of each square tile, in pixels.
        overlap: Fraction of a tile shared with its neighbour.
        conf: Confidence threshold passed to the model and reused as the NMS
            score threshold.

    Returns:
        list[dict]: One dict per kept detection with integer ``x1``, ``y1``,
        ``x2``, ``y2`` (full-image coordinates) and float ``conf``. Empty when
        nothing was detected.

    Raises:
        ValueError: If ``tile_size`` is not positive.
    """
    if tile_size <= 0:
        raise ValueError("tile_size must be a positive number of pixels")

    h, w = img_bgr.shape[:2]
    # Clamp the stride to [1, tile_size]: an overlap >= 1 would otherwise give
    # a zero/negative step, and a negative overlap would leave gaps.
    step = max(1, min(int(tile_size * (1 - overlap)), tile_size))

    x_origins = _tile_origins(w, tile_size, step)
    all_boxes = []
    for y in _tile_origins(h, tile_size, step):
        for x in x_origins:
            # Slicing clips at the image border, so no explicit min() needed.
            tile = img_bgr[y:y + tile_size, x:x + tile_size]
            results = model(tile, classes=[0], conf=conf, verbose=False)
            for box in results[0].boxes:
                bx1, by1, bx2, by2 = map(int, box.xyxy[0])
                # Translate tile-local corners to full-image coordinates.
                fx1, fy1, fx2, fy2 = bx1 + x, by1 + y, bx2 + x, by2 + y
                if is_valid_person(fx1, fy1, fx2, fy2, h, w):
                    all_boxes.append({
                        "x1": fx1, "y1": fy1, "x2": fx2, "y2": fy2,
                        "conf": float(box.conf[0]),
                    })

    if not all_boxes:
        return []

    # NMSBoxes expects [x, y, width, height] rectangles.
    rects = [
        [b["x1"], b["y1"], b["x2"] - b["x1"], b["y2"] - b["y1"]]
        for b in all_boxes
    ]
    scores = [b["conf"] for b in all_boxes]
    kept = cv2.dnn.NMSBoxes(
        rects, scores, score_threshold=conf, nms_threshold=0.45
    )
    # Depending on the OpenCV build the result is an empty tuple, an (N, 1)
    # array or a flat array; normalise all of them to a flat index sequence.
    return [all_boxes[int(i)] for i in np.asarray(kept).reshape(-1)]


def run_standard_detection(img_bgr, conf=0.50):
    """Run a single full-frame inference pass and keep plausible person boxes.

    Args:
        img_bgr: Image array indexed as ``[row, column, ...]``.
        conf: Confidence threshold passed to the model.

    Returns:
        list[dict]: One dict per accepted detection with integer ``x1``,
        ``y1``, ``x2``, ``y2`` and float ``conf``.
    """
    img_h, img_w = img_bgr.shape[:2]
    prediction = model(img_bgr, classes=[0], conf=conf, verbose=False)[0]

    people = []
    for candidate in prediction.boxes:
        corners = candidate.xyxy[0]
        left, top, right, bottom = (int(corners[i]) for i in range(4))
        # Drop boxes that look like fragments rather than a person.
        if not is_valid_person(left, top, right, bottom, img_h, img_w):
            continue
        people.append({
            "x1": left,
            "y1": top,
            "x2": right,
            "y2": bottom,
            "conf": float(candidate.conf[0]),
        })
    return people


def count_people(image, mode):
    """Count the people in an uploaded image and draw the detections on it.

    Args:
        image: The uploaded picture (a PIL image from the upload widget), or
            None when nothing was uploaded.
        mode: Detection-mode label from the UI. "Auto (recommended)" tiles
            only images wider than 1920 px or taller than 1080 px, "Tiled
            (large crowd/wide shot)" always tiles, and any other value runs a
            single full-frame pass.

    Returns:
        tuple: ``(annotated_image, summary)``. ``annotated_image`` is a PIL
        image with a box per detection and a total banner (None when no image
        was supplied); ``summary`` is a one-line status string.
    """
    if image is None:
        return None, "No image uploaded."

    # Normalise PIL input to 3-channel RGB first: a grayscale or palette image
    # would otherwise become a 2-D array that COLOR_RGB2BGR rejects.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")
    img_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    h, w = img_bgr.shape[:2]

    if mode == "Auto (recommended)":
        use_tiling = w > 1920 or h > 1080
    elif mode == "Tiled (large crowd/wide shot)":
        use_tiling = True
    else:
        use_tiling = False

    conf = 0.50
    if use_tiling:
        detections = run_tiled_detection(img_bgr, conf=conf)
    else:
        detections = run_standard_detection(img_bgr, conf=conf)
    count = len(detections)

    font = cv2.FONT_HERSHEY_SIMPLEX
    box_color = (0, 200, 100)

    annotated = img_bgr.copy()
    for det in detections:
        x1, y1, x2, y2 = det["x1"], det["y1"], det["x2"], det["y2"]
        cv2.rectangle(annotated, (x1, y1), (x2, y2), box_color, 2)
        # Keep the label baseline inside the frame when the box touches the
        # top edge; otherwise the text would be drawn off-canvas.
        label_y = max(y1 - 8, 15)
        cv2.putText(annotated, f"Person {det['conf']:.0%}", (x1, label_y),
                    font, 0.55, box_color, 2)

    overlay_text = f"Total People Detected: {count}"
    # Size the banner from the rendered text width rather than a per-character
    # guess, so the background always spans the whole caption.
    (text_w, _text_h), _baseline = cv2.getTextSize(overlay_text, font, 0.85, 2)
    cv2.rectangle(annotated, (0, 0), (text_w + 20, 45), (0, 0, 0), -1)
    cv2.putText(annotated, overlay_text, (10, 30),
                font, 0.85, (255, 255, 255), 2)

    output_image = Image.fromarray(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    method = "Tiled" if use_tiling else "Standard"

    # NOTE(review): the emoji below were reconstructed from mis-encoded bytes;
    # the single-person glyph could also have been the "eyes" emoji — confirm.
    if count == 0:
        summary = f"👤 No people detected. [{method}]"
    elif count == 1:
        summary = f"👤 1 person detected. [{method}]"
    else:
        summary = f"👥 {count} people detected. [{method}]"

    return output_image, summary


# Page-level styling: centre the app in a fixed-width column and style the
# title block addressed by elem_id="title".
custom_css = """
    .gradio-container { max-width: 860px !important; margin: auto; }
    #title { text-align: center; padding: 2rem 0 0.5rem; }
    #title h1 { font-size: 2.2rem; font-weight: 800; letter-spacing: -0.5px; }
    #title p  { font-size: 1rem; margin-top: 0.3rem; }
"""

with gr.Blocks(title="People Counter — YOLO26") as demo:

    # Header.
    # NOTE(review): the heading icon was reconstructed from a corrupted byte
    # sequence; a magnifying glass is assumed — confirm the intended glyph.
    with gr.Column(elem_id="title"):
        gr.HTML("<h1>🔍 People Counter</h1>")
        gr.HTML("<p>Upload an image — YOLO26 will detect and count every person in it.</p>")

    # Left column: inputs. Right column: annotated image and count summary.
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Image", height=360)
            # These labels are matched verbatim inside count_people.
            mode = gr.Radio(
                choices=["Auto (recommended)", "Standard (close-up/normal)", "Tiled (large crowd/wide shot)"],
                value="Auto (recommended)",
                label="Detection Mode"
            )
            run_btn = gr.Button("Count People →", variant="primary")
        with gr.Column():
            output_image = gr.Image(type="pil", label="Detection Result", height=360)
            output_text = gr.Textbox(label="Count Result", interactive=False, lines=1)

    run_btn.click(fn=count_people, inputs=[input_image, mode], outputs=[output_image, output_text])

    # Footer describing the fixed detection settings.
    gr.HTML("""
        <div style='text-align:center; color:#888; font-size:0.8rem; padding: 1.5rem 0 0.5rem;'>
            Powered by <strong>Ultralytics YOLO26</strong> · Model: yolo26s · Conf: 0.50 · Full-body filter ON
        </div>
    """)

# NOTE(review): css is passed to launch() here; Gradio releases before 6.0
# take it on gr.Blocks(...) instead — confirm against the pinned version.
demo.launch(css=custom_css)