mlbench123 commited on
Commit
309e137
·
verified ·
1 Parent(s): 538b7f4

Upload 4 files

Browse files
Files changed (4) hide show
  1. head_detection_single_video_best.pt +3 -0
  2. requirements.txt +21 -0
  3. run7.py +829 -0
  4. yolo11x.pt +3 -0
head_detection_single_video_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a33be921c31b44083693db0d03025604fe60c94d4846a168fe7f93d158a15a
3
+ size 16737774
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core Dependencies
2
+ numpy==1.24.3
3
+ opencv-python==4.8.1.78
4
+ scipy==1.11.4
5
+
6
+ # Computer Vision & Object Detection (CPU-only for Mac)
7
+ torch==2.1.0
8
+ torchvision==0.16.0
9
+ ultralytics==8.1.3
10
+
11
+ # Data Processing
12
+ pandas==2.1.4
13
+ openpyxl==3.1.2
14
+
15
+ # Optional but recommended for better UX
16
+ tqdm==4.66.1 # Progress bars
17
+ PyYAML==6.0.1 # Configuration files
18
+
19
+ # Optional visualization (can be removed if client doesn't need)
20
+ matplotlib==3.8.0
21
+ seaborn==0.13.0
run7.py ADDED
@@ -0,0 +1,829 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # run7.py
2
+
3
+ # Updated to implement Option 1 directional crossing:
4
+ # - Detect directional crossing of L1 then L2 (L1 coords and L2 coords provided)
5
+ # - Maintain a global counter that increments only when an ID crosses L1 (outside->inside) then later crosses L2 (outside->inside)
6
+ # - Maintain a live "inside polygon" counter
7
+ # - Visualize both counters in Zone Summary panel
8
+ # - Keeps all previous features: homography patch, foot-point mapping, travel distance, avg time, occlusion tolerance and reappearance inheritance
9
+ # Paste and run. Output video and person_times.xlsx saved in working folder.
10
+
11
+ import cv2
12
+ import numpy as np
13
+ import time
14
+ import torch
15
+ import pandas as pd
16
+ from collections import defaultdict, deque
17
+ from scipy.ndimage import gaussian_filter1d
18
+ from ultralytics import YOLO
19
+ import os
20
+
21
# ---------------- Points in image (given) - adjust if needed
# Calibration corners of the monitored zone, in image pixel coordinates.
A = (440.0, 829.0)
B = (883.0, 928.0)
C = (1052.0, 325.0)
D = (739.0, 297.0)
E = (727.0, 688.0)  # lower end of the center divider (used in the schematic patch)
F = (893.0, 312.0)  # upper end of the center divider (used in the schematic patch)

# Quadrilateral zone used for the inside/outside test (order A->B->C->D).
POLYGON = np.array([A, B, C, D], dtype=np.float32)

# ---------------- Real-world segment lengths for path C -> B -> A -> D (meters)
SEG_REAL_M = [5.0, 2.5, 5.0] # C->B, B->A, A->D
# image path (order C,B,A,D)
PATH_IMAGE = np.array([C, B, A, D], dtype=np.float32)

# Patch base scaling (pixels per meter). Will adapt to fit.
BASE_SCALE_PX_PER_M = 80.0
# Width (pixels) of the summary panel appended to the right of each frame.
RIGHT_PANEL_W = 350

# Exponential-smoothing weight for patch dot positions (higher = follows detections faster).
SMOOTH_ALPHA = 0.65
# Seconds a tracked ID may go unseen before it is treated as having exited.
MISSING_TIMEOUT = 3.0

# ---------------- Lines (L1, L2) coordinates (image space) - use these for counting
L1_p1 = (898.0, 322.0)
L1_p2 = (1020.0, 453.0)
L2_p1 = (786.0, 576.0)
L2_p2 = (977.0, 607.0)
48
+
49
+ # ---------------- Utilities
50
def progress_bar(current, total, bar_length=30):
    """Print a single-line console progress bar for frame processing.

    Does nothing when *total* is not positive (e.g. frame count unknown).
    """
    if total <= 0:
        return
    fraction = current / total
    done = int(fraction * bar_length)
    meter = "█" * done + "-" * (bar_length - done)
    print(f"\r[{meter}] {int(fraction * 100)}% Frame {current}/{total}", end="")
57
+
58
def point_in_polygon(cx, cy, polygon):
    """Return True when (cx, cy) lies inside or on the boundary of *polygon*."""
    verdict = cv2.pointPolygonTest(polygon.astype(np.int32), (int(cx), int(cy)), False)
    return verdict >= 0
60
+
61
def euclid(a, b):
    """Euclidean distance between 2-D points *a* and *b*."""
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return float(np.hypot(dx, dy))
63
+
64
def fmt(t):
    """Format a duration of *t* seconds as an HH:MM:SS string (wraps at 24 h)."""
    parts = time.gmtime(t)
    return time.strftime('%H:%M:%S', parts)
66
+
67
def calculate_foot_from_head(head_box, head_center):
    """Estimate a foot position from a head detection.

    The body is assumed to extend straight down from the head center by
    ~5.5 head-heights, so the foot shares the head's x coordinate.
    """
    _, y_top, _, y_bottom = head_box
    cx, cy = head_center
    estimated_body_length = (y_bottom - y_top) * 5.5
    return cx, cy + estimated_body_length
76
+
77
def nms_obb(boxes, scores, threshold=0.4):
    """Non-Maximum Suppression for Oriented Bounding Boxes.

    Args:
        boxes: list of flattened 8-value polygons [x1, y1, ..., x4, y4].
        scores: confidence score per box, same length as *boxes*.
        threshold: IoU above which the lower-scored box is suppressed.

    Returns:
        list[int]: indices of the kept boxes, highest score first.

    The OBBs are approximated by their axis-aligned extents for the IoU
    computation — cheap and adequate for near-upright head detections.
    """
    if len(boxes) == 0:
        return []

    boxes_np = np.asarray(boxes, dtype=np.float64)
    scores_np = np.asarray(scores, dtype=np.float64)

    # Axis-aligned extents of each oriented polygon.
    x_coords = boxes_np[:, 0::2]
    y_coords = boxes_np[:, 1::2]

    x_min = x_coords.min(axis=1)
    y_min = y_coords.min(axis=1)
    x_max = x_coords.max(axis=1)
    y_max = y_coords.max(axis=1)

    areas = (x_max - x_min) * (y_max - y_min)
    order = scores_np.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))  # plain int, not numpy scalar

        # Intersection of the current best box with all remaining boxes.
        xx1 = np.maximum(x_min[i], x_min[order[1:]])
        yy1 = np.maximum(y_min[i], y_min[order[1:]])
        xx2 = np.minimum(x_max[i], x_max[order[1:]])
        yy2 = np.minimum(y_max[i], y_max[order[1:]])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        intersection = w * h

        union = areas[i] + areas[order[1:]] - intersection
        # Epsilon guards against 0/0 (NaN) for degenerate zero-area boxes.
        iou = intersection / (union + 1e-12)

        inds = np.where(iou <= threshold)[0]
        order = order[inds + 1]

    return keep
117
+
118
+ # ---------------- Project point onto polyline (returns along distance in px and proj point)
119
# ---------------- Project point onto polyline (returns along distance in px and proj point)
def project_point_to_polyline(pt, poly):
    """Project *pt* onto polyline *poly*.

    Returns (along_px, (px, py)): the cumulative pixel distance from the
    start of the polyline to the closest projection, and the projected
    point itself. Projections are clamped to each segment's endpoints.
    """
    p = np.array(pt, dtype=np.float32)
    best = None       # (perpendicular distance, along distance, projection)
    walked = 0.0      # pixel length of all segments already passed
    for start, end in zip(poly[:-1], poly[1:]):
        a = np.array(start, dtype=np.float32)
        b = np.array(end, dtype=np.float32)
        seg = b - a
        seg_len = float(np.hypot(seg[0], seg[1]))
        if seg_len == 0:
            t = 0.0
            proj = a.copy()
        else:
            t = float(np.dot(p - a, seg) / (seg_len * seg_len))
            t = min(1.0, max(0.0, t))
            proj = a + t * seg
        perp = float(np.hypot(proj[0] - p[0], proj[1] - p[1]))
        if best is None or perp < best[0]:
            best = (perp, walked + t * seg_len, proj)
        walked += seg_len
    return float(best[1]), (float(best[2][0]), float(best[2][1]))
145
+
146
def polyline_pixel_lengths(poly):
    """Pixel length of each consecutive segment of polyline *poly*."""
    return [float(np.hypot(q[0] - p[0], q[1] - p[1])) for p, q in zip(poly[:-1], poly[1:])]
148
+
149
+ # ---------------- Compute conversion per segment (image)
150
+ img_seg_px_lengths = polyline_pixel_lengths(PATH_IMAGE)
151
+ if len(img_seg_px_lengths) != len(SEG_REAL_M):
152
+ raise RuntimeError("PATH_IMAGE and SEG_REAL_M length mismatch")
153
+
154
+ seg_px_to_m = []
155
+ for px_len, m_len in zip(img_seg_px_lengths, SEG_REAL_M):
156
+ seg_px_to_m.append((m_len / px_len) if px_len > 1e-6 else 0.0)
157
+
158
+ # helper: compute along_m from an image point using image PATH_IMAGE
159
+ def image_point_to_along_m(pt):
160
+ along_px, _ = project_point_to_polyline(pt, PATH_IMAGE)
161
+ px_cum = 0.0
162
+ cum_m = 0.0
163
+ for i, seg_px in enumerate(img_seg_px_lengths):
164
+ next_px = px_cum + seg_px
165
+ if along_px <= next_px + 1e-9:
166
+ offset_px = along_px - px_cum
167
+ along_m = cum_m + offset_px * seg_px_to_m[i]
168
+ return float(max(0.0, min(sum(SEG_REAL_M), along_m)))
169
+ px_cum = next_px
170
+ cum_m += SEG_REAL_M[i]
171
+ return float(sum(SEG_REAL_M))
172
+
173
+ # ---------------- Build patch rectangle layout (pixel coordinates)
174
+ def build_patch_layout(scale_px_per_m):
175
+ margin = 18
176
+ rect_w_px = int(2.5 * scale_px_per_m)
177
+ rect_h_px = int(5.0 * scale_px_per_m)
178
+ patch_w = rect_w_px + 2*margin
179
+ patch_h = rect_h_px + 2*margin
180
+ left_x = margin
181
+ right_x = margin + rect_w_px
182
+ top_y = margin
183
+ bottom_y = margin + rect_h_px
184
+ # top row: D (left-top), F (mid-top), C (right-top)
185
+ D_p = (left_x, top_y)
186
+ F_p = ( (left_x + right_x)//2, top_y )
187
+ C_p = (right_x, top_y)
188
+ A_p = (left_x, bottom_y)
189
+ B_p = (right_x, bottom_y)
190
+ # E point down from F
191
+ E_p = (F_p[0], top_y + int(rect_h_px * 0.55))
192
+ path_patch = np.array([C_p, B_p, A_p, D_p], dtype=np.float32) # C->B->A->D
193
+ extras = {"patch_w": patch_w, "patch_h": patch_h, "D": D_p, "F": F_p, "C": C_p, "A": A_p, "B": B_p, "E": E_p, "scale": scale_px_per_m}
194
+ return path_patch, extras
195
+
196
# Default patch layout at module scale; process_video() shrinks a local copy
# if the patch does not fit under the frame height.
PATCH_PATH, PATCH_EXTRAS = build_patch_layout(BASE_SCALE_PX_PER_M)
PATCH_W = PATCH_EXTRAS["patch_w"]
PATCH_H = PATCH_EXTRAS["patch_h"]
199
+
200
# ---------------- Line helpers for crossing detection
def line_coeffs(p1, p2):
    """Coefficients (a, b, c) of the infinite line a*x + b*y + c = 0 through p1 and p2."""
    (x1, y1), (x2, y2) = p1, p2
    return (y1 - y2, x2 - x1, x1 * y2 - x2 * y1)
208
+
209
def signed_dist_to_line(p, line_coeff):
    """Signed perpendicular distance from point *p* to the line (a, b, c).

    The sign identifies which half-plane *p* lies in; a tiny epsilon in
    the denominator avoids division by zero for degenerate lines.
    """
    a, b, c = line_coeff
    numerator = a * p[0] + b * p[1] + c
    return numerator / (np.hypot(a, b) + 1e-12)
213
+
214
def segment_intersects(a1, a2, b1, b2):
    """Return True when segments a1-a2 and b1-b2 properly cross each other."""
    def orient(p, q, r):
        # True when the ordered triple (p, q, r) makes a counter-clockwise turn.
        return (r[1] - p[1]) * (q[0] - p[0]) > (q[1] - p[1]) * (r[0] - p[0])
    return (orient(a1, b1, b2) != orient(a2, b1, b2)) and \
           (orient(a1, a2, b1) != orient(a1, a2, b2))
220
+
221
# Implicit-line coefficients for the two counting lines.
L1_coeff = line_coeffs(L1_p1, L1_p2)
L2_coeff = line_coeffs(L2_p1, L2_p2)

# Determine inside side for each line using polygon centroid:
# the sign of the centroid's signed distance defines which half-plane counts
# as "inside"; a zero sign (centroid exactly on the line) defaults to +1.
poly_centroid = tuple(np.mean(POLYGON, axis=0).tolist())
L1_inside_sign = np.sign(signed_dist_to_line(poly_centroid, L1_coeff))
if L1_inside_sign == 0:
    L1_inside_sign = 1.0
L2_inside_sign = np.sign(signed_dist_to_line(poly_centroid, L2_coeff))
if L2_inside_sign == 0:
    L2_inside_sign = 1.0
232
+
233
# ---------------- BBox smoother
class BBoxSmoother:
    """Temporal smoother for per-track bounding boxes.

    Keeps a short history of boxes per track ID and returns a
    Gaussian-filtered box once at least three observations exist;
    otherwise the latest raw box is passed through unchanged.
    """

    def __init__(self, buffer_size=5):
        self.buf = buffer_size
        self.hist = defaultdict(lambda: deque(maxlen=buffer_size))

    def smooth(self, boxes, ids):
        """Smooth *boxes* (one per track in *ids*); returns an ndarray of boxes."""
        smoothed = []
        for raw_box, track_id in zip(boxes, ids):
            history = self.hist[track_id]
            history.append(raw_box)
            stacked = np.array(history)
            if stacked.shape[0] < 3:
                smoothed.append(stacked[-1])
            else:
                smoothed.append(gaussian_filter1d(stacked, sigma=1, axis=0)[-1])
        return np.array(smoothed)
249
+
250
# ---------------- Main processing function
def process_video(
    input_video_path="crop_video.mp4",
    output_video_path="people_polygon_tracking_corrected.avi",
    model_name="yolo11x.pt",
    head_model_name="head_detection_model.pt",
    conf_threshold=0.3,
    img_size=1280,
    use_gpu=True,
    enhance_frames=False,
    smooth_bbox_tracks=True,
    missing_timeout=MISSING_TIMEOUT
):
    """Track people in the queue zone and write an annotated video + Excel report.

    Per frame: runs YOLO person tracking (ByteTrack) plus an OBB head
    detector, fuses head/person detections into a foot point, tests that
    point against POLYGON, detects directional L1->L2 crossings for a
    global counter, maps the foot point through a homography into a
    schematic top-down patch, and composites frame + summary panel + patch
    into the output video. On completion, per-ID queue times are exported
    to person_times.xlsx.

    Args:
        input_video_path: source video file.
        output_video_path: annotated output video (MJPG .avi).
        model_name: YOLO weights for person detection/tracking.
        head_model_name: YOLO OBB weights for head detection.
        conf_threshold: detection confidence threshold for both models.
        img_size: inference image size.
        use_gpu: use CUDA when available (also enables half precision).
        enhance_frames: apply denoising to each frame (slow).
        smooth_bbox_tracks: temporally smooth boxes with BBoxSmoother.
        missing_timeout: seconds an ID may go unseen before it "exits".

    Raises:
        RuntimeError: when the input video or output writer cannot be opened.

    NOTE(review): all timing uses wall-clock time.time(), not frame
    timestamps — "time in queue" therefore reflects processing speed,
    not video time. Confirm this is intended before relying on the report.
    """
    device = "cuda" if torch.cuda.is_available() and use_gpu else "cpu"
    model = YOLO(model_name)
    PERSON_CLASS = 0  # COCO class id for "person"
    head_model = YOLO(head_model_name)  # Your OBB head detection model
    HEAD_CLASS = 0
    bbox_smoother = BBoxSmoother(5) if smooth_bbox_tracks else None

    # persistent state (all keyed by tracker ID)
    inside_state = {}                       # id -> currently inside polygon?
    entry_time = {}                         # id -> wall-clock time of current entry
    accumulated_time = defaultdict(float)   # id -> total seconds spent inside
    first_entry_vid = {}                    # id -> first entry (seconds since start)
    last_exit_vid = {}                      # id -> last exit (seconds since start)
    last_seen = {}                          # id -> wall-clock time last detected
    prev_along = {}                         # id -> last along-path position (m)
    prev_time = {}                          # id -> time of last along-path update
    entry_along = {}                        # id -> along-path position at entry (m)
    travel_distance = defaultdict(float)    # id -> forward-only distance walked (m)

    display_pos = {}                        # id -> smoothed patch coordinates
    head_foot_positions = {}  # Stores head detections with estimated foot positions
    person_only_ids = set()  # Track person-only detections
    head_only_ids = set()  # Track head-only detections

    # crossing trackers
    prev_foot = {}  # {id: (x,y)} previous foot coordinate (image space)
    crossed_l1_flag = {}  # {id: bool} whether this id has crossed L1 (in required direction) and not yet used to count
    crossed_l2_counted = {}  # {id: bool} whether this id has already triggered the global count by crossing L2 after L1

    global_counter = 0  # counts completed L1->L2 sequences
    completed_times = []  # for avg time taken

    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        raise RuntimeError("Cannot open input video: " + input_video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 25  # fall back to 25 when FPS unknown
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Output canvas: original frame plus the right-hand summary panel.
    out_w = width + RIGHT_PANEL_W
    out_h = height
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    writer = cv2.VideoWriter(output_video_path, fourcc, fps, (out_w, out_h))
    if not writer.isOpened():
        raise RuntimeError("Failed to open VideoWriter. Try different codec or path.")

    # adjust patch scale if too tall for the frame
    PATCH_PATH_local = PATCH_PATH.copy()
    patch_w = PATCH_W
    patch_h = PATCH_H
    patch_scale = PATCH_EXTRAS["scale"]
    if patch_h > height - 40:
        factor = (height - 60) / patch_h
        PATCH_PATH_local = PATCH_PATH_local * factor
        patch_w = int(patch_w * factor)
        patch_h = int(patch_h * factor)
        patch_scale = patch_scale * factor

    # Create homography from POLYGON (image A,B,C,D) to rect corners in patch coordinates (A_p,B_p,C_p,D_p)
    A_p = PATCH_EXTRAS["A"]
    B_p = PATCH_EXTRAS["B"]
    C_p = PATCH_EXTRAS["C"]
    D_p = PATCH_EXTRAS["D"]
    dest_rect = np.array([A_p, B_p, C_p, D_p], dtype=np.float32)
    H_img2patch = cv2.getPerspectiveTransform(POLYGON.astype(np.float32), dest_rect.astype(np.float32))

    start_time = time.time()
    frame_idx = 0

    # precompute line endpoints & ints for visualization and intersection tests
    # NOTE(review): L1/L2 tuples are not referenced below — candidates for removal.
    L1 = (L1_p1, L1_p2)
    L2 = (L2_p1, L2_p2)

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_idx += 1
        progress_bar(frame_idx, total_frames)
        now = time.time()
        vid_seconds = now - start_time  # wall-clock seconds since processing began

        if enhance_frames:
            frame = cv2.fastNlMeansDenoisingColored(frame, None, 5,5,7,21)

        # Person detection + tracking (ByteTrack keeps IDs across frames).
        results = model.track(
            frame,
            persist=True,
            tracker="bytetrack.yaml",
            classes=[PERSON_CLASS],
            conf=conf_threshold,
            iou=0.5,
            imgsz=img_size,
            device=device,
            half=use_gpu,
            verbose=False
        )

        # Head detection (NEW - runs in parallel)
        head_results = head_model(frame, conf=conf_threshold, classes=[HEAD_CLASS], verbose=False)[0]

        # Process head detections
        obb_boxes = []
        obb_scores = []
        obb_data = []
        head_foot_positions = {}  # {estimated_foot_pos: (head_box, conf)}

        if head_results.obb is not None and len(head_results.obb) > 0:
            for obb in head_results.obb:
                xyxyxyxy = obb.xyxyxyxy[0].cpu().numpy()
                conf = float(obb.conf[0])

                if conf < conf_threshold:
                    continue

                obb_boxes.append(xyxyxyxy.flatten().tolist())
                obb_scores.append(conf)
                obb_data.append((xyxyxyxy, conf))

        # Apply NMS to head detections
        if len(obb_boxes) > 0:
            keep_indices = nms_obb(obb_boxes, obb_scores, 0.4)

            for idx in keep_indices:
                xyxyxyxy, conf = obb_data[idx]

                # Convert OBB to axis-aligned bbox
                x_min = int(xyxyxyxy[:, 0].min())
                y_min = int(xyxyxyxy[:, 1].min())
                x_max = int(xyxyxyxy[:, 0].max())
                y_max = int(xyxyxyxy[:, 1].max())

                head_cx = (x_min + x_max) / 2.0
                head_cy = float(y_min)

                # Calculate foot from head
                foot_x, foot_y = calculate_foot_from_head(
                    [x_min, y_min, x_max, y_max],
                    (head_cx, head_cy)
                )

                head_foot_positions[(foot_x, foot_y)] = ((x_min, y_min, x_max, y_max, xyxyxyxy), conf)

        # draw polygon on frame
        cv2.polylines(frame, [POLYGON.astype(np.int32)], True, (255,0,0), 3)

        # draw L1 and L2 on frame (blue)
        cv2.line(frame, tuple(map(int, L1_p1)), tuple(map(int, L1_p2)), (255,180,0), 3)
        cv2.line(frame, tuple(map(int, L2_p1)), tuple(map(int, L2_p2)), (255,180,0), 3)

        right_panel = np.ones((height, RIGHT_PANEL_W, 3), dtype=np.uint8) * 40
        patch = np.ones((patch_h, patch_w, 3), dtype=np.uint8) * 255

        # draw patch structure: rectangle and center divider
        A_px = (int(dest_rect[0][0]), int(dest_rect[0][1]))
        B_px = (int(dest_rect[1][0]), int(dest_rect[1][1]))
        C_px = (int(dest_rect[2][0]), int(dest_rect[2][1]))
        D_px = (int(dest_rect[3][0]), int(dest_rect[3][1]))
        # walls (thick black lines)
        cv2.line(patch, A_px, D_px, (0,0,0), 6) # left
        cv2.line(patch, A_px, B_px, (0,0,0), 6) # bottom
        cv2.line(patch, B_px, C_px, (0,0,0), 6) # right
        cv2.line(patch, D_px, C_px, (0,0,0), 6) # top
        # center divider F->E
        F_px = ( (D_px[0] + C_px[0])//2, D_px[1] )
        E_px = (F_px[0], D_px[1] + int((patch_h) * 0.5))
        cv2.line(patch, F_px, E_px, (0,0,0), 6)
        for p in [A_px, B_px, C_px, D_px, F_px, E_px]:
            cv2.circle(patch, p, 5, (0,0,0), -1)

        # Match person detections with head detections
        person_head_matches = {}  # {person_id: head_foot_pos}
        matched_heads = set()

        b = results[0].boxes
        detected_ids = set()
        current_inside = []
        current_projs = []

        if b is not None and b.id is not None:
            boxes = b.xyxy.cpu().numpy()
            ids = b.id.cpu().numpy().astype(int)
            if bbox_smoother is not None:
                boxes = bbox_smoother.smooth(boxes, ids)

            # First pass: match person detections with head detections
            for box, tid in zip(boxes, ids):
                x1, y1, x2, y2 = map(int, box)
                person_foot_x = float((x1 + x2) / 2.0)
                person_foot_y = float(y2)

                # Find closest head detection within reasonable distance
                best_head = None
                best_dist = 100 # pixels threshold

                for head_foot_pos, (head_box_data, head_conf) in head_foot_positions.items():
                    head_fx, head_fy = head_foot_pos
                    dist = np.sqrt((person_foot_x - head_fx)**2 + (person_foot_y - head_fy)**2)

                    # Check if head is roughly above person bbox (y_head < y_person_top)
                    head_box = head_box_data[:4]
                    if head_box[3] < y1 + 50: # head bottom should be near person top
                        if dist < best_dist and head_foot_pos not in matched_heads:
                            best_dist = dist
                            best_head = head_foot_pos

                if best_head:
                    person_head_matches[tid] = best_head
                    matched_heads.add(best_head)
                    person_only_ids.discard(tid)
                else:
                    person_only_ids.add(tid)


            # Second pass: per-ID zone logic, crossing detection and drawing.
            for box, tid in zip(boxes, ids):
                x1, y1, x2, y2 = map(int, box)

                # Use head-derived foot if available, otherwise use person bbox foot
                if tid in person_head_matches:
                    fx, fy = person_head_matches[tid]
                    head_box_data, head_conf = head_foot_positions[person_head_matches[tid]]
                    head_box = head_box_data[:4]
                    xyxyxyxy = head_box_data[4]
                    # Draw head OBB (cyan for matched detection)
                    points = xyxyxyxy.astype(np.int32)
                    cv2.polylines(frame, [points], True, (255, 255, 0), 2)
                else:
                    fx = float((x1 + x2) / 2.0)
                    fy = float(y2) # bottom center (foot)

                detected_ids.add(tid)
                last_seen[tid] = now

                inside = point_in_polygon(fx, fy, POLYGON)
                prev = inside_state.get(tid, False)

                # maintain prev_foot for intersection tests
                prev_pt = prev_foot.get(tid, None)
                current_pt = (fx, fy)

                # Crossing detection for L1
                if prev_pt is not None:
                    # check intersection with L1
                    inter_l1 = segment_intersects(prev_pt, current_pt, L1_p1, L1_p2)
                    if inter_l1:
                        # check direction: we want prev_sign != curr_sign and curr_sign == inside sign
                        prev_sign = np.sign(signed_dist_to_line(prev_pt, L1_coeff))
                        curr_sign = np.sign(signed_dist_to_line(current_pt, L1_coeff))
                        if prev_sign == 0:
                            prev_sign = -curr_sign if curr_sign != 0 else 1.0
                        if curr_sign == 0:
                            curr_sign = prev_sign
                        if prev_sign != curr_sign and curr_sign == L1_inside_sign:
                            # crossed L1 in correct direction (outside -> inside)
                            crossed_l1_flag[tid] = True

                    # check intersection with L2
                    inter_l2 = segment_intersects(prev_pt, current_pt, L2_p1, L2_p2)
                    if inter_l2:
                        prev_sign = np.sign(signed_dist_to_line(prev_pt, L2_coeff))
                        curr_sign = np.sign(signed_dist_to_line(current_pt, L2_coeff))
                        if prev_sign == 0:
                            prev_sign = -curr_sign if curr_sign != 0 else 1.0
                        if curr_sign == 0:
                            curr_sign = prev_sign
                        if prev_sign != curr_sign and curr_sign == L2_inside_sign:
                            # crossed L2 in correct direction; if previously crossed L1 and not yet counted => count
                            if crossed_l1_flag.get(tid, False) and not crossed_l2_counted.get(tid, False):
                                global_counter += 1
                                crossed_l2_counted[tid] = True
                                # once person completed crossing sequence, we keep their travel/time records intact
                # update prev_foot
                prev_foot[tid] = current_pt

                # Outside -> inside transition starts (or resumes) the timer.
                if inside and not prev:
                    inside_state[tid] = True
                    if tid not in entry_time:
                        entry_time[tid] = now
                    if tid not in first_entry_vid:
                        first_entry_vid[tid] = vid_seconds

                if tid not in accumulated_time:
                    accumulated_time[tid] = 0.0
                if tid not in travel_distance:
                    travel_distance[tid] = 0.0

                # draw bbox only for inside persons
                if inside:
                    # Green if matched with head, yellow if person-only
                    color = (0, 200, 0) if tid in person_head_matches else (0, 200, 200)
                    cv2.rectangle(frame, (x1,y1), (x2,y2), color, 2)
                    cv2.putText(frame, f"ID {tid}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

                    # map foot point through homography to patch coordinates (this is the key)
                    pt_img = np.array([[[fx, fy]]], dtype=np.float32)
                    mapped = cv2.perspectiveTransform(pt_img, H_img2patch)[0][0]
                    mx = float(np.clip(mapped[0], 0, patch_w - 1))
                    my = float(np.clip(mapped[1], 0, patch_h - 1))

                    # smooth display position (exponential moving average)
                    if tid in display_pos:
                        px_prev, py_prev = display_pos[tid]
                        sx = SMOOTH_ALPHA
                        dx = px_prev*(1 - sx) + mx*sx
                        dy = py_prev*(1 - sx) + my*sx
                    else:
                        dx, dy = mx, my
                    display_pos[tid] = (dx, dy)
                    current_inside.append(tid)

                    # compute along_m using image-based method for metric consistency
                    along_m = image_point_to_along_m((fx, fy))
                    current_projs.append((tid, along_m))

                    # initialize prev_along if first time
                    if tid not in prev_along:
                        prev_along[tid] = along_m
                        entry_along[tid] = along_m
                        prev_time[tid] = now

                    # compute forward-only travel distance (backward motion ignored)
                    delta = along_m - prev_along.get(tid, along_m)
                    if delta > 0:
                        travel_distance[tid] += delta
                    prev_along[tid] = along_m
                    prev_time[tid] = now

        # Head-only recovery: reattach unmatched heads to recently-lost IDs.
        for head_foot_pos, (head_box_data, head_conf) in head_foot_positions.items():
            if head_foot_pos in matched_heads:
                continue # Already matched with a person

            fx, fy = head_foot_pos

            # Only process if inside polygon
            if not point_in_polygon(fx, fy, POLYGON):
                continue

            # Try to match with existing tracked IDs by proximity
            matched_existing = False
            for tid in list(inside_state.keys()):
                if tid in detected_ids:
                    continue # Already detected this frame

                if tid in display_pos:
                    prev_x, prev_y = display_pos[tid]
                    # Check if head is near previous position
                    # NOTE(review): display_pos is in patch coordinates while
                    # (fx, fy) is in image coordinates — confirm this distance
                    # comparison is intended.
                    dist = np.sqrt((fx - prev_x)**2 + (fy - prev_y)**2)
                    if dist < 80: # pixels threshold
                        # Reactivate this ID using head detection
                        detected_ids.add(tid)
                        last_seen[tid] = now
                        prev_foot[tid] = (fx, fy)
                        matched_existing = True
                        head_only_ids.add(tid)

                        # Draw head detection (red for head-only recovery)
                        head_box = head_box_data[:4]
                        xyxyxyxy = head_box_data[4]
                        points = xyxyxyxy.astype(np.int32)
                        cv2.polylines(frame, [points], True, (0, 0, 255), 2)
                        cv2.putText(frame, f"ID {tid} (H)", (int(head_box[0]), int(head_box[1]) - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

                        # Continue tracking
                        inside_state[tid] = True
                        current_inside.append(tid)

                        # Map through homography
                        pt_img = np.array([[[fx, fy]]], dtype=np.float32)
                        mapped = cv2.perspectiveTransform(pt_img, H_img2patch)[0][0]
                        mx = float(np.clip(mapped[0], 0, patch_w - 1))
                        my = float(np.clip(mapped[1], 0, patch_h - 1))

                        # Smooth display position
                        if tid in display_pos:
                            px_prev, py_prev = display_pos[tid]
                            sx = SMOOTH_ALPHA
                            dx = px_prev*(1 - sx) + mx*sx
                            dy = py_prev*(1 - sx) + my*sx
                        else:
                            dx, dy = mx, my
                        display_pos[tid] = (dx, dy)

                        # Track travel distance
                        along_m = image_point_to_along_m((fx, fy))
                        current_projs.append((tid, along_m))

                        if tid not in prev_along:
                            prev_along[tid] = along_m
                            entry_along[tid] = along_m
                            prev_time[tid] = now

                        delta = along_m - prev_along.get(tid, along_m)
                        if delta > 0:
                            travel_distance[tid] += delta
                        prev_along[tid] = along_m
                        prev_time[tid] = now

                        break

        # finalize exits after missing timeout
        known_ids = set(list(inside_state.keys()) + list(last_seen.keys()))
        for tid in list(known_ids):
            if inside_state.get(tid, False) and tid not in detected_ids:
                ls = last_seen.get(tid, None)
                if ls is None:
                    continue
                missing = now - ls
                if missing > missing_timeout:
                    inside_state[tid] = False
                    if tid in entry_time:
                        accumulated_time[tid] += now - entry_time[tid]
                        # record last exit time in video seconds (use last seen time)
                        last_exit_vid[tid] = ls - start_time
                        completed_times.append(accumulated_time[tid])
                        entry_time.pop(tid, None)
                else:
                    # within occlusion grace window -> keep inside state
                    pass

        # Reappearance inheritance logic (same as prior): copy neighbor state if ID lost & reappears
        current_projs_map = {tid: a for tid, a in current_projs}
        for tid, along in current_projs:
            if tid in prev_along:
                continue
            candidates = []
            for other_tid, other_al in current_projs_map.items():
                if other_tid == tid:
                    continue
                candidates.append((other_tid, other_al))
            if not candidates and prev_along:
                candidates = [(other_tid, prev_along_val) for other_tid, prev_along_val in prev_along.items() if other_tid != tid]
            if not candidates:
                prev_along[tid] = along
                entry_along.setdefault(tid, along)
                prev_time[tid] = now
                continue
            neighbor_tid, neighbor_al = min(candidates, key=lambda x: abs(x[1] - along))
            if abs(neighbor_al - along) < max(0.5, sum(SEG_REAL_M)*0.5):
                # Inherit the nearest neighbor's along-path state and timers.
                prev_along[tid] = prev_along.get(neighbor_tid, neighbor_al)
                entry_along[tid] = entry_along.get(neighbor_tid, neighbor_al)
                prev_time[tid] = now
                accumulated_time[tid] = accumulated_time.get(neighbor_tid, 0.0)
                if neighbor_tid in entry_time:
                    entry_time[tid] = entry_time[neighbor_tid]
                else:
                    entry_time[tid] = now - accumulated_time[tid]
                # also inherit crossed L1/L2 flags if neighbor had them (helps maintain global count consistency)
                if crossed_l1_flag.get(neighbor_tid, False) and not crossed_l1_flag.get(tid, False):
                    crossed_l1_flag[tid] = True
                if crossed_l2_counted.get(neighbor_tid, False) and not crossed_l2_counted.get(tid, False):
                    crossed_l2_counted[tid] = True
            else:
                prev_along[tid] = along
                entry_along.setdefault(tid, along)
                prev_time[tid] = now

        # build display list sorted by along for consistent ordering
        disp = []
        for tid in current_inside:
            if tid not in display_pos:
                continue
            dx, dy = display_pos[tid]
            cur_al = prev_along.get(tid, entry_along.get(tid, 0.0))
            t_inside = int(now - entry_time[tid]) if tid in entry_time else int(accumulated_time.get(tid, 0.0))
            trav = travel_distance.get(tid, 0.0)
            disp.append((tid, int(round(dx)), int(round(dy)), t_inside, trav, cur_al))
        disp.sort(key=lambda x: x[5]) # by along

        # draw patch dots and labels (no velocity)
        for tid, xi, yi, t_inside, trav, _ in disp:
            cv2.circle(patch, (xi, yi), 6, (0,0,255), -1)
            cv2.putText(patch, f"ID {tid}", (xi+8, yi-8), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 1)
            cv2.putText(patch, f"{t_inside}s {trav:.2f}m", (xi+8, yi+8), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)

        # compute avg time taken from completed_times
        avg_time_taken = float(np.mean(completed_times)) if len(completed_times) > 0 else 0.0

        # top-right summary: show both counters
        panel_h, panel_w = 220, 350
        panel = np.ones((panel_h, panel_w, 3), dtype=np.uint8) * 255
        cv2.putText(panel, "Zone Summary", (12, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2)
        cv2.putText(panel, f"Inside count: {len(disp)}", (12, 58), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,120,0), 2)
        cv2.putText(panel, f"Global count: {global_counter}", (12, 92), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,128), 2)
        cv2.putText(panel, f"Avg time taken: {int(avg_time_taken)}s", (12, 126), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,0), 2)

        yv = 150
        for tid, _, _, t_inside, trav, _ in disp[:8]:
            cv2.putText(panel, f"ID {tid}: {t_inside}s, {trav:.2f}m", (12, yv), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (50,50,50), 1)
            yv += 18

        final = np.hstack((frame, right_panel))
        # place panel top-right inside right panel
        panel_x = width + (RIGHT_PANEL_W - panel_w)//2
        panel_y = 10
        final[panel_y:panel_y+panel_h, panel_x:panel_x+panel_w] = panel

        # place patch below panel
        patch_x = width + (RIGHT_PANEL_W - patch_w)//2
        patch_y = panel_y + panel_h + 10
        if patch_y + patch_h > height:
            patch_y = height - patch_h - 10
        final[patch_y:patch_y+patch_h, patch_x:patch_x+patch_w] = patch

        writer.write(np.ascontiguousarray(final))

    # finalize: close out timers for everyone still "inside" at end of video
    end_t = time.time()
    for tid in list(entry_time.keys()):
        accumulated_time[tid] += end_t - entry_time[tid]
        last_exit_vid[tid] = last_seen.get(tid, end_t) - start_time
        completed_times.append(accumulated_time[tid])
        entry_time.pop(tid, None)
        inside_state[tid] = False

    cap.release()
    writer.release()

    # export excel (only >0)
    rows = []
    for tid, tot in accumulated_time.items():
        if tot <= 0:
            continue
        tin = first_entry_vid.get(tid, 0.0)
        tout = last_exit_vid.get(tid, tin)
        rows.append({
            "Passenger": int(tid),
            "Time in": fmt(tin),
            "Time out": fmt(tout),
            "Time in queue (seconds)": round(float(tot), 2)
        })
    df = pd.DataFrame(rows, columns=["Passenger","Time in","Time out","Time in queue (seconds)"])
    if len(df) > 0:
        df.to_excel("person_times.xlsx", index=False)
    else:
        pd.DataFrame(columns=["Passenger","Time in","Time out","Time in queue (seconds)"]).to_excel("person_times.xlsx", index=False)

    print("\nFinished. Output:", os.path.abspath(output_video_path))
    print("Saved times:", os.path.abspath("person_times.xlsx"))
803
+
804
# ---------------- Runner
if __name__ == "__main__":
    # All tunables in one place; keys mirror process_video() parameter names
    # exactly, so the dict can be unpacked straight into the call.
    CONFIG = {
        'input_video_path': "sample_vid.mp4",
        'output_video_path': "output22.avi",
        'model_name': "yolo11x.pt",
        'head_model_name': "head_detection_single_video_best.pt",
        'conf_threshold': 0.3,
        'img_size': 1280,
        'use_gpu': True,
        'enhance_frames': False,
        'smooth_bbox_tracks': True,
        'missing_timeout': 3.0
    }
    process_video(**CONFIG)
yolo11x.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc158aa95c0ebfdd87f70f01653c1131b93e92522dbe15c228bcd742e773a24
3
+ size 114636239