AliZak committed on
Commit
62064b6
·
verified ·
1 Parent(s): ffe6db7

Uploaded the base files

Browse files
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. perception_roi_server.py +789 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Keep the image lean and logs immediate: no .pyc files, unbuffered stdout.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# ffmpeg is required by perception_roi_server.py to transcode outputs to H.264.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY perception_roi_server.py /app/perception_roi_server.py

# Default port expected by the hosting platform.
EXPOSE 7860

CMD ["python", "/app/perception_roi_server.py", "--host", "0.0.0.0", "--port", "7860"]
perception_roi_server.py ADDED
@@ -0,0 +1,789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ROI-aware compression server (FastAPI)
4
+ - Uploads a video and prompt
5
+ - Runs YOLOv8x detection + simple tracking
6
+ - Produces 3 outputs: overlay (tracking), compressed, ROI-preserved
7
+ - Serves MJPEG stream of live overlay
8
+
9
+ Endpoints:
10
+ POST /track/async
11
+ POST /process/compress/{job_id}
12
+ GET /process/status/{job_id}
13
+ GET /process/video/overlay/{job_id}
14
+ GET /process/video/compressed/{job_id}
15
+ GET /process/video/roi/{job_id}
16
+ GET /detect/stream/{job_id}
17
+ """
18
+
19
+ import os
20
+ import uuid
21
+ import time
22
+ import math
23
+ import threading
24
+ import shutil
25
+ import subprocess
26
+ from dataclasses import dataclass, field
27
+ from typing import Dict, List, Optional, Any
28
+
29
+ import cv2
30
+ import numpy as np
31
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
32
+ from fastapi.middleware.cors import CORSMiddleware
33
+ from fastapi.responses import FileResponse, StreamingResponse, JSONResponse
34
+ from ultralytics import YOLO
35
+
36
# Detection configuration, overridable via environment variables.
DEFAULT_WEIGHTS = os.environ.get("YOLO_WEIGHTS", "yolov8s.pt")  # YOLO weights file/name
DEFAULT_CONF = float(os.environ.get("YOLO_CONF", "0.25"))  # confidence threshold
DEFAULT_DEVICE = os.environ.get("YOLO_DEVICE", "auto")  # inference device ("auto", "cpu", "cuda", ...)
FAST_DETECT_SCALE = float(os.environ.get("FAST_DETECT_SCALE", "0.65"))  # frame downscale factor in fast mode
FAST_DETECT_IMGSZ = int(os.environ.get("FAST_DETECT_IMGSZ", "512"))  # YOLO inference imgsz in fast mode

app = FastAPI(title="ROI Compression Server", version="1.0.0")
# Fully permissive CORS so browser frontends on any origin can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
50
+
51
_model_lock = threading.Lock()
# Cache of loaded models keyed by weights path. The original single-slot
# cache ignored `weights` after the first load, silently returning the
# first model for any later weights request.
_models: Dict[str, YOLO] = {}
_model: Optional[YOLO] = None  # kept for backward compatibility (first model loaded)


def get_model(weights: str) -> YOLO:
    """Return a process-wide YOLO model for *weights*, loading it at most once.

    Thread-safe; distinct weights paths get distinct cached models.
    """
    global _model
    with _model_lock:
        model = _models.get(weights)
        if model is None:
            model = YOLO(weights)
            _models[weights] = model
            if _model is None:
                _model = model
        return model
60
+
61
+
62
+ def _parse_queries(q: str) -> List[str]:
63
+ if not q:
64
+ return []
65
+ parts = [p.strip().lower() for p in q.replace("\n", ",").split(",")]
66
+ return [p for p in parts if p]
67
+
68
+
69
+ def _keep_det(label: str, queries: List[str]) -> bool:
70
+ if not queries:
71
+ return True
72
+ l = (label or "").strip().lower()
73
+ if not l:
74
+ return False
75
+ return any((q == l) or (q in l) or (l in q) for q in queries)
76
+
77
+
78
def _yolo_detect_frame(
    model: YOLO,
    frame_bgr: np.ndarray,
    conf: float,
    queries: List[str],
    device: str,
    fast_mode: bool = False,
) -> List[Dict[str, Any]]:
    """Run YOLO on one BGR frame and return filtered detections.

    Returns a list of dicts with keys "bbox_xyxy" (original-frame pixel
    coordinates), "label" and "score". In fast mode the frame is
    downscaled before inference and boxes are scaled back up.
    """
    scale = 1.0
    if fast_mode:
        scale = max(0.1, min(1.0, float(FAST_DETECT_SCALE)))
    if scale < 1.0:
        h, w = frame_bgr.shape[:2]
        # Keep at least 64px per side so tiny frames stay usable.
        sw, sh = max(64, int(w * scale)), max(64, int(h * scale))
        small = cv2.resize(frame_bgr, (sw, sh), interpolation=cv2.INTER_AREA)
        img = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)
    else:
        img = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    pred_kwargs = {"conf": conf, "verbose": False}
    if fast_mode:
        pred_kwargs["imgsz"] = FAST_DETECT_IMGSZ
    if device and str(device).lower() != "auto":
        pred_kwargs["device"] = device
    if fast_mode and str(device).lower() != "cpu":
        # Half precision is only meaningful off-CPU.
        pred_kwargs["half"] = True
    try:
        res = model.predict(img, **pred_kwargs)
    except Exception as e:
        msg = str(e)
        # CUDA-specific failures get one retry on CPU; anything else propagates.
        if ("cuda" in msg.lower()) and (str(device).lower() != "cpu"):
            pred_kwargs["device"] = "cpu"
            res = model.predict(img, **pred_kwargs)
        else:
            raise
    if not res:
        return []
    r0 = res[0]
    # Class-name map may live on the result or the model depending on version.
    names = getattr(r0, "names", None) or getattr(model, "names", None) or {}
    boxes = []
    if r0.boxes is None:
        return boxes
    for b in r0.boxes:
        try:
            xyxy = b.xyxy[0].cpu().numpy().tolist()
            if scale < 1.0:
                # Map box coordinates back to original-frame pixels.
                inv = 1.0 / scale
                xyxy = [v * inv for v in xyxy]
            score = float(b.conf[0].cpu().numpy())
            cls_i = int(b.cls[0].cpu().numpy())
            label = str(names.get(cls_i, cls_i))
            if not _keep_det(label, queries):
                continue
            boxes.append({"bbox_xyxy": xyxy, "label": label, "score": score})
        except Exception:
            # Skip malformed individual boxes rather than failing the frame.
            continue
    return boxes
135
+
136
+
137
def _draw_boxes(frame_bgr: np.ndarray, dets: List[Dict[str, Any]]) -> np.ndarray:
    """Return a copy of the frame with detection boxes and captions drawn."""
    canvas = frame_bgr.copy()
    green = (0, 255, 0)
    for det in dets:
        box = det.get("bbox_xyxy")
        if not (isinstance(box, (list, tuple)) and len(box) == 4):
            continue
        x1, y1, x2, y2 = (int(max(0, coord)) for coord in box)
        label = str(det.get("label", ""))
        score = det.get("score", None)
        tid = det.get("track_id", None)
        tag = f"#{tid}" if isinstance(tid, int) else ""
        if isinstance(score, (float, int)):
            caption = f"{label}{tag} {score:.2f}"
        else:
            caption = f"{label}{tag}"
        cv2.rectangle(canvas, (x1, y1), (x2, y2), green, 2)
        if caption:
            # Keep the caption on-screen even when the box touches the top edge.
            cv2.putText(canvas, caption, (x1, max(12, y1 - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1, cv2.LINE_AA)
    return canvas
153
+
154
+
155
+ def _iou_xyxy(a: List[float], b: List[float]) -> float:
156
+ ax1, ay1, ax2, ay2 = a
157
+ bx1, by1, bx2, by2 = b
158
+ inter_x1 = max(ax1, bx1)
159
+ inter_y1 = max(ay1, by1)
160
+ inter_x2 = min(ax2, bx2)
161
+ inter_y2 = min(ay2, by2)
162
+ if inter_x2 <= inter_x1 or inter_y2 <= inter_y1:
163
+ return 0.0
164
+ inter = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
165
+ area_a = max(0.0, (ax2 - ax1)) * max(0.0, (ay2 - ay1))
166
+ area_b = max(0.0, (bx2 - bx1)) * max(0.0, (by2 - by1))
167
+ denom = area_a + area_b - inter
168
+ if denom <= 0:
169
+ return 0.0
170
+ return float(inter / denom)
171
+
172
+
173
def _assign_tracks(dets: List[Dict[str, Any]], tracker: Dict[str, Any], iou_thresh: float = 0.3) -> List[Dict[str, Any]]:
    """Greedy IoU matching of detections against the previous frame's tracks.

    Mutates each detection in place by adding a "track_id", updates
    tracker["tracks"]/"next_id", and returns the detections.
    """
    previous = tracker.get("tracks", [])
    claimed = set()
    assigned = []
    for det in dets:
        box = det.get("bbox_xyxy")
        det_label = str(det.get("label", ""))
        match_idx = None
        match_iou = 0.0
        if isinstance(box, (list, tuple)) and len(box) == 4:
            for idx, track in enumerate(previous):
                if idx in claimed:
                    continue
                # Never associate across different labels.
                if det_label and track.get("label") and track.get("label") != det_label:
                    continue
                overlap = _iou_xyxy(box, track.get("bbox_xyxy", [0, 0, 0, 0]))
                if overlap > match_iou:
                    match_iou = overlap
                    match_idx = idx
        if match_idx is not None and match_iou >= iou_thresh:
            det["track_id"] = int(previous[match_idx].get("id"))
            claimed.add(match_idx)
        else:
            # No match: mint a new id.
            det["track_id"] = int(tracker.get("next_id", 1))
            tracker["next_id"] = int(det["track_id"]) + 1
        assigned.append(det)
    tracker["tracks"] = [
        {"id": int(d.get("track_id")), "bbox_xyxy": d.get("bbox_xyxy"), "label": d.get("label", "")}
        for d in assigned
    ]
    return assigned
204
+
205
+
206
+ def _ensure_even(v: int, min_v: int = 64) -> int:
207
+ v = max(min_v, int(v))
208
+ return v - (v % 2)
209
+
210
+
211
+ def _compute_target_params(w: int, h: int, fps: float, bandwidth_kbps: int, target_fps: int, target_w: int, target_h: int, scale: float):
212
+ fps = max(1.0, float(fps or 1.0))
213
+ budget = max(100, int(bandwidth_kbps or 1500))
214
+ base_kbps_720p30 = 2500.0
215
+ base_kbps_orig = base_kbps_720p30 * (float(w) * float(h) * fps) / (1280.0 * 720.0 * 30.0)
216
+ if not math.isfinite(base_kbps_orig) or base_kbps_orig <= 0:
217
+ base_kbps_orig = base_kbps_720p30
218
+ if target_w and target_h:
219
+ tw, th = int(target_w), int(target_h)
220
+ else:
221
+ scale = float(scale or 1.0)
222
+ if scale < 0.1:
223
+ scale = 0.1
224
+ if scale > 1.0:
225
+ scale = 1.0
226
+ tw, th = int(w * scale), int(h * scale)
227
+ tfps = int(target_fps or fps)
228
+ scale_r = min(1.0, math.sqrt(budget / base_kbps_orig))
229
+ tw = min(tw, int(w * scale_r))
230
+ th = min(th, int(h * scale_r))
231
+ tfps = min(int(fps), tfps)
232
+ tw = _ensure_even(max(64, tw))
233
+ th = _ensure_even(max(64, th))
234
+ tfps = max(1, tfps)
235
+ frame_step = max(1, int(round(fps / max(1, tfps))))
236
+ return tw, th, tfps, frame_step
237
+
238
+
239
def _open_writer(path: str, w: int, h: int, fps: float) -> Optional[cv2.VideoWriter]:
    """Open a cv2.VideoWriter at *path*, or return None on any failure.

    Deliberately forces the software-friendly 'mp4v' codec: hardware H.264
    writers fail on some systems, and the final browser-friendly H.264 file
    is produced later by _transcode_h264 via ffmpeg.
    """
    if w <= 0 or h <= 0:
        return None
    try:
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(path, fourcc, float(fps or 30.0), (int(w), int(h)))
        if writer is not None and writer.isOpened():
            return writer
    except Exception:
        pass
    return None
260
+
261
+
262
+ def _ffmpeg_available() -> bool:
263
+ return shutil.which("ffmpeg") is not None
264
+
265
+
266
+ def _transcode_h264(src_path: str) -> Optional[str]:
267
+ if not src_path or not os.path.exists(src_path):
268
+ return None
269
+ if not _ffmpeg_available():
270
+ return None
271
+ dst_path = os.path.splitext(src_path)[0] + "_h264.mp4"
272
+ cmd = [
273
+ "ffmpeg",
274
+ "-y",
275
+ "-i",
276
+ src_path,
277
+ "-c:v",
278
+ "libx264",
279
+ "-preset",
280
+ "veryfast",
281
+ "-pix_fmt",
282
+ "yuv420p",
283
+ dst_path,
284
+ ]
285
+ try:
286
+ subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
287
+ if os.path.exists(dst_path) and os.path.getsize(dst_path) > 1024:
288
+ return dst_path
289
+ except Exception:
290
+ return None
291
+ return None
292
+
293
+
294
def _apply_roi_overlay(frame_bgr: np.ndarray, dets: List[Dict[str, Any]], target_w: int, target_h: int) -> np.ndarray:
    """Degrade the background by down/up-scaling, keeping ROIs at full quality.

    The whole frame is resized down to (target_w, target_h) and back up,
    then each (slightly padded) detection box is pasted back from the
    original frame at native resolution.
    """
    h, w = frame_bgr.shape[:2]
    downscaled = cv2.resize(frame_bgr, (int(target_w), int(target_h)), interpolation=cv2.INTER_AREA)
    degraded = cv2.resize(downscaled, (int(w), int(h)), interpolation=cv2.INTER_LINEAR)
    result = degraded.copy()
    margin = max(2, int(min(w, h) * 0.005))
    for det in dets:
        box = det.get("bbox_xyxy")
        if not (isinstance(box, (list, tuple)) and len(box) == 4):
            continue
        x1, y1, x2, y2 = (int(c) for c in box)
        x1 = max(0, x1 - margin)
        y1 = max(0, y1 - margin)
        x2 = min(w, x2 + margin)
        y2 = min(h, y2 + margin)
        if x2 > x1 and y2 > y1:
            result[y1:y2, x1:x2] = frame_bgr[y1:y2, x1:x2]
    return result
313
+
314
+
315
@dataclass
class Job:
    """Mutable state for one upload -> track -> compress pipeline run.

    Shared between request handlers and background worker threads; fields
    other than the immutable `id`/`video_path` are guarded by `lock`.
    """

    id: str  # short hex job identifier
    video_path: str  # path of the uploaded source video
    created: float = field(default_factory=time.time)  # creation time (epoch seconds)
    status: str = "tracking"  # tracking -> tracked -> compressing -> completed | error
    error: Optional[str] = None  # error message when status == "error"
    fps: float = 30.0  # source video fps (filled in by _process_job)
    w: int = 0  # source width in pixels
    h: int = 0  # source height in pixels
    frame_step: int = 1  # process every Nth source frame
    target_fps: int = 15  # output fps after bandwidth clamping
    target_width: int = 0  # output width (0 = derive from scale)
    target_height: int = 0  # output height (0 = derive from scale)
    bandwidth_kbps: int = 1500  # bitrate budget driving down-scaling
    conf: float = DEFAULT_CONF  # YOLO confidence threshold
    weights: str = DEFAULT_WEIGHTS  # YOLO weights path/name
    device: str = DEFAULT_DEVICE  # inference device ("auto", "cpu", ...)
    fast_mode: bool = False  # downscale + half precision for speed
    queries: List[str] = field(default_factory=list)  # lowercase label filters
    overlay_video_path: Optional[str] = None  # tracked-overlay output video
    compressed_video_path: Optional[str] = None  # uniformly compressed output video
    roi_video_path: Optional[str] = None  # ROI-preserving output video
    det_by_frame: Dict[int, List[Dict[str, Any]]] = field(default_factory=dict)  # frame index -> detections
    latest_jpeg: Optional[bytes] = None  # last overlay frame for the MJPEG stream
    latest_compressed_jpeg: Optional[bytes] = None  # last compressed preview frame
    latest_roi_jpeg: Optional[bytes] = None  # last ROI preview frame
    lock: threading.Lock = field(default_factory=threading.Lock)  # guards mutable fields
    tracker_state: Dict[str, Any] = field(default_factory=lambda: {"next_id": 1, "tracks": []})  # IoU tracker memory


# In-memory job registry (process-local; all jobs are lost on restart).
jobs: Dict[str, Job] = {}
347
+
348
+
349
def _process_job(job: Job):
    """Background worker: run detection + tracking over the whole video.

    Writes a boxed overlay video at source resolution, records per-frame
    detections in job.det_by_frame for the later compression pass, and
    keeps job.latest_jpeg updated for the live MJPEG preview. On any
    failure, sets job.status = "error" with the message in job.error.
    """
    try:
        model = get_model(job.weights)
        cap = cv2.VideoCapture(job.video_path)
        if not cap.isOpened():
            raise RuntimeError("Could not open video.")
        fps = float(cap.get(cv2.CAP_PROP_FPS) or 30.0)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)

        # Negotiate output geometry/fps under the job's bandwidth budget.
        tw, th, tfps, frame_step = _compute_target_params(
            w=w,
            h=h,
            fps=fps,
            bandwidth_kbps=job.bandwidth_kbps,
            target_fps=job.target_fps,
            target_w=job.target_width,
            target_h=job.target_height,
            scale=max(0.25, min(1.0, (job.target_width / w) if (job.target_width and w) else 1.0)),
        )

        os.makedirs("outputs", exist_ok=True)
        overlay_path = os.path.join("outputs", f"{job.id}_overlay.mp4")
        # The overlay keeps source resolution; only fps is reduced.
        overlay_writer = _open_writer(overlay_path, w, h, tfps)

        with job.lock:
            job.fps = fps
            job.w = w
            job.h = h
            job.frame_step = frame_step
            job.target_fps = tfps
            job.target_width = tw
            job.target_height = th
            job.overlay_video_path = overlay_path if overlay_writer is not None else None
            job.status = "tracking"

        frame_idx = 0
        tracker = job.tracker_state
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            # Subsample frames to hit the target fps.
            if frame_idx % frame_step != 0:
                frame_idx += 1
                continue

            dets = _yolo_detect_frame(model, frame, conf=job.conf, queries=job.queries, device=job.device, fast_mode=job.fast_mode)
            # If the detector supplied no track ids, run the in-house IoU
            # tracker; otherwise mirror its ids into the tracker state.
            if dets and not any("track_id" in d for d in dets):
                dets = _assign_tracks(dets, tracker)
            elif dets:
                tracker["tracks"] = [
                    {"id": int(d.get("track_id")), "bbox_xyxy": d.get("bbox_xyxy"), "label": d.get("label", "")}
                    for d in dets
                ]
                max_id = max((int(d.get("track_id", 0)) for d in dets), default=0)
                tracker["next_id"] = max(tracker.get("next_id", 1), max_id + 1)

            with job.lock:
                job.det_by_frame[int(frame_idx)] = dets

            overlay = _draw_boxes(frame, dets)
            # Cache a JPEG of the latest overlay frame for the MJPEG stream.
            ok2, jpg = cv2.imencode(".jpg", overlay, [int(cv2.IMWRITE_JPEG_QUALITY), 80])
            if ok2:
                with job.lock:
                    job.latest_jpeg = jpg.tobytes()

            if overlay_writer is not None:
                overlay_writer.write(overlay)

            frame_idx += 1

        cap.release()
        if overlay_writer is not None:
            try:
                overlay_writer.release()
            except Exception:
                pass

        # Prefer a browser-friendly H.264 transcode when ffmpeg is available.
        h264_overlay = _transcode_h264(overlay_path) if overlay_writer is not None else None
        with job.lock:
            if h264_overlay:
                job.overlay_video_path = h264_overlay
            job.status = "tracked"
    except Exception as e:
        with job.lock:
            job.status = "error"
            job.error = str(e)
436
+
437
+
438
def _compress_job(job: Job, bandwidth_kbps: int, target_fps: int, target_w: int, target_h: int, resolution_scale: float):
    """Background worker: produce compressed and ROI-preserved output videos.

    Re-reads the source video and reuses the detections recorded by
    _process_job (job.det_by_frame); frames without a recorded entry reuse
    the most recent detections. Updates job.latest_*_jpeg previews for the
    MJPEG streams. On failure, sets job.status = "error".
    """
    try:
        cap = cv2.VideoCapture(job.video_path)
        if not cap.isOpened():
            raise RuntimeError("Could not open video.")
        fps = float(cap.get(cv2.CAP_PROP_FPS) or 30.0)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)

        # Negotiate output size/fps under the requested bandwidth budget.
        tw, th, tfps, frame_step = _compute_target_params(
            w=w,
            h=h,
            fps=fps,
            bandwidth_kbps=bandwidth_kbps,
            target_fps=target_fps,
            target_w=target_w,
            target_h=target_h,
            scale=resolution_scale,
        )

        os.makedirs("outputs", exist_ok=True)
        compressed_path = os.path.join("outputs", f"{job.id}_compressed_rt.mp4")
        roi_path = os.path.join("outputs", f"{job.id}_roi_rt.mp4")

        # Compressed output is downscaled; ROI output keeps source resolution.
        compressed_writer = _open_writer(compressed_path, tw, th, tfps)
        roi_writer = _open_writer(roi_path, w, h, tfps)

        with job.lock:
            job.status = "compressing"
            job.bandwidth_kbps = int(bandwidth_kbps)
            job.target_fps = int(tfps)
            job.target_width = int(tw)
            job.target_height = int(th)

        frame_idx = 0
        last_dets: List[Dict[str, Any]] = []
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if frame_idx % frame_step != 0:
                frame_idx += 1
                continue

            # Reuse tracked detections; fall back to the last known ones.
            dets = job.det_by_frame.get(int(frame_idx))
            if dets is None:
                dets = last_dets
            else:
                last_dets = dets

            compressed_frame = None
            roi_frame = None

            if compressed_writer is not None:
                try:
                    compressed_frame = cv2.resize(frame, (tw, th), interpolation=cv2.INTER_AREA)
                    compressed_writer.write(compressed_frame)
                except Exception:
                    compressed_frame = None

            if roi_writer is not None:
                try:
                    roi_frame = _apply_roi_overlay(frame, dets, tw, th)
                    roi_writer.write(roi_frame)
                except Exception:
                    roi_frame = None

            # Cache JPEG previews for the MJPEG streaming endpoints (best effort).
            try:
                if compressed_frame is not None:
                    okc, jpgc = cv2.imencode(".jpg", compressed_frame, [int(cv2.IMWRITE_JPEG_QUALITY), 80])
                    if okc:
                        with job.lock:
                            job.latest_compressed_jpeg = jpgc.tobytes()
                if roi_frame is not None:
                    okr, jpgr = cv2.imencode(".jpg", roi_frame, [int(cv2.IMWRITE_JPEG_QUALITY), 80])
                    if okr:
                        with job.lock:
                            job.latest_roi_jpeg = jpgr.tobytes()
            except Exception:
                pass

            frame_idx += 1

        cap.release()
        for wtr in (compressed_writer, roi_writer):
            if wtr is not None:
                try:
                    wtr.release()
                except Exception:
                    pass

        # Prefer browser-friendly H.264 transcodes when ffmpeg is available.
        h264_compressed = _transcode_h264(compressed_path) if compressed_writer is not None else None
        h264_roi = _transcode_h264(roi_path) if roi_writer is not None else None

        with job.lock:
            if h264_compressed:
                job.compressed_video_path = h264_compressed
            else:
                job.compressed_video_path = compressed_path if os.path.exists(compressed_path) else job.compressed_video_path
            if h264_roi:
                job.roi_video_path = h264_roi
            else:
                job.roi_video_path = roi_path if os.path.exists(roi_path) else job.roi_video_path
            job.status = "completed"
    except Exception as e:
        with job.lock:
            job.status = "error"
            job.error = str(e)
546
+
547
+
548
@app.post("/track/async")
async def track_async(
    video: UploadFile = File(...),
    queries: str = Form(""),
    conf: float = Form(DEFAULT_CONF),
    weights: str = Form(DEFAULT_WEIGHTS),
    device: str = Form(""),
    fast_mode: bool = Form(False),
    bandwidth_kbps: int = Form(1500),
    target_fps: int = Form(15),
    target_width: int = Form(0),
    target_height: int = Form(0),
    # NOTE(review): resolution_scale is accepted here but not used by this
    # endpoint — only /process/compress honours it. Confirm intended.
    resolution_scale: float = Form(1.0),
):
    """Accept a video upload and start detection + tracking in the background.

    Saves the upload, registers a Job, runs a best-effort detection on the
    first frame so the MJPEG preview has content immediately, then launches
    the tracking worker thread. Returns the job id plus URLs for status,
    live stream, and the three output videos.
    """
    job_id = uuid.uuid4().hex[:12]
    os.makedirs("uploads", exist_ok=True)
    dst = os.path.join("uploads", f"{job_id}_{os.path.basename(video.filename or 'input.mp4')}")
    data = await video.read()
    with open(dst, "wb") as f:
        f.write(data)

    job = Job(
        id=job_id,
        video_path=dst,
        status="tracking",
        conf=float(conf),
        weights=str(weights),
        device=str(device).strip() or DEFAULT_DEVICE,
        queries=_parse_queries(queries),
        fast_mode=bool(fast_mode),
        target_fps=int(target_fps or 15),
        bandwidth_kbps=int(bandwidth_kbps or 1500),
        target_width=int(target_width or 0),
        target_height=int(target_height or 0),
    )
    jobs[job_id] = job

    # Best-effort first-frame detection so the MJPEG stream has an initial
    # image before the worker thread produces one; failures are ignored.
    try:
        cap = cv2.VideoCapture(dst)
        ok, frame0 = cap.read()
        cap.release()
        if ok and frame0 is not None:
            model = get_model(job.weights)
            det0 = _yolo_detect_frame(model, frame0, conf=job.conf, queries=job.queries, device=job.device, fast_mode=job.fast_mode)
            det0 = _assign_tracks(det0, job.tracker_state)
            with job.lock:
                job.det_by_frame[0] = det0
            vis0 = _draw_boxes(frame0, det0)
            ok2, jpg = cv2.imencode(".jpg", vis0, [int(cv2.IMWRITE_JPEG_QUALITY), 80])
            if ok2:
                with job.lock:
                    job.latest_jpeg = jpg.tobytes()
    except Exception:
        pass

    t = threading.Thread(target=_process_job, args=(job,), daemon=True)
    t.start()

    return JSONResponse({
        "job_id": job_id,
        "status_url": f"/process/status/{job_id}",
        "stream_url": f"/detect/stream/{job_id}",
        "overlay_video_url": f"/process/video/overlay/{job_id}",
        "compressed_video_url": f"/process/video/compressed/{job_id}",
        "roi_video_url": f"/process/video/roi/{job_id}",
    })
615
+
616
+
617
@app.post("/process/compress/{job_id}")
async def process_compress(
    job_id: str,
    bandwidth_kbps: int = Form(1500),
    target_fps: int = Form(15),
    target_width: int = Form(0),
    target_height: int = Form(0),
    resolution_scale: float = Form(1.0),
):
    """Kick off the compression pass for a job whose tracking has finished."""
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    with job.lock:
        if job.status in ("tracking", "compressing"):
            raise HTTPException(status_code=409, detail="Job still running")
        if job.status not in ("tracked", "completed"):
            raise HTTPException(status_code=409, detail="Tracking not ready")

    worker = threading.Thread(
        target=_compress_job,
        args=(job, int(bandwidth_kbps), int(target_fps), int(target_width), int(target_height), float(resolution_scale)),
        daemon=True,
    )
    worker.start()
    return JSONResponse({"job_id": job_id, "status": "compressing"})
642
+
643
+
644
@app.get("/process/status/{job_id}")
def process_status(job_id: str):
    """Report a job's current status and negotiated output parameters."""
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    with job.lock:
        snapshot = {
            "job_id": job.id,
            "status": job.status,
            "error": job.error,
            "target_width": job.target_width,
            "target_height": job.target_height,
            "target_fps": job.target_fps,
            "bandwidth_kbps": job.bandwidth_kbps,
        }
    return snapshot
659
+
660
+
661
+ def _mjpeg_generator(job: Job):
662
+ boundary = b"--frame"
663
+ while True:
664
+ with job.lock:
665
+ jpg = job.latest_jpeg
666
+ status = job.status
667
+ err = job.error
668
+ if err:
669
+ break
670
+ if jpg:
671
+ yield boundary + b"\r\n"
672
+ yield b"Content-Type: image/jpeg\r\n"
673
+ yield f"Content-Length: {len(jpg)}\r\n\r\n".encode("ascii")
674
+ yield jpg + b"\r\n"
675
+ time.sleep(0.15)
676
+ if status in ("completed", "error"):
677
+ time.sleep(0.5)
678
+ break
679
+
680
+
681
+ def _mjpeg_generator_compressed(job: Job):
682
+ boundary = b"--frame"
683
+ while True:
684
+ with job.lock:
685
+ jpg = job.latest_compressed_jpeg
686
+ status = job.status
687
+ err = job.error
688
+ if err:
689
+ break
690
+ if jpg:
691
+ yield boundary + b"\r\n"
692
+ yield b"Content-Type: image/jpeg\r\n"
693
+ yield f"Content-Length: {len(jpg)}\r\n\r\n".encode("ascii")
694
+ yield jpg + b"\r\n"
695
+ time.sleep(0.15)
696
+ if status in ("completed", "error"):
697
+ time.sleep(0.5)
698
+ break
699
+
700
+
701
+ def _mjpeg_generator_roi(job: Job):
702
+ boundary = b"--frame"
703
+ while True:
704
+ with job.lock:
705
+ jpg = job.latest_roi_jpeg
706
+ status = job.status
707
+ err = job.error
708
+ if err:
709
+ break
710
+ if jpg:
711
+ yield boundary + b"\r\n"
712
+ yield b"Content-Type: image/jpeg\r\n"
713
+ yield f"Content-Length: {len(jpg)}\r\n\r\n".encode("ascii")
714
+ yield jpg + b"\r\n"
715
+ time.sleep(0.15)
716
+ if status in ("completed", "error"):
717
+ time.sleep(0.5)
718
+ break
719
+
720
+
721
@app.get("/detect/stream/{job_id}")
def detect_stream(job_id: str):
    """Live MJPEG stream of the tracking overlay for a job."""
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    media = "multipart/x-mixed-replace; boundary=frame"
    return StreamingResponse(_mjpeg_generator(job), media_type=media)
727
+
728
+
729
@app.get("/process/stream/compressed/{job_id}")
def process_stream_compressed(job_id: str):
    """Live MJPEG stream of the compressed preview frames for a job."""
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    media = "multipart/x-mixed-replace; boundary=frame"
    return StreamingResponse(_mjpeg_generator_compressed(job), media_type=media)
735
+
736
+
737
@app.get("/process/stream/roi/{job_id}")
def process_stream_roi(job_id: str):
    """Live MJPEG stream of the ROI-preserved preview frames for a job."""
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    media = "multipart/x-mixed-replace; boundary=frame"
    return StreamingResponse(_mjpeg_generator_roi(job), media_type=media)
743
+
744
+
745
def _serve_video_or_source(job: "Job", candidate: Optional[str]) -> FileResponse:
    """Serve *candidate* when it exists and is plausibly non-empty (> 1 KiB);
    otherwise fall back to the original uploaded video. Factored out of three
    previously copy-pasted endpoints."""
    path = job.video_path
    if candidate and os.path.exists(candidate) and os.path.getsize(candidate) > 1024:
        path = candidate
    return FileResponse(path, media_type="video/mp4")


@app.get("/process/video/overlay/{job_id}")
def process_video_overlay(job_id: str):
    """Download the tracking-overlay video (source video as fallback)."""
    job = jobs.get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    return _serve_video_or_source(job, job.overlay_video_path)


@app.get("/process/video/compressed/{job_id}")
def process_video_compressed(job_id: str):
    """Download the compressed video (source video as fallback)."""
    job = jobs.get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    return _serve_video_or_source(job, job.compressed_video_path)


@app.get("/process/video/roi/{job_id}")
def process_video_roi(job_id: str):
    """Download the ROI-preserved video (source video as fallback)."""
    job = jobs.get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Unknown job_id")
    return _serve_video_or_source(job, job.roi_video_path)
770
+
771
+
772
if __name__ == "__main__":
    import argparse
    import uvicorn

    p = argparse.ArgumentParser()
    p.add_argument("--host", default="127.0.0.1")
    p.add_argument("--port", default=8000, type=int)
    p.add_argument("--weights", default=DEFAULT_WEIGHTS)
    p.add_argument("--device", default=DEFAULT_DEVICE)
    args = p.parse_args()

    # NOTE(review): rebinding these globals happens after the Job dataclass
    # defaults were already evaluated at class-creation time, so --weights /
    # --device do not change Job's field defaults — only code that reads the
    # module globals afterwards. Confirm this is intended.
    DEFAULT_WEIGHTS = args.weights
    DEFAULT_DEVICE = args.device
    # Warm the model cache before accepting requests.
    get_model(args.weights)

    # Environment variables take precedence over CLI flags (container-friendly).
    # NOTE(review): args.host always has a value thanks to the argparse
    # default, so the `or "0.0.0.0"` fallback here is dead code.
    host = os.environ.get("HOST", args.host or "0.0.0.0")
    port = int(os.environ.get("PORT", args.port))
    uvicorn.run(app, host=host, port=port)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Pinned runtime dependencies for perception_roi_server.py
fastapi==0.115.2
uvicorn[standard]==0.30.6
ultralytics==8.3.34
opencv-python-headless==4.10.0.84
numpy==1.26.4