ISR

Sleeping

App Files Files Community

Zhen Ye Claude Opus 4.6 (1M context) committed on Mar 14

Commit

3d7acee

1 Parent(s): 1bdac0d

feat(inspection): add frame extraction and mask retrieval API endpoints

Browse files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (3) hide show

app.py +2 -0
inspection/router.py +228 -0
jobs/storage.py +2 -2

app.py CHANGED Viewed

@@ -58,6 +58,7 @@ from jobs.storage import (
 )
 from models.segmenters.model_loader import get_segmenter_detector
 from pydantic import BaseModel
 logging.basicConfig(level=logging.INFO)
@@ -82,6 +83,7 @@ async def lifespan(_: FastAPI):
 app = FastAPI(title="Video Object Detection", lifespan=lifespan)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],

 )
 from models.segmenters.model_loader import get_segmenter_detector
 from pydantic import BaseModel
+from inspection.router import router as inspection_router
 logging.basicConfig(level=logging.INFO)
 app = FastAPI(title="Video Object Detection", lifespan=lifespan)
+app.include_router(inspection_router)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],

inspection/router.py ADDED Viewed

	@@ -0,0 +1,228 @@

+"""FastAPI router for Object Deep-Inspection endpoints.
+All endpoints are on-demand — they do not affect the main inference pipeline.
+Endpoints are mounted at /inspect in app.py.
+"""
+import logging
+from pathlib import Path
+from typing import Optional
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import JSONResponse, Response
+from jobs.storage import get_job_storage
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/inspect", tags=["inspection"])
+def _get_job_or_404(job_id: str):
+    """Look up job_id in job storage and return the stored job.
+    Raises:
+        HTTPException: 404 when storage returns nothing truthy for job_id.
+    """
+    found = get_job_storage().get(job_id)
+    if found:
+        return found
+    raise HTTPException(status_code=404, detail="Job not found or expired.")
+def _validate_frame_idx(video_path: str, frame_idx: int) -> None:
+    """Raise an HTTPException unless frame_idx is a valid index into the video.
+    Opens video_path with OpenCV and compares frame_idx against the frame
+    count the container reports.
+    Raises:
+        HTTPException: 404 if the video cannot be opened; 400 if frame_idx
+            falls outside [0, total).
+    """
+    import cv2
+    cap = cv2.VideoCapture(video_path)
+    try:
+        if not cap.isOpened():
+            raise HTTPException(status_code=404, detail="Input video not found.")
+        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    finally:
+        # Release the capture on every path, including the not-opened and error paths.
+        cap.release()
+    if not 0 <= frame_idx < total:
+        raise HTTPException(
+            status_code=400,
+            detail=f"frame_idx {frame_idx} out of range [0, {total}).",
+        )
+# ── Frame extraction ──────────────────────────────────────────────
+@router.get("/frame/{job_id}/{frame_idx}")
+async def get_frame(
+    job_id: str,
+    frame_idx: int,
+    track_id: Optional[str] = Query(None, description="Track ID to crop to, e.g. 'T01'"),
+    padding: float = Query(0.15, ge=0.0, le=2.0, description="Padding ratio around bbox"),
+    max_size: int = Query(1920, ge=64, le=4096, description="Max dimension for output"),
+):
+    """Extract a raw frame from the job's input video, optionally cropped to a track.
+    Returns a JPEG image. When track_id is given, the frame is cropped to that
+    track's bounding box with the requested padding ratio; the result is then
+    downscaled so its longest side does not exceed max_size.
+    Raises:
+        HTTPException: 404 if the job, the input video, or the requested track
+            is missing; 400 if frame_idx is out of range or track_id is not
+            of the form 'T<number>' / '<number>'; 500 if no frame was decoded.
+    """
+    import asyncio
+    import cv2
+    from inspection.frames import extract_frame, crop_frame, frame_to_jpeg
+    job = _get_job_or_404(job_id)
+    input_path = job.input_video_path
+    if not input_path or not Path(input_path).exists():
+        raise HTTPException(status_code=404, detail="Input video not found on disk.")
+    # Opening the video to read its frame count blocks, so keep it off the event loop.
+    await asyncio.to_thread(_validate_frame_idx, input_path, frame_idx)
+    # Extract frame in thread pool (cv2 seek can block)
+    frame = await asyncio.to_thread(extract_frame, input_path, frame_idx)
+    if frame is None:
+        # NOTE(review): assumes extract_frame may return None on a failed read — confirm.
+        raise HTTPException(status_code=500, detail="Failed to decode frame.")
+    # Optionally crop to track bbox
+    if track_id is not None:
+        from jobs.storage import get_track_data
+        # Parse "T01" -> 1; a malformed id is a client error, not a server crash.
+        try:
+            instance_id = int(track_id[1:] if track_id.startswith("T") else track_id)
+        except ValueError:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid track_id {track_id!r}.",
+            ) from None
+        target = None
+        for t in get_track_data(job_id, frame_idx):
+            # Explicit None check: an instance_id of 0 is a valid id, not "missing".
+            tid = t.get("instance_id")
+            if tid is None:
+                tid = t.get("track_id")
+            if tid == instance_id or tid == track_id:
+                target = t
+                break
+        if not target or "bbox" not in target:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Track {track_id} not found in frame {frame_idx}.",
+            )
+        frame = crop_frame(frame, target["bbox"], padding=padding)
+    # Resize if larger than max_size
+    h, w = frame.shape[:2]
+    longest = max(h, w)
+    if longest > max_size:
+        scale = max_size / longest
+        # Clamp to 1 px so an extreme aspect ratio cannot round a side down to 0.
+        new_w = max(1, int(w * scale))
+        new_h = max(1, int(h * scale))
+        frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
+    jpeg_bytes = frame_to_jpeg(frame, quality=90)
+    return Response(content=jpeg_bytes, media_type="image/jpeg")
+# ── Mask retrieval ────────────────────────────────────────────────
+@router.get("/mask/{job_id}/{frame_idx}/{track_id}")
+async def get_mask(
+    job_id: str,
+    frame_idx: int,
+    track_id: str,
+    format: str = Query("json", description="Response format: 'json' or 'png'"),
+):
+    """Get the segmentation mask for a specific object at a specific frame.
+    Only available for jobs run in segmentation mode.
+    track_id is a string like "T01"; a bare number such as "1" is also accepted.
+    Returns either:
+    - JSON with RLE-encoded mask, bbox, area, label, width, height, color, mask_format (default)
+    - PNG image of the mask if format=png
+    Any format value other than "png" yields the JSON response.
+    Raises:
+        HTTPException: 404 if the job or the mask is missing; 400 if the job is
+            not a segmentation job or track_id is not 'T<number>' / '<number>'.
+    """
+    from jobs.storage import get_mask_data, get_track_data
+    from inspection.masks import mask_area, rle_decode, mask_to_png_bytes
+    job = _get_job_or_404(job_id)
+    if job.mode != "segmentation":
+        raise HTTPException(
+            status_code=400,
+            detail="Mask data is only available for segmentation mode jobs.",
+        )
+    # Parse track_id: accept "T01" or "1", store as int internally.
+    # A malformed id is reported as a client error instead of crashing with a 500.
+    try:
+        instance_id = int(track_id[1:] if track_id.startswith("T") else track_id)
+    except ValueError:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid track_id {track_id!r}.",
+        ) from None
+    rle = get_mask_data(job_id, frame_idx, instance_id)
+    if rle is None:
+        raise HTTPException(
+            status_code=404,
+            detail=f"No mask found for track {track_id} at frame {frame_idx}.",
+        )
+    if format == "png":
+        mask = rle_decode(rle)
+        png_bytes = mask_to_png_bytes(mask)
+        return Response(content=png_bytes, media_type="image/png")
+    # JSON response: include track metadata per unified contract
+    label = ""
+    bbox = None
+    for t in get_track_data(job_id, frame_idx):
+        if t.get("instance_id") == instance_id:
+            label = t.get("label", "")
+            bbox = t.get("bbox")
+            break
+    # The first entry of rle["size"] is reported as height, the second as width.
+    h, w = rle["size"]
+    # Deterministic color per track ID: the palette is indexed by id modulo its length.
+    palette = [
+        [255, 0, 128], [0, 255, 128], [128, 0, 255], [255, 128, 0],
+        [0, 128, 255], [128, 255, 0], [255, 0, 0], [0, 255, 0],
+        [0, 0, 255], [255, 255, 0], [255, 0, 255], [0, 255, 255],
+    ]
+    color = palette[instance_id % len(palette)]
+    return JSONResponse({
+        "track_id": track_id,
+        "frame_idx": frame_idx,
+        "label": label,
+        "width": w,
+        "height": h,
+        "mask_format": "rle",
+        "rle": rle,
+        "bbox": bbox,
+        "area": mask_area(rle),
+        "color": color,
+    })
+@router.get("/masks/{job_id}/{frame_idx}")
+async def get_all_masks(job_id: str, frame_idx: int):
+    """Return every stored segmentation mask for one frame of a job.
+    Responds with a JSON list of {track_id, frame_idx, label, rle, bbox, area}
+    entries, one per mask stored for the frame. The list is empty when the
+    frame has no masks. Jobs that were not run in segmentation mode get a 400.
+    """
+    from jobs.storage import get_all_masks_for_frame, get_track_data
+    from inspection.masks import mask_area
+    job = _get_job_or_404(job_id)
+    if job.mode != "segmentation":
+        raise HTTPException(
+            status_code=400,
+            detail="Mask data is only available for segmentation mode jobs.",
+        )
+    frame_masks = get_all_masks_for_frame(job_id, frame_idx)
+    if not frame_masks:
+        return JSONResponse([])
+    # Index the frame's tracks by instance_id so each mask can pick up its metadata.
+    # NOTE(review): assumes mask keys and instance_id values share a type — confirm.
+    by_instance = {
+        entry.get("instance_id"): entry
+        for entry in get_track_data(job_id, frame_idx)
+        if entry.get("instance_id") is not None
+    }
+    def _describe(mask_key, encoded):
+        # Masks without a matching track fall back to an empty label and no bbox.
+        meta = by_instance.get(mask_key, {})
+        return {
+            "track_id": mask_key,
+            "frame_idx": frame_idx,
+            "label": meta.get("label", ""),
+            "rle": encoded,
+            "bbox": meta.get("bbox"),
+            "area": mask_area(encoded),
+        }
+    payload = [_describe(mask_key, encoded) for mask_key, encoded in frame_masks.items()]
+    return JSONResponse(payload)

jobs/storage.py CHANGED Viewed

@@ -87,7 +87,7 @@ class JobStorage:
             key = f"{frame_idx}:{track_id}"
             self._mask_data[job_id][key] = rle
-    def get_mask_data(self, job_id: str, frame_idx: int, track_id: int) -> dict | None:
         """Retrieve RLE mask for a specific object at a specific frame."""
         with self._lock:
             key = f"{frame_idx}:{track_id}"
@@ -162,7 +162,7 @@ def get_latest_frame(job_id: str):
 def set_mask_data(job_id: str, frame_idx: int, track_id: int, rle: dict) -> None:
     get_job_storage().set_mask_data(job_id, frame_idx, track_id, rle)
-def get_mask_data(job_id: str, frame_idx: int, track_id: int) -> dict | None:
     return get_job_storage().get_mask_data(job_id, frame_idx, track_id)
 def get_all_masks_for_frame(job_id: str, frame_idx: int) -> dict:

             key = f"{frame_idx}:{track_id}"
             self._mask_data[job_id][key] = rle
+    def get_mask_data(self, job_id: str, frame_idx: int, track_id: int) -> Optional[dict]:
         """Retrieve RLE mask for a specific object at a specific frame."""
         with self._lock:
             key = f"{frame_idx}:{track_id}"
 def set_mask_data(job_id: str, frame_idx: int, track_id: int, rle: dict) -> None:
     get_job_storage().set_mask_data(job_id, frame_idx, track_id, rle)
+def get_mask_data(job_id: str, frame_idx: int, track_id: int) -> Optional[dict]:
     return get_job_storage().get_mask_data(job_id, frame_idx, track_id)
 def get_all_masks_for_frame(job_id: str, frame_idx: int) -> dict: