Spaces:

BiasLab2025
/

perception

Sleeping

App Files Files Community

Zhen Ye committed 24 days ago

Commit

52536ca

1 Parent(s): 537aca9

added async first frame/video detection

Browse files

Files changed (7) hide show

app.py +173 -2
demo.html +64 -14
inference.py +61 -6
jobs/__init__.py +1 -0
jobs/background.py +48 -0
jobs/models.py +27 -0
jobs/storage.py +72 -0

app.py CHANGED Viewed

@@ -1,18 +1,49 @@
 import logging
 import os
 import tempfile
 from pathlib import Path
 from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 import uvicorn
-from inference import run_inference, run_segmentation
 logging.basicConfig(level=logging.INFO)
-app = FastAPI(title="Video Object Detection")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -36,6 +67,13 @@ def _save_upload_to_tmp(upload: UploadFile) -> str:
     return path
 def _safe_delete(path: str) -> None:
     """Safely delete a file, ignoring errors."""
     try:
@@ -54,6 +92,14 @@ def _schedule_cleanup(background_tasks: BackgroundTasks, path: str) -> None:
     background_tasks.add_task(_cleanup)
 @app.get("/", response_class=HTMLResponse)
 async def demo_page() -> str:
     """Serve the demo page."""
@@ -198,5 +244,130 @@ async def detect_endpoint(
     return response
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)

+import asyncio
 import logging
 import os
+import shutil
 import tempfile
+import uuid
+from contextlib import asynccontextmanager
+from datetime import timedelta
 from pathlib import Path
+import cv2
 from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 import uvicorn
+from inference import process_first_frame, run_inference, run_segmentation
+from jobs.background import process_video_async
+from jobs.models import JobInfo, JobStatus
+from jobs.storage import (
+    get_first_frame_path,
+    get_input_video_path,
+    get_job_directory,
+    get_job_storage,
+    get_output_video_path,
+)
 logging.basicConfig(level=logging.INFO)
+async def _periodic_cleanup() -> None:
+    while True:
+        await asyncio.sleep(600)
+        get_job_storage().cleanup_expired(timedelta(hours=1))
+@asynccontextmanager
+async def lifespan(_: FastAPI):
+    cleanup_task = asyncio.create_task(_periodic_cleanup())
+    try:
+        yield
+    finally:
+        cleanup_task.cancel()
+app = FastAPI(title="Video Object Detection", lifespan=lifespan)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     return path
+def _save_upload_to_path(upload: UploadFile, path: Path) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with open(path, "wb") as buffer:
+        data = upload.file.read()
+        buffer.write(data)
 def _safe_delete(path: str) -> None:
     """Safely delete a file, ignoring errors."""
     try:
     background_tasks.add_task(_cleanup)
+def _default_queries_for_mode(mode: str) -> list[str]:
+    if mode == "segmentation":
+        return ["object"]
+    if mode == "drone_detection":
+        return ["drone"]
+    return ["person", "car", "truck", "motorcycle", "bicycle", "bus", "train", "airplane"]
 @app.get("/", response_class=HTMLResponse)
 async def demo_page() -> str:
     """Serve the demo page."""
     return response
+@app.post("/detect/async")
+async def detect_async_endpoint(
+    video: UploadFile = File(...),
+    mode: str = Form(...),
+    queries: str = Form(""),
+    detector: str = Form("hf_yolov8"),
+    segmenter: str = Form("sam3"),
+):
+    if mode not in VALID_MODES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid mode '{mode}'. Must be one of: {', '.join(VALID_MODES)}",
+        )
+    if video is None:
+        raise HTTPException(status_code=400, detail="Video file is required.")
+    job_id = uuid.uuid4().hex
+    job_dir = get_job_directory(job_id)
+    input_path = get_input_video_path(job_id)
+    output_path = get_output_video_path(job_id)
+    first_frame_path = get_first_frame_path(job_id)
+    try:
+        _save_upload_to_path(video, input_path)
+    except Exception:
+        logging.exception("Failed to save uploaded file.")
+        raise HTTPException(status_code=500, detail="Failed to save uploaded video.")
+    finally:
+        await video.close()
+    query_list = [q.strip() for q in queries.split(",") if q.strip()]
+    if not query_list:
+        query_list = _default_queries_for_mode(mode)
+    detector_name = detector
+    if mode == "drone_detection":
+        detector_name = "drone_yolo"
+    try:
+        processed_frame, detections = process_first_frame(
+            str(input_path),
+            query_list,
+            mode=mode,
+            detector_name=detector_name,
+            segmenter_name=segmenter,
+        )
+        cv2.imwrite(str(first_frame_path), processed_frame)
+    except Exception:
+        logging.exception("First-frame processing failed.")
+        shutil.rmtree(job_dir, ignore_errors=True)
+        raise HTTPException(status_code=500, detail="Failed to process first frame.")
+    job = JobInfo(
+        job_id=job_id,
+        status=JobStatus.PROCESSING,
+        mode=mode,
+        queries=query_list,
+        detector_name=detector_name,
+        segmenter_name=segmenter,
+        input_video_path=str(input_path),
+        output_video_path=str(output_path),
+        first_frame_path=str(first_frame_path),
+        first_frame_detections=detections,
+    )
+    get_job_storage().create(job)
+    asyncio.create_task(process_video_async(job_id))
+    return {
+        "job_id": job_id,
+        "first_frame_url": f"/detect/first-frame/{job_id}",
+        "status_url": f"/detect/status/{job_id}",
+        "video_url": f"/detect/video/{job_id}",
+        "status": job.status.value,
+        "first_frame_detections": detections,
+    }
+@app.get("/detect/status/{job_id}")
+async def detect_status(job_id: str):
+    job = get_job_storage().get(job_id)
+    if not job:
+        raise HTTPException(status_code=404, detail="Job not found or expired.")
+    return {
+        "job_id": job.job_id,
+        "status": job.status.value,
+        "created_at": job.created_at.isoformat(),
+        "completed_at": job.completed_at.isoformat() if job.completed_at else None,
+        "error": job.error,
+    }
+@app.get("/detect/first-frame/{job_id}")
+async def detect_first_frame(job_id: str):
+    job = get_job_storage().get(job_id)
+    if not job or not Path(job.first_frame_path).exists():
+        raise HTTPException(status_code=404, detail="First frame not found.")
+    return FileResponse(
+        path=job.first_frame_path,
+        media_type="image/jpeg",
+        filename="first_frame.jpg",
+    )
+@app.get("/detect/video/{job_id}")
+async def detect_video(job_id: str):
+    job = get_job_storage().get(job_id)
+    if not job:
+        raise HTTPException(status_code=404, detail="Job not found or expired.")
+    if job.status == JobStatus.FAILED:
+        raise HTTPException(status_code=500, detail=f"Job failed: {job.error}")
+    if job.status == JobStatus.PROCESSING:
+        return JSONResponse(
+            status_code=202,
+            content={"detail": "Video still processing", "status": "processing"},
+        )
+    if not job.output_video_path or not Path(job.output_video_path).exists():
+        raise HTTPException(status_code=404, detail="Video file not found.")
+    return FileResponse(
+        path=job.output_video_path,
+        media_type="video/mp4",
+        filename="processed.mp4",
+    )
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)

demo.html CHANGED Viewed

@@ -231,6 +231,13 @@
             background: #000;
         }
         .download-btn {
             margin-top: 12px;
             padding: 10px 16px;
@@ -381,6 +388,12 @@
             <div class="section hidden" id="resultsSection">
                 <div class="section-title">Results</div>
                 <div class="results-grid">
                     <div class="video-card">
                         <div class="video-card-header">Original Video</div>
                         <div class="video-card-body">
@@ -421,7 +434,9 @@
         const resultsSection = document.getElementById('resultsSection');
         const originalVideo = document.getElementById('originalVideo');
         const processedVideo = document.getElementById('processedVideo');
         const downloadBtn = document.getElementById('downloadBtn');
         // Mode selection handler
         modeCards.forEach(card => {
             card.addEventListener('click', (e) => {
@@ -483,6 +498,13 @@
             processBtn.disabled = true;
             loading.classList.add('show');
             resultsSection.classList.add('hidden');
             // Prepare form data
             const formData = new FormData();
@@ -493,27 +515,55 @@
             formData.append('segmenter', document.getElementById('segmenter').value);
             try {
-                const response = await fetch('/detect', {
                     method: 'POST',
                     body: formData
                 });
-                if (response.ok) {
-                    const contentType = response.headers.get('content-type') || '';
-                    if (contentType.includes('application/json')) {
-                        const data = await response.json();
-                        alert(data.message || 'Request completed.');
-                        return;
-                    }
-                    const blob = await response.blob();
-                    const videoUrl = URL.createObjectURL(blob);
-                    processedVideo.src = videoUrl;
-                    downloadBtn.href = videoUrl;
-                    resultsSection.classList.remove('hidden');
-                } else {
                     const error = await response.json();
                     alert(`Error: ${error.detail || error.error || 'Processing failed'}`);
                 }
             } catch (error) {
                 console.error('Error:', error);
                 alert('Network error: ' + error.message);

             background: #000;
         }
+        .frame-preview {
+            width: 100%;
+            border-radius: 8px;
+            background: #f3f4f6;
+            display: block;
+        }
         .download-btn {
             margin-top: 12px;
             padding: 10px 16px;
             <div class="section hidden" id="resultsSection">
                 <div class="section-title">Results</div>
                 <div class="results-grid">
+                    <div class="video-card">
+                        <div class="video-card-header">First Frame</div>
+                        <div class="video-card-body">
+                            <img id="firstFrameImage" class="frame-preview" alt="First frame preview">
+                        </div>
+                    </div>
                     <div class="video-card">
                         <div class="video-card-header">Original Video</div>
                         <div class="video-card-body">
         const resultsSection = document.getElementById('resultsSection');
         const originalVideo = document.getElementById('originalVideo');
         const processedVideo = document.getElementById('processedVideo');
+        const firstFrameImage = document.getElementById('firstFrameImage');
         const downloadBtn = document.getElementById('downloadBtn');
+        let statusPoller = null;
         // Mode selection handler
         modeCards.forEach(card => {
             card.addEventListener('click', (e) => {
             processBtn.disabled = true;
             loading.classList.add('show');
             resultsSection.classList.add('hidden');
+            if (statusPoller) {
+                clearInterval(statusPoller);
+                statusPoller = null;
+            }
+            firstFrameImage.removeAttribute('src');
+            processedVideo.removeAttribute('src');
+            downloadBtn.removeAttribute('href');
             // Prepare form data
             const formData = new FormData();
             formData.append('segmenter', document.getElementById('segmenter').value);
             try {
+                const response = await fetch('/detect/async', {
                     method: 'POST',
                     body: formData
                 });
+                if (!response.ok) {
                     const error = await response.json();
                     alert(`Error: ${error.detail || error.error || 'Processing failed'}`);
+                    return;
                 }
+                const data = await response.json();
+                firstFrameImage.src = `${data.first_frame_url}?t=${Date.now()}`;
+                resultsSection.classList.remove('hidden');
+                statusPoller = setInterval(async () => {
+                    try {
+                        const statusResponse = await fetch(data.status_url);
+                        if (!statusResponse.ok) {
+                            clearInterval(statusPoller);
+                            statusPoller = null;
+                            alert('Job expired. Please re-upload the video.');
+                            return;
+                        }
+                        const statusData = await statusResponse.json();
+                        if (statusData.status === 'completed') {
+                            clearInterval(statusPoller);
+                            statusPoller = null;
+                            const videoResponse = await fetch(data.video_url);
+                            if (!videoResponse.ok) {
+                                alert('Failed to fetch processed video.');
+                                return;
+                            }
+                            const blob = await videoResponse.blob();
+                            const videoUrl = URL.createObjectURL(blob);
+                            processedVideo.src = videoUrl;
+                            downloadBtn.href = videoUrl;
+                        } else if (statusData.status === 'failed') {
+                            clearInterval(statusPoller);
+                            statusPoller = null;
+                            alert(statusData.error || 'Processing failed.');
+                        }
+                    } catch (pollError) {
+                        clearInterval(statusPoller);
+                        statusPoller = null;
+                        console.error('Polling error:', pollError);
+                        alert('Polling error: ' + pollError.message);
+                    }
+                }, 2000);
             } catch (error) {
                 console.error('Error:', error);
                 alert('Network error: ' + error.message);

inference.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
-from typing import Any, Dict, List, Optional, Sequence
 import cv2
 import numpy as np
@@ -71,6 +72,20 @@ def _build_detection_records(
     return detections
 def infer_frame(
     frame: np.ndarray,
     queries: Sequence[str],
@@ -79,10 +94,12 @@ def infer_frame(
     detector = load_detector(detector_name)
     text_queries = list(queries) or ["object"]
     try:
-        result = detector.predict(frame, text_queries)
-        detections = _build_detection_records(
-            result.boxes, result.scores, result.labels, text_queries, result.label_names
-        )
     except Exception:
         logging.exception("Inference failed for queries %s", text_queries)
         raise
@@ -95,10 +112,48 @@ def infer_segmentation_frame(
     segmenter_name: Optional[str] = None,
 ) -> tuple[np.ndarray, Any]:
     segmenter = load_segmenter(segmenter_name)
-    result = segmenter.predict(frame, text_prompts=text_queries)
     return draw_masks(frame, result.masks), result
 def run_inference(
     input_video_path: str,
     output_video_path: str,

 import logging
+from threading import RLock
+from typing import Any, Dict, List, Optional, Sequence, Tuple
 import cv2
 import numpy as np
     return detections
+_MODEL_LOCKS: Dict[str, RLock] = {}
+_MODEL_LOCKS_GUARD = RLock()
+def _get_model_lock(kind: str, name: str) -> RLock:
+    key = f"{kind}:{name}"
+    with _MODEL_LOCKS_GUARD:
+        lock = _MODEL_LOCKS.get(key)
+        if lock is None:
+            lock = RLock()
+            _MODEL_LOCKS[key] = lock
+        return lock
 def infer_frame(
     frame: np.ndarray,
     queries: Sequence[str],
     detector = load_detector(detector_name)
     text_queries = list(queries) or ["object"]
     try:
+        lock = _get_model_lock("detector", detector.name)
+        with lock:
+            result = detector.predict(frame, text_queries)
+            detections = _build_detection_records(
+                result.boxes, result.scores, result.labels, text_queries, result.label_names
+            )
     except Exception:
         logging.exception("Inference failed for queries %s", text_queries)
         raise
     segmenter_name: Optional[str] = None,
 ) -> tuple[np.ndarray, Any]:
     segmenter = load_segmenter(segmenter_name)
+    lock = _get_model_lock("segmenter", segmenter.name)
+    with lock:
+        result = segmenter.predict(frame, text_prompts=text_queries)
     return draw_masks(frame, result.masks), result
+def extract_first_frame(video_path: str) -> Tuple[np.ndarray, float, int, int]:
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        raise ValueError("Unable to open video.")
+    fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    success, frame = cap.read()
+    cap.release()
+    if not success or frame is None:
+        raise ValueError("Video decode produced zero frames.")
+    return frame, fps, width, height
+def process_first_frame(
+    video_path: str,
+    queries: List[str],
+    mode: str,
+    detector_name: Optional[str] = None,
+    segmenter_name: Optional[str] = None,
+) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
+    frame, _, _, _ = extract_first_frame(video_path)
+    if mode == "segmentation":
+        processed, _ = infer_segmentation_frame(
+            frame, text_queries=queries, segmenter_name=segmenter_name
+        )
+        return processed, []
+    processed, detections = infer_frame(
+        frame, queries, detector_name=detector_name
+    )
+    return processed, detections
 def run_inference(
     input_video_path: str,
     output_video_path: str,

jobs/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Job management package for async detection."""

jobs/background.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import asyncio
+import logging
+from datetime import datetime
+from jobs.models import JobStatus
+from jobs.storage import get_job_storage
+from inference import run_inference, run_segmentation
+async def process_video_async(job_id: str) -> None:
+    storage = get_job_storage()
+    job = storage.get(job_id)
+    if not job:
+        return
+    try:
+        if job.mode == "segmentation":
+            output_path = await asyncio.to_thread(
+                run_segmentation,
+                job.input_video_path,
+                job.output_video_path,
+                job.queries,
+                None,
+                job.segmenter_name,
+            )
+        else:
+            output_path = await asyncio.to_thread(
+                run_inference,
+                job.input_video_path,
+                job.output_video_path,
+                job.queries,
+                None,
+                job.detector_name,
+            )
+        storage.update(
+            job_id,
+            status=JobStatus.COMPLETED,
+            completed_at=datetime.utcnow(),
+            output_video_path=output_path,
+        )
+    except Exception as exc:
+        logging.exception("Background processing failed for job %s", job_id)
+        storage.update(
+            job_id,
+            status=JobStatus.FAILED,
+            completed_at=datetime.utcnow(),
+            error=str(exc),
+        )

jobs/models.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional
+class JobStatus(str, Enum):  # Lifecycle state of a job; members are also plain strings.
+    PROCESSING = "processing"  # Initial state assigned when the job is created.
+    COMPLETED = "completed"  # Background processing finished and produced an output path.
+    FAILED = "failed"  # Background processing raised; the message is kept in JobInfo.error.
+@dataclass
+class JobInfo:  # Record of one async detection job and the paths of its artifacts.
+    job_id: str  # Unique identifier of the job.
+    status: JobStatus  # Current lifecycle state.
+    mode: str  # Processing mode; "segmentation" selects the segmentation runner.
+    queries: List[str]  # Text queries passed to the model.
+    detector_name: Optional[str]  # Detector passed to the runner in non-segmentation modes.
+    segmenter_name: Optional[str]  # Segmenter passed to the runner in segmentation mode.
+    input_video_path: str  # Path of the saved upload.
+    output_video_path: Optional[str]  # Path of the processed video.
+    first_frame_path: str  # Path of the annotated first-frame image.
+    created_at: datetime = field(default_factory=datetime.utcnow)  # Naive UTC creation time.
+    completed_at: Optional[datetime] = None  # Set when the job completes or fails.
+    error: Optional[str] = None  # Failure message; None unless the job failed.
+    first_frame_detections: List[Dict[str, Any]] = field(default_factory=list)  # Detection records from the first frame.

jobs/storage.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import shutil
+from datetime import datetime, timedelta
+from pathlib import Path
+from threading import RLock
+from typing import Dict, Optional
+from jobs.models import JobInfo, JobStatus
+_BASE_DIR = Path("/tmp/detection_jobs")
+def get_job_directory(job_id: str) -> Path:
+    return _BASE_DIR / job_id
+def get_input_video_path(job_id: str) -> Path:
+    return get_job_directory(job_id) / "input.mp4"
+def get_output_video_path(job_id: str) -> Path:
+    return get_job_directory(job_id) / "output.mp4"
+def get_first_frame_path(job_id: str) -> Path:
+    return get_job_directory(job_id) / "first_frame.jpg"
+class JobStorage:
+    def __init__(self) -> None:
+        self._jobs: Dict[str, JobInfo] = {}
+        self._lock = RLock()
+    def create(self, job: JobInfo) -> None:
+        with self._lock:
+            self._jobs[job.job_id] = job
+    def get(self, job_id: str) -> Optional[JobInfo]:
+        with self._lock:
+            return self._jobs.get(job_id)
+    def update(self, job_id: str, **updates) -> None:
+        with self._lock:
+            job = self._jobs.get(job_id)
+            if not job:
+                return
+            for key, value in updates.items():
+                setattr(job, key, value)
+    def delete(self, job_id: str) -> None:
+        with self._lock:
+            self._jobs.pop(job_id, None)
+        shutil.rmtree(get_job_directory(job_id), ignore_errors=True)
+    def cleanup_expired(self, max_age: timedelta) -> None:
+        cutoff = datetime.utcnow() - max_age
+        to_delete = []
+        with self._lock:
+            for job_id, job in self._jobs.items():
+                if job.status in {JobStatus.COMPLETED, JobStatus.FAILED} and job.created_at < cutoff:
+                    to_delete.append(job_id)
+        for job_id in to_delete:
+            self.delete(job_id)
+_STORAGE: Optional[JobStorage] = None
+def get_job_storage() -> JobStorage:
+    global _STORAGE
+    if _STORAGE is None:
+        _STORAGE = JobStorage()
+    return _STORAGE