Spaces:

BV-Tech-Team
/

VisioTrack

Sleeping

App Files Files Community

azizerorahman committed on Nov 29, 2025

Commit

61ed6d9

verified ·

1 Parent(s): 1842054

Update app.py

Browse files

Files changed (1) hide show

app.py +369 -173

app.py CHANGED Viewed

@@ -1,37 +1,27 @@
-#!/usr/bin/env python
-"""
-FastAPI Server for VisioTrack on Hugging Face Spaces
-REST API for object tracking in videos
-"""
-from fastapi import FastAPI, File, UploadFile, Form, HTTPException
-from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 import cv2
 import torch
-import numpy as np
 import tempfile
 import os
 import subprocess
 import shutil
-from pathlib import Path
 from siamrpn import TrackerSiamRPN
 import logging
-# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Initialize FastAPI app
-app = FastAPI(
-    title="VisioTrack API",
-    description="Object tracking API using SiamRPN",
-    version="1.0.0",
-    docs_url="/",  # Swagger UI at root
-    redoc_url="/redoc"
-)
-# Enable CORS for frontend integration
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -40,90 +30,68 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Model configuration
 MODEL_PATH = "model.pth"
 tracker = None
 device = None
 def load_tracker():
-    """Load the SiamRPN tracker with GPU support"""
     global tracker, device
     if tracker is None:
-        if not os.path.exists(MODEL_PATH):
-            raise FileNotFoundError(f"Model file '{MODEL_PATH}' not found!")
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         tracker = TrackerSiamRPN(net_path=MODEL_PATH)
         logger.info(f"✓ Tracker loaded on {device}")
     return tracker
-def process_video_tracking(video_path: str, bbox_x: int, bbox_y: int,
-                          bbox_w: int, bbox_h: int):
-    """
-    Process video with object tracking
-    Args:
-        video_path: Path to input video
-        bbox_x, bbox_y, bbox_w, bbox_h: Bounding box coordinates
-    Returns:
-        tuple: (output_path, message, metadata)
-    """
     try:
         tracker_instance = load_tracker()
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
-            return None, "Could not open video file", None
-        # Get video properties
-        fps = int(cap.get(cv2.CAP_PROP_FPS))
-        if fps == 0:
-            fps = 30
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        logger.info(f"Video: {width}x{height} @ {fps}fps, {total_frames} frames")
         ret, frame = cap.read()
         if not ret:
-            return None, "Could not read first frame", None
-        # Validate bounding box
         if bbox_w <= 0 or bbox_h <= 0:
-            return None, "Invalid bounding box dimensions", None
-        if (bbox_x < 0 or bbox_y < 0 or
-            bbox_x + bbox_w > width or bbox_y + bbox_h > height):
-            return None, f"Bounding box out of bounds (frame: {width}x{height})", None
         bbox = [bbox_x, bbox_y, bbox_w, bbox_h]
-        # Initialize tracker
         tracker_instance.init(frame, bbox)
-        # Create temporary output file
         temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
         temp_output.close()
-        # Use XVID codec for initial write
-        fourcc = cv2.VideoWriter_fourcc(*'XVID')
         writer = cv2.VideoWriter(temp_output.name, fourcc, fps, (width, height))
         if not writer.isOpened():
-            return None, "Could not create video writer", None
-        # Draw first frame with initial bbox
         x, y, w, h = [int(v) for v in bbox]
         cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
-        cv2.putText(frame, 'Frame: 1', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
         writer.write(frame)
-        # Process remaining frames
         frame_count = 1
         while True:
             ret, frame = cap.read()
             if not ret:
@@ -133,98 +101,357 @@ def process_video_tracking(video_path: str, bbox_x: int, bbox_y: int,
             # Update tracker
             bbox = tracker_instance.update(frame)
-            # Draw tracking result
             x, y, w, h = [int(v) for v in bbox]
             x = max(0, min(x, width - 1))
             y = max(0, min(y, height - 1))
             w = max(1, min(w, width - x))
             h = max(1, min(h, height - y))
             cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
-            cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
             writer.write(frame)
             if frame_count % 30 == 0:
-                logger.info(f"Processed {frame_count}/{total_frames} frames")
         cap.release()
         writer.release()
-        # Re-encode with H.264 for browser compatibility
         final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
         final_output.close()
         try:
-            logger.info("Re-encoding video for browser compatibility...")
-            subprocess.run([
                 'ffmpeg', '-i', temp_output.name,
-                '-c:v', 'libx264',
-                '-preset', 'fast',
-                '-crf', '23',
-                '-pix_fmt', 'yuv420p',
                 '-movflags', '+faststart',
-                '-y',
-                final_output.name
-            ], check=True, capture_output=True, text=True)
             os.unlink(temp_output.name)
-            logger.info("✓ Video re-encoded successfully")
-        except (subprocess.CalledProcessError, FileNotFoundError) as e:
-            logger.warning(f"FFmpeg encoding failed: {e}, using original")
             shutil.move(temp_output.name, final_output.name)
-        metadata = {
-            'frames_processed': frame_count,
-            'resolution': f"{width}x{height}",
-            'fps': fps,
-            'device': str(device)
-        }
-        return final_output.name, f"Successfully tracked {frame_count} frames", metadata
     except Exception as e:
-        logger.error(f"Tracking error: {str(e)}")
-        return None, f"Error: {str(e)}", None
-@app.get("/health")
-async def health_check():
-    """
-    Health check endpoint (required by HF Spaces)
     """
-    return JSONResponse({
-        'status': 'healthy',
-        'gpu_available': torch.cuda.is_available(),
-        'gpu_name': torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
-        'model_loaded': tracker is not None
-    })
 @app.post("/track")
 async def track_video(
-    video: UploadFile = File(..., description="Video file to process"),
-    bbox_x: int = Form(..., description="X coordinate of bounding box"),
-    bbox_y: int = Form(..., description="Y coordinate of bounding box"),
-    bbox_w: int = Form(..., description="Width of bounding box"),
-    bbox_h: int = Form(..., description="Height of bounding box")
 ):
-    """
-    Main tracking endpoint
-    Upload a video and bounding box coordinates to track an object.
-    Returns the processed video with tracking visualization.
-    """
     temp_input = None
     output_path = None
     try:
-        # Validate file type
-        if not video.content_type.startswith('video/'):
-            raise HTTPException(status_code=400, detail="File must be a video")
         # Save uploaded video
         temp_input = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
@@ -232,97 +459,66 @@ async def track_video(
         temp_input.write(content)
         temp_input.close()
-        logger.info(f"Processing video: {video.filename}")
-        logger.info(f"Bounding box: ({bbox_x}, {bbox_y}, {bbox_w}, {bbox_h})")
-        # Process video
-        output_path, message, metadata = process_video_tracking(
-            temp_input.name, bbox_x, bbox_y, bbox_w, bbox_h
         )
         if output_path is None:
-            raise HTTPException(status_code=400, detail=message)
-        # Return processed video
         return FileResponse(
             output_path,
             media_type='video/mp4',
             filename='tracked_video.mp4',
             headers={
-                'X-Frames-Processed': str(metadata['frames_processed']),
-                'X-Resolution': metadata['resolution'],
-                'X-FPS': str(metadata['fps'])
-            }
         )
     except HTTPException:
         raise
     except Exception as e:
-        logger.error(f"Error: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
     finally:
-        # Cleanup temporary files
         if temp_input and os.path.exists(temp_input.name):
             try:
                 os.unlink(temp_input.name)
             except:
                 pass
-@app.get("/info")
-async def get_info():
-    """
-    Get API information and usage instructions
-    """
-    return {
-        'name': 'VisioTrack API',
-        'version': '1.0.0',
-        'description': 'Object tracking API using SiamRPN',
-        'endpoints': {
-            '/health': 'Health check',
-            '/track': 'Track object in video (POST with multipart/form-data)',
-            '/info': 'API information',
-            '/': 'Interactive API documentation (Swagger UI)'
-        },
-        'usage': {
-            'method': 'POST',
-            'endpoint': '/track',
-            'content_type': 'multipart/form-data',
-            'parameters': {
-                'video': 'Video file',
-                'bbox_x': 'X coordinate (int)',
-                'bbox_y': 'Y coordinate (int)',
-                'bbox_w': 'Width (int)',
-                'bbox_h': 'Height (int)'
-            }
-        },
-        'example_curl': '''
-curl -X POST "https://your-space.hf.space/track" \\
-  -F "video=@video.mp4" \\
-  -F "bbox_x=100" \\
-  -F "bbox_y=100" \\
-  -F "bbox_w=200" \\
-  -F "bbox_h=200" \\
-  -o tracked_video.mp4
-        '''
-    }
 @app.on_event("startup")
-async def startup_event():
-    """Load model on startup"""
     logger.info("=" * 50)
-    logger.info("VisioTrack FastAPI Server Starting...")
     logger.info("=" * 50)
-    try:
-        load_tracker()
-        logger.info("✓ Model loaded successfully")
-    except Exception as e:
-        logger.error(f"✗ Failed to load model: {e}")
     logger.info("=" * 50)
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

+from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 import cv2
 import torch
 import tempfile
 import os
 import subprocess
 import shutil
 from siamrpn import TrackerSiamRPN
 import logging
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Thread pool for blocking operations
+executor = ThreadPoolExecutor(max_workers=2)
+# Initialize FastAPI
+app = FastAPI(title="VisioTrack API", version="1.0.0")
+# CORS
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
 MODEL_PATH = "model.pth"
 tracker = None
 device = None
 def load_tracker():
     """Return the shared SiamRPN tracker, building it on first use.

     The instance is cached in the module-level ``tracker``. The torch device
     picked at construction time (CUDA when available, otherwise CPU) is
     stored in the module-level ``device`` and reported in the log line.
     """
     global tracker, device
     # Guard clause: reuse the cached instance once it exists.
     if tracker is not None:
         return tracker
     use_cuda = torch.cuda.is_available()
     device = torch.device('cuda' if use_cuda else 'cpu')
     tracker = TrackerSiamRPN(net_path=MODEL_PATH)
     logger.info(f"✓ Tracker loaded on {device}")
     return tracker
def process_video_sync(video_path: str, bbox_x: int, bbox_y: int, bbox_w: int, bbox_h: int):
    """Track one object through a video and write an annotated copy.

    This is blocking work: OpenCV decoding/encoding followed by an ffmpeg
    subprocess. The annotated frames are first written with the ``mp4v``
    codec, then re-encoded with ffmpeg/libx264; if ffmpeg fails or times out
    the ``mp4v`` file is returned instead.

    Args:
        video_path: Path of the input video.
        bbox_x: Left edge of the target box in the first frame, in pixels.
        bbox_y: Top edge of the target box in the first frame, in pixels.
        bbox_w: Box width in pixels; must be positive.
        bbox_h: Box height in pixels; must be positive.

    Returns:
        ``(output_path, frame_count)`` on success; the caller owns the
        temporary file at ``output_path`` and must delete it.
        ``(None, message)`` on any failure.
    """
    cap = None
    writer = None
    scratch_path = None   # intermediate mp4v file written by OpenCV
    final_path = None     # file handed back to the caller
    succeeded = False
    try:
        tracker_instance = load_tracker()
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None, "Cannot open video"

        # Fall back to 30 fps when the container reports none.
        fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        logger.info(f"Video: {width}x{height}, {fps}fps, {total_frames} frames")

        ret, frame = cap.read()
        if not ret:
            return None, "Cannot read first frame"

        # Validate bbox
        if bbox_w <= 0 or bbox_h <= 0:
            return None, "Invalid bounding box"
        if bbox_x < 0 or bbox_y < 0 or bbox_x + bbox_w > width or bbox_y + bbox_h > height:
            return None, f"Bounding box out of bounds. Video size: {width}x{height}"

        bbox = [bbox_x, bbox_y, bbox_w, bbox_h]
        tracker_instance.init(frame, bbox)

        # Create output
        scratch = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
        scratch.close()
        scratch_path = scratch.name
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(scratch_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            return None, "Cannot create video writer"

        # Draw first frame
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
        cv2.putText(frame, 'Frame: 1', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        writer.write(frame)
        frame_count = 1

        # Process remaining frames
        while True:
            ret, frame = cap.read()
            if not ret:
                # NOTE(review): the diff view elides the lines between
                # `if not ret:` and the tracker update; `break` and the
                # counter increment below are the presumed originals --
                # confirm against app.py.
                break
            frame_count += 1

            # Update tracker
            bbox = tracker_instance.update(frame)
            x, y, w, h = [int(v) for v in bbox]
            # Clamp values so the drawn rectangle stays inside the frame.
            x = max(0, min(x, width - 1))
            y = max(0, min(y, height - 1))
            w = max(1, min(w, width - x))
            h = max(1, min(h, height - y))
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
            cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            writer.write(frame)
            # Log progress
            if frame_count % 30 == 0:
                logger.info(f"Progress: {frame_count}/{total_frames}")

        # Both handles must be closed before ffmpeg reads the scratch file.
        cap.release()
        cap = None
        writer.release()
        writer = None
        logger.info(f"✓ Processed {frame_count} frames")

        # Re-encode with ffmpeg
        final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        final_output.close()
        final_path = final_output.name
        try:
            logger.info("Re-encoding video...")
            subprocess.run([
                'ffmpeg', '-i', scratch_path,
                '-c:v', 'libx264', '-preset', 'ultrafast',
                '-crf', '23', '-pix_fmt', 'yuv420p',
                '-movflags', '+faststart',
                '-y', final_path
            ], check=True, capture_output=True, timeout=300)  # 5 min timeout
            os.unlink(scratch_path)
            logger.info("✓ Video re-encoded")
        except subprocess.TimeoutExpired:
            logger.warning("FFmpeg timeout, using original")
            shutil.move(scratch_path, final_path)
        except Exception as e:
            logger.warning(f"FFmpeg failed: {e}, using original")
            shutil.move(scratch_path, final_path)

        succeeded = True
        return final_path, frame_count
    except Exception as e:
        logger.error(f"Processing error: {e}")
        return None, str(e)
    finally:
        # Release OpenCV handles on every exit path, including early returns.
        if cap is not None:
            cap.release()
        if writer is not None:
            writer.release()
        # On failure nothing is handed to the caller, so remove both files;
        # on success only a leftover scratch file (normally already gone).
        leftovers = [scratch_path] if succeeded else [scratch_path, final_path]
        for leftover in leftovers:
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except FileNotFoundError:
                pass
            except OSError as cleanup_err:
                logger.warning("Could not remove temp file %s: %s", leftover, cleanup_err)
@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve the single-page HTML/JS front end for the tracker.

    The page posts the chosen video and bounding box to ``/track`` as
    multipart form data, then plays the returned video and offers it for
    download. The script tolerates non-JSON error bodies and reuses a single
    object URL for the result, revoking the previous one on each new run.
    """
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>VisioTrack API</title>
        <style>
            * { margin: 0; padding: 0; box-sizing: border-box; }
            body {
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                min-height: 100vh;
                padding: 20px;
            }
            .container {
                max-width: 900px;
                margin: 0 auto;
                background: white;
                padding: 40px;
                border-radius: 15px;
                box-shadow: 0 10px 40px rgba(0,0,0,0.2);
            }
            h1 { color: #667eea; margin-bottom: 10px; }
            .status {
                background: #d4edda;
                color: #155724;
                padding: 15px;
                border-radius: 8px;
                margin: 20px 0;
                border: 1px solid #c3e6cb;
            }
            .form-group {
                margin-bottom: 15px;
            }
            label {
                display: block;
                margin-bottom: 5px;
                font-weight: 600;
                color: #333;
            }
            input[type="file"] {
                width: 100%;
                padding: 10px;
                border: 2px dashed #667eea;
                border-radius: 8px;
                background: #f8f9fa;
            }
            input[type="number"] {
                width: 100%;
                padding: 10px;
                border: 2px solid #e0e0e0;
                border-radius: 8px;
                font-size: 16px;
            }
            .bbox-grid {
                display: grid;
                grid-template-columns: repeat(2, 1fr);
                gap: 15px;
                margin: 15px 0;
            }
            button {
                width: 100%;
                padding: 15px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                border: none;
                border-radius: 8px;
                font-size: 18px;
                font-weight: 600;
                cursor: pointer;
                margin-top: 10px;
            }
            button:hover { opacity: 0.9; }
            button:disabled {
                opacity: 0.6;
                cursor: not-allowed;
            }
            .loading {
                display: none;
                text-align: center;
                padding: 30px;
                background: #e3f2fd;
                border-radius: 8px;
                margin-top: 20px;
            }
            .loading.active { display: block; }
            .spinner {
                border: 4px solid #f3f3f3;
                border-top: 4px solid #667eea;
                border-radius: 50%;
                width: 50px;
                height: 50px;
                animation: spin 1s linear infinite;
                margin: 0 auto 15px;
            }
            @keyframes spin {
                0% { transform: rotate(0deg); }
                100% { transform: rotate(360deg); }
            }
            .result {
                display: none;
                margin-top: 30px;
                padding: 20px;
                background: #f8f9fa;
                border-radius: 8px;
            }
            .result.active { display: block; }
            .success {
                background: #d4edda;
                color: #155724;
                padding: 15px;
                border-radius: 8px;
                margin-bottom: 15px;
            }
            video {
                width: 100%;
                border-radius: 8px;
                margin-top: 15px;
            }
            .error {
                display: none;
                background: #f8d7da;
                color: #721c24;
                padding: 15px;
                border-radius: 8px;
                margin-top: 20px;
            }
            .error.active { display: block; }
            .download-btn {
                background: #28a745;
                margin-top: 10px;
            }
            .progress-text {
                font-size: 16px;
                color: #333;
                margin-top: 10px;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🎯 VisioTrack - Object Tracker</h1>
            <div class="status">
                ✅ API is running! Upload a video to track objects.
            </div>
            <form id="trackForm">
                <div class="form-group">
                    <label>📹 Upload Video</label>
                    <input type="file" id="video" accept="video/*" required>
                </div>
                <label>🎯 Bounding Box (first frame)</label>
                <div class="bbox-grid">
                    <div class="form-group">
                        <label>X (left)</label>
                        <input type="number" id="x" value="100" required>
                    </div>
                    <div class="form-group">
                        <label>Y (top)</label>
                        <input type="number" id="y" value="100" required>
                    </div>
                    <div class="form-group">
                        <label>Width</label>
                        <input type="number" id="w" value="200" required>
                    </div>
                    <div class="form-group">
                        <label>Height</label>
                        <input type="number" id="h" value="200" required>
                    </div>
                </div>
                <button type="submit" id="submitBtn">🚀 Start Tracking</button>
            </form>
            <div class="loading" id="loading">
                <div class="spinner"></div>
                <div class="progress-text" id="progressText">Uploading and processing video... This may take a few minutes.</div>
            </div>
            <div class="error" id="error"></div>
            <div class="result" id="result">
                <div class="success">
                    ✅ <strong>Tracking Complete!</strong>
                    <p id="info"></p>
                </div>
                <video id="resultVideo" controls></video>
                <button class="download-btn" onclick="downloadVideo()">⬇️ Download Tracked Video</button>
            </div>
        </div>
        <script>
            let videoBlob = null;
            // Single object URL for the current result; revoked before a new one is made.
            let videoUrl = null;
            document.getElementById('trackForm').onsubmit = async (e) => {
                e.preventDefault();
                const loading = document.getElementById('loading');
                const result = document.getElementById('result');
                const error = document.getElementById('error');
                const submitBtn = document.getElementById('submitBtn');
                const progressText = document.getElementById('progressText');
                // Reset UI
                loading.classList.add('active');
                result.classList.remove('active');
                error.classList.remove('active');
                submitBtn.disabled = true;
                const formData = new FormData();
                const videoFile = document.getElementById('video').files[0];
                formData.append('video', videoFile);
                formData.append('bbox_x', document.getElementById('x').value);
                formData.append('bbox_y', document.getElementById('y').value);
                formData.append('bbox_w', document.getElementById('w').value);
                formData.append('bbox_h', document.getElementById('h').value);
                progressText.textContent = `Uploading ${videoFile.name}... Please wait.`;
                try {
                    const startTime = Date.now();
                    const response = await fetch('/track', {
                        method: 'POST',
                        body: formData
                    });
                    if (!response.ok) {
                        let detail = `Tracking failed (HTTP ${response.status})`;
                        try {
                            const errorData = await response.json();
                            if (errorData && errorData.detail) {
                                // Validation errors carry a list, not a string.
                                detail = typeof errorData.detail === 'string'
                                    ? errorData.detail
                                    : JSON.stringify(errorData.detail);
                            }
                        } catch (parseErr) {
                            // Error body was not JSON; keep the generic message.
                        }
                        throw new Error(detail);
                    }
                    progressText.textContent = 'Downloading result...';
                    videoBlob = await response.blob();
                    if (videoUrl) {
                        URL.revokeObjectURL(videoUrl);
                    }
                    videoUrl = URL.createObjectURL(videoBlob);
                    const frames = response.headers.get('X-Frames-Processed') || 'N/A';
                    const processingTime = ((Date.now() - startTime) / 1000).toFixed(1);
                    document.getElementById('resultVideo').src = videoUrl;
                    document.getElementById('info').innerHTML =
                        `Processed <strong>${frames}</strong> frames in <strong>${processingTime}s</strong>`;
                    loading.classList.remove('active');
                    result.classList.add('active');
                } catch (err) {
                    loading.classList.remove('active');
                    error.textContent = '❌ Error: ' + err.message;
                    error.classList.add('active');
                } finally {
                    submitBtn.disabled = false;
                }
            };
            function downloadVideo() {
                if (videoUrl) {
                    const a = document.createElement('a');
                    a.href = videoUrl;
                    a.download = 'tracked_video.mp4';
                    document.body.appendChild(a);
                    a.click();
                    document.body.removeChild(a);
                }
            }
        </script>
    </body>
    </html>
    """
@app.get("/health")
async def health():
    """Report liveness, CUDA availability, and whether the tracker is loaded."""
    gpu_available = torch.cuda.is_available()
    model_loaded = tracker is not None
    report = {"status": "healthy"}
    report["gpu"] = gpu_available
    report["model_loaded"] = model_loaded
    return report
 @app.post("/track")
 async def track_video(
     video: UploadFile = File(...),
     bbox_x: int = Form(...),
     bbox_y: int = Form(...),
     bbox_w: int = Form(...),
     bbox_h: int = Form(...)
 ):
     """Track an object in an uploaded video and return the annotated video.

     The upload is saved to a temporary file, processed on the thread pool
     via ``process_video_sync``, and streamed back as ``video/mp4``. The
     output file is deleted by a background task after the response is sent;
     the uploaded copy is deleted before this handler returns.

     Raises:
         HTTPException: 400 when the upload is not a video or processing
             reports a failure; 500 on any unexpected error.
     """
     temp_input = None
     try:
         # Validate file
         if not video.content_type or not video.content_type.startswith('video/'):
             raise HTTPException(400, "File must be a video")

         # NOTE(review): the diff view elides the line that binds `content`;
         # reading the whole upload is the presumed original -- confirm
         # against app.py.
         content = await video.read()

         # Save uploaded video; the context manager closes the handle even if
         # the write fails, and delete=False keeps the file for OpenCV.
         with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
             temp_input.write(content)

         logger.info(f"Processing: {video.filename}, bbox: ({bbox_x},{bbox_y},{bbox_w},{bbox_h})")

         # Process video in thread pool so the event loop stays responsive.
         loop = asyncio.get_running_loop()
         output_path, outcome = await loop.run_in_executor(
             executor,
             process_video_sync,
             temp_input.name,
             bbox_x,
             bbox_y,
             bbox_w,
             bbox_h
         )
         # On failure `outcome` is an error message, otherwise the frame count.
         if output_path is None:
             raise HTTPException(400, f"Processing failed: {outcome}")
         logger.info(f"✓ Returning result: {outcome} frames")

         # add_task() returns None, so build the task container first and pass
         # the container itself; otherwise the output file is never removed.
         cleanup = BackgroundTasks()
         cleanup.add_task(cleanup_file, output_path)

         # Return video file
         return FileResponse(
             output_path,
             media_type='video/mp4',
             filename='tracked_video.mp4',
             headers={
                 'X-Frames-Processed': str(outcome),
                 'Cache-Control': 'no-cache'
             },
             background=cleanup
         )
     except HTTPException:
         raise
     except Exception as e:
         logger.error(f"Error: {e}")
         raise HTTPException(500, str(e)) from e
     finally:
         # Best-effort removal of the uploaded copy.
         if temp_input is not None and os.path.exists(temp_input.name):
             try:
                 os.unlink(temp_input.name)
             except OSError as cleanup_err:
                 logger.warning("Could not remove upload %s: %s", temp_input.name, cleanup_err)
def cleanup_file(path: str):
    """Delete a temporary file, best-effort.

    Intended to run as a background task after a response has been sent, so
    it never raises for filesystem problems: a missing file is ignored and
    any other OS-level failure is logged.

    Args:
        path: Path of the file to remove; an empty value is a no-op.
    """
    if not path:
        return
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already removed; nothing to do.
        pass
    except OSError as exc:
        logger.warning("Could not remove temp file %s: %s", path, exc)
 @app.on_event("startup")
 async def startup():
     """Log a start-up banner and load the tracker.

     Any exception raised by ``load_tracker`` propagates out of this handler.
     """
     rule = "=" * 50
     logger.info(rule)
     logger.info("VisioTrack Starting...")
     logger.info(rule)
     load_tracker()
     logger.info("✓ Ready on port 7860")
     logger.info(rule)
 # Script entry point: serve the app with uvicorn on all interfaces, port 7860.
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)