File size: 10,439 Bytes
74e694d
 
 
 
 
 
 
 
59cb1b6
d9c5f70
 
74e694d
d9c5f70
 
59cb1b6
 
74e694d
d9c5f70
59cb1b6
 
74e694d
59cb1b6
 
 
74e694d
 
 
 
 
 
 
 
d9c5f70
74e694d
59cb1b6
 
 
 
 
 
 
 
74e694d
59cb1b6
d9c5f70
 
 
 
74e694d
d9c5f70
 
74e694d
 
 
d9c5f70
59cb1b6
 
d9c5f70
 
74e694d
 
 
 
 
 
 
 
 
 
 
 
d9c5f70
 
 
59cb1b6
d9c5f70
74e694d
d9c5f70
74e694d
 
 
 
d9c5f70
 
 
 
74e694d
d9c5f70
59cb1b6
d9c5f70
74e694d
d9c5f70
74e694d
59cb1b6
74e694d
59cb1b6
74e694d
 
 
59cb1b6
 
74e694d
 
59cb1b6
d9c5f70
74e694d
59cb1b6
d9c5f70
 
74e694d
 
59cb1b6
d9c5f70
59cb1b6
74e694d
d9c5f70
74e694d
59cb1b6
 
74e694d
59cb1b6
 
d9c5f70
74e694d
d9c5f70
59cb1b6
d9c5f70
 
 
 
 
59cb1b6
 
 
d9c5f70
61ed6d9
74e694d
 
d9c5f70
 
 
 
 
59cb1b6
74e694d
59cb1b6
d9c5f70
59cb1b6
 
 
74e694d
d9c5f70
 
59cb1b6
 
74e694d
59cb1b6
 
 
 
74e694d
 
59cb1b6
74e694d
 
 
 
59cb1b6
74e694d
 
 
59cb1b6
 
74e694d
59cb1b6
74e694d
 
59cb1b6
 
74e694d
 
 
 
 
 
 
 
59cb1b6
 
74e694d
 
59cb1b6
7035315
61ed6d9
74e694d
 
 
 
 
 
 
 
 
 
 
7035315
59cb1b6
 
74e694d
 
 
 
 
59cb1b6
74e694d
 
 
 
 
 
59cb1b6
 
 
 
74e694d
 
 
59cb1b6
 
 
 
 
 
 
74e694d
 
59cb1b6
74e694d
 
 
59cb1b6
 
 
74e694d
d9c5f70
74e694d
59cb1b6
 
 
 
 
74e694d
 
 
 
59cb1b6
d9c5f70
59cb1b6
 
d9c5f70
74e694d
 
 
59cb1b6
74e694d
59cb1b6
 
 
 
 
 
74e694d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59cb1b6
 
74e694d
 
59cb1b6
74e694d
59cb1b6
74e694d
 
 
 
 
59cb1b6
d9c5f70
74e694d
d9c5f70
59cb1b6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#!/usr/bin/env python
"""
FastAPI Server for VisioTrack on Hugging Face Spaces
REST API for object tracking in videos
"""

from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import cv2
import torch
import numpy as np
import tempfile
import os
import subprocess
import shutil
from pathlib import Path
from siamrpn import TrackerSiamRPN
import logging

# Configure logging
# Root logger is set to INFO; all messages in this module are emitted through
# the module-level `logger` below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
# The interactive Swagger UI is mounted at "/" (instead of the default "/docs")
# and ReDoc at "/redoc".
app = FastAPI(
    title="VisioTrack API",
    description="Object tracking API using SiamRPN",
    version="1.0.0",
    docs_url="/",  # Swagger UI at root
    redoc_url="/redoc"
)

# Enable CORS for frontend integration
# NOTE(review): every origin, method and header is allowed AND credentials are
# enabled. Wildcard origins combined with credentials is a very permissive
# policy -- confirm this is intended, or restrict `allow_origins` to the known
# frontend host(s).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Model configuration
# MODEL_PATH is a relative path, so it is resolved against the process working
# directory. `tracker` and `device` start as None and are assigned by
# load_tracker() the first time it runs.
MODEL_PATH = "model.pth"
tracker = None
device = None

def load_tracker():
    """Return the shared SiamRPN tracker, creating it on the first call.

    On first use this checks that ``MODEL_PATH`` exists, records the torch
    device (CUDA when available, otherwise CPU) in the module-level
    ``device`` and stores the new tracker in the module-level ``tracker``.
    The device is only recorded and logged here; it is not passed to the
    tracker constructor.

    Returns:
        The module-level ``TrackerSiamRPN`` instance.

    Raises:
        FileNotFoundError: If the model weights file is missing.
    """
    global tracker, device

    # Fast path: the tracker has already been built by an earlier call.
    if tracker is not None:
        return tracker

    weights_present = os.path.exists(MODEL_PATH)
    if not weights_present:
        raise FileNotFoundError(f"Model file '{MODEL_PATH}' not found!")

    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)
    tracker = TrackerSiamRPN(net_path=MODEL_PATH)
    logger.info(f"✓ Tracker loaded on {device}")
    return tracker

def process_video_tracking(video_path: str, bbox_x: int, bbox_y: int,
                          bbox_w: int, bbox_h: int):
    """
    Track one object through a video and write an annotated copy.

    The tracker is initialised on the first frame with the given bounding
    box, then updated on every following frame. Each frame is written with
    the tracked rectangle and a frame counter drawn on it. The annotated
    video is first written with the XVID codec and then re-encoded to H.264
    with ``ffmpeg``; if ``ffmpeg`` is missing or fails, the XVID file is
    returned instead.

    The capture and writer are always released, and temporary files are
    removed whenever the function does not return a usable output path.

    Args:
        video_path: Path to input video
        bbox_x, bbox_y: Top-left corner of the initial bounding box (pixels)
        bbox_w, bbox_h: Width and height of the initial bounding box (pixels)

    Returns:
        tuple: (output_path, message, metadata). On failure ``output_path``
        and ``metadata`` are ``None`` and ``message`` describes the problem.
        On success the caller owns ``output_path`` and must delete it.
        ``metadata`` has the keys ``frames_processed``, ``resolution``,
        ``fps`` and ``device``.
    """
    cap = None
    writer = None
    temp_path = None    # intermediate XVID file
    final_path = None   # file handed back to the caller
    succeeded = False

    try:
        tracker_instance = load_tracker()

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None, "Could not open video file", None

        # Get video properties
        # NOTE(review): int() truncates fractional frame rates (e.g. 29.97
        # becomes 29); kept as-is so the reported 'fps' stays an int.
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps == 0:
            fps = 30
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        logger.info(f"Video: {width}x{height} @ {fps}fps, {total_frames} frames")

        ret, frame = cap.read()
        if not ret:
            return None, "Could not read first frame", None

        # Validate bounding box
        if bbox_w <= 0 or bbox_h <= 0:
            return None, "Invalid bounding box dimensions", None

        if (bbox_x < 0 or bbox_y < 0 or
                bbox_x + bbox_w > width or bbox_y + bbox_h > height):
            return None, f"Bounding box out of bounds (frame: {width}x{height})", None

        bbox = [bbox_x, bbox_y, bbox_w, bbox_h]

        # Initialize tracker
        tracker_instance.init(frame, bbox)

        # Create temporary output file (only the unique name is needed here;
        # OpenCV opens the path itself).
        temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
        temp_output.close()
        temp_path = temp_output.name

        # Use XVID codec for initial write
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        writer = cv2.VideoWriter(temp_path, fourcc, fps, (width, height))

        if not writer.isOpened():
            return None, "Could not create video writer", None

        # Draw first frame with initial bbox
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
        cv2.putText(frame, 'Frame: 1', (10, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        writer.write(frame)

        # Process remaining frames
        frame_count = 1

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Update tracker
            bbox = tracker_instance.update(frame)

            # Clamp the predicted box to the frame so the rectangle is always
            # drawable and at least 1x1 pixels.
            x, y, w, h = [int(v) for v in bbox]
            x = max(0, min(x, width - 1))
            y = max(0, min(y, height - 1))
            w = max(1, min(w, width - x))
            h = max(1, min(h, height - y))

            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
            cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            writer.write(frame)

            if frame_count % 30 == 0:
                logger.info(f"Processed {frame_count}/{total_frames} frames")

        # Release before ffmpeg reads the file so the writer has finished it.
        # Resetting to None stops the finally-block from releasing twice.
        cap.release()
        cap = None
        writer.release()
        writer = None

        # Re-encode with H.264 for browser compatibility
        final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        final_output.close()
        final_path = final_output.name

        try:
            logger.info("Re-encoding video for browser compatibility...")
            subprocess.run([
                'ffmpeg', '-i', temp_path,
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '23',
                '-pix_fmt', 'yuv420p',
                '-movflags', '+faststart',
                '-y',
                final_path
            ], check=True, capture_output=True, text=True)

            os.unlink(temp_path)
            logger.info("✓ Video re-encoded successfully")

        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            # ffmpeg missing or failed: fall back to the XVID file.
            logger.warning(f"FFmpeg encoding failed: {e}, using original")
            shutil.move(temp_path, final_path)

        # The intermediate file has been deleted or moved in either branch.
        temp_path = None

        metadata = {
            'frames_processed': frame_count,
            'resolution': f"{width}x{height}",
            'fps': fps,
            'device': str(device)
        }

        succeeded = True
        return final_path, f"Successfully tracked {frame_count} frames", metadata

    except Exception as e:
        # logger.exception keeps the traceback, which the message alone loses.
        logger.exception(f"Tracking error: {str(e)}")
        return None, f"Error: {str(e)}", None

    finally:
        # Runs on every exit path, including the early returns above.
        if cap is not None:
            cap.release()
        if writer is not None:
            writer.release()
        if not succeeded:
            # No path is returned to the caller, so nothing else would ever
            # delete these files.
            for leftover in (temp_path, final_path):
                if leftover and os.path.exists(leftover):
                    try:
                        os.unlink(leftover)
                    except OSError as cleanup_error:
                        logger.warning(
                            f"Could not remove temporary file {leftover}: {cleanup_error}"
                        )


@app.get("/health")
async def health_check():
    """
    Health check endpoint (required by HF Spaces)

    Reports a fixed 'healthy' status, whether CUDA is available, the name of
    GPU 0 (or None without CUDA), and whether the tracker has been loaded.
    """
    cuda_ready = torch.cuda.is_available()
    gpu_label = torch.cuda.get_device_name(0) if cuda_ready else None

    payload = {
        'status': 'healthy',
        'gpu_available': cuda_ready,
        'gpu_name': gpu_label,
        'model_loaded': tracker is not None,
    }
    return JSONResponse(payload)


def _remove_file_quietly(path):
    """Delete *path*; a missing file is ignored, other OS errors are logged."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        logger.warning(f"Could not remove temporary file {path}: {exc}")


@app.post("/track")
async def track_video(
    video: UploadFile = File(..., description="Video file to process"),
    bbox_x: int = Form(..., description="X coordinate of bounding box"),
    bbox_y: int = Form(..., description="Y coordinate of bounding box"),
    bbox_w: int = Form(..., description="Width of bounding box"),
    bbox_h: int = Form(..., description="Height of bounding box")
):
    """
    Main tracking endpoint

    Upload a video and bounding box coordinates to track an object.
    Returns the processed video with tracking visualization.

    The upload is copied to a temporary file in chunks, processed by
    ``process_video_tracking`` and the result is streamed back. The uploaded
    copy is deleted before this function returns. The processed file is
    deleted by a background task after the response has been sent.

    Raises:
        HTTPException: 400 if the upload is not declared as a video or the
            tracking step reports a failure; 500 on any unexpected error.
    """
    # Local import keeps this block self-contained; the file already
    # depends on fastapi.
    from fastapi import BackgroundTasks

    chunk_size = 1024 * 1024  # copy the upload 1 MiB at a time
    temp_input_path = None
    output_path = None
    response_ready = False

    try:
        # Validate file type. content_type can be absent, so treat None as
        # "not a video" instead of failing with AttributeError.
        if not (video.content_type or "").startswith('video/'):
            raise HTTPException(status_code=400, detail="File must be a video")

        # Save uploaded video without holding the whole file in memory.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
            temp_input_path = temp_input.name
            while True:
                chunk = await video.read(chunk_size)
                if not chunk:
                    break
                temp_input.write(chunk)

        logger.info(f"Processing video: {video.filename}")
        logger.info(f"Bounding box: ({bbox_x}, {bbox_y}, {bbox_w}, {bbox_h})")

        # Process video
        # NOTE(review): this call is synchronous and uses the module-level
        # tracker, so it runs on the event loop for the whole video.
        output_path, message, metadata = process_video_tracking(
            temp_input_path, bbox_x, bbox_y, bbox_w, bbox_h
        )

        if output_path is None:
            raise HTTPException(status_code=400, detail=message)

        # The processed file must outlive this function (it is streamed
        # afterwards), so its deletion is scheduled to run after the send.
        cleanup = BackgroundTasks()
        cleanup.add_task(_remove_file_quietly, output_path)

        # Return processed video
        response = FileResponse(
            output_path,
            media_type='video/mp4',
            filename='tracked_video.mp4',
            headers={
                'X-Frames-Processed': str(metadata['frames_processed']),
                'X-Resolution': metadata['resolution'],
                'X-FPS': str(metadata['fps'])
            },
            background=cleanup
        )
        response_ready = True
        return response

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

    finally:
        # Cleanup temporary files
        if temp_input_path:
            _remove_file_quietly(temp_input_path)
        # If no response was built, the background task will never run.
        if output_path and not response_ready:
            _remove_file_quietly(output_path)


@app.get("/info")
async def get_info():
    """
    Describe the API: name, version, endpoints, /track usage and a curl example.
    """
    # Route -> one-line description.
    endpoint_summary = {
        '/health': 'Health check',
        '/track': 'Track object in video (POST with multipart/form-data)',
        '/info': 'API information',
        '/': 'Interactive API documentation (Swagger UI)'
    }

    # Form fields accepted by POST /track.
    form_fields = {
        'video': 'Video file',
        'bbox_x': 'X coordinate (int)',
        'bbox_y': 'Y coordinate (int)',
        'bbox_w': 'Width (int)',
        'bbox_h': 'Height (int)'
    }

    track_usage = {
        'method': 'POST',
        'endpoint': '/track',
        'content_type': 'multipart/form-data',
        'parameters': form_fields
    }

    # Returned verbatim, including the leading newline and trailing indent.
    curl_example = '''
curl -X POST "https://your-space.hf.space/track" \\
  -F "video=@video.mp4" \\
  -F "bbox_x=100" \\
  -F "bbox_y=100" \\
  -F "bbox_w=200" \\
  -F "bbox_h=200" \\
  -o tracked_video.mp4
        '''

    return {
        'name': 'VisioTrack API',
        'version': '1.0.0',
        'description': 'Object tracking API using SiamRPN',
        'endpoints': endpoint_summary,
        'usage': track_usage,
        'example_curl': curl_example
    }


@app.on_event("startup")
async def startup_event():
    """Log a startup banner and try to load the tracker.

    A failure to load the model is logged but not re-raised, so the server
    still starts without a tracker.
    """
    banner = "=" * 50

    logger.info(banner)
    logger.info("VisioTrack FastAPI Server Starting...")
    logger.info(banner)

    try:
        load_tracker()
    except Exception as exc:
        logger.error(f"✗ Failed to load model: {exc}")
    else:
        logger.info("✓ Model loaded successfully")

    logger.info(banner)


# Script entry point: serve `app` with uvicorn when this file is run directly.
# uvicorn is imported here, so it is only needed for direct execution.
if __name__ == "__main__":
    import uvicorn
    # Listens on all interfaces, port 7860.
    # NOTE(review): presumably the port the Hugging Face Spaces runtime
    # expects -- confirm against the Space configuration.
    uvicorn.run(app, host="0.0.0.0", port=7860)