azizerorahman committed on
Commit
61ed6d9
·
verified ·
1 Parent(s): 1842054

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +369 -173
app.py CHANGED
@@ -1,37 +1,27 @@
1
- #!/usr/bin/env python
2
- """
3
- FastAPI Server for VisioTrack on Hugging Face Spaces
4
- REST API for object tracking in videos
5
- """
6
-
7
- from fastapi import FastAPI, File, UploadFile, Form, HTTPException
8
- from fastapi.responses import FileResponse, JSONResponse
9
  from fastapi.middleware.cors import CORSMiddleware
10
  import cv2
11
  import torch
12
- import numpy as np
13
  import tempfile
14
  import os
15
  import subprocess
16
  import shutil
17
- from pathlib import Path
18
  from siamrpn import TrackerSiamRPN
19
  import logging
 
 
20
 
21
- # Configure logging
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
- # Initialize FastAPI app
26
- app = FastAPI(
27
- title="VisioTrack API",
28
- description="Object tracking API using SiamRPN",
29
- version="1.0.0",
30
- docs_url="/", # Swagger UI at root
31
- redoc_url="/redoc"
32
- )
33
 
34
- # Enable CORS for frontend integration
35
  app.add_middleware(
36
  CORSMiddleware,
37
  allow_origins=["*"],
@@ -40,90 +30,68 @@ app.add_middleware(
40
  allow_headers=["*"],
41
  )
42
 
43
- # Model configuration
44
  MODEL_PATH = "model.pth"
45
  tracker = None
46
  device = None
47
 
48
  def load_tracker():
49
- """Load the SiamRPN tracker with GPU support"""
50
  global tracker, device
51
  if tracker is None:
52
- if not os.path.exists(MODEL_PATH):
53
- raise FileNotFoundError(f"Model file '{MODEL_PATH}' not found!")
54
-
55
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
56
  tracker = TrackerSiamRPN(net_path=MODEL_PATH)
57
  logger.info(f"✓ Tracker loaded on {device}")
58
  return tracker
59
 
60
- def process_video_tracking(video_path: str, bbox_x: int, bbox_y: int,
61
- bbox_w: int, bbox_h: int):
62
- """
63
- Process video with object tracking
64
-
65
- Args:
66
- video_path: Path to input video
67
- bbox_x, bbox_y, bbox_w, bbox_h: Bounding box coordinates
68
-
69
- Returns:
70
- tuple: (output_path, message, metadata)
71
- """
72
  try:
73
  tracker_instance = load_tracker()
74
 
75
  cap = cv2.VideoCapture(video_path)
76
  if not cap.isOpened():
77
- return None, "Could not open video file", None
78
 
79
- # Get video properties
80
- fps = int(cap.get(cv2.CAP_PROP_FPS))
81
- if fps == 0:
82
- fps = 30
83
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
84
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
85
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
86
 
87
- logger.info(f"Video: {width}x{height} @ {fps}fps, {total_frames} frames")
88
 
89
  ret, frame = cap.read()
90
  if not ret:
91
- return None, "Could not read first frame", None
92
 
93
- # Validate bounding box
94
  if bbox_w <= 0 or bbox_h <= 0:
95
- return None, "Invalid bounding box dimensions", None
96
 
97
- if (bbox_x < 0 or bbox_y < 0 or
98
- bbox_x + bbox_w > width or bbox_y + bbox_h > height):
99
- return None, f"Bounding box out of bounds (frame: {width}x{height})", None
100
 
101
  bbox = [bbox_x, bbox_y, bbox_w, bbox_h]
102
-
103
- # Initialize tracker
104
  tracker_instance.init(frame, bbox)
105
 
106
- # Create temporary output file
107
  temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
108
  temp_output.close()
109
 
110
- # Use XVID codec for initial write
111
- fourcc = cv2.VideoWriter_fourcc(*'XVID')
112
  writer = cv2.VideoWriter(temp_output.name, fourcc, fps, (width, height))
113
 
114
  if not writer.isOpened():
115
- return None, "Could not create video writer", None
116
 
117
- # Draw first frame with initial bbox
118
  x, y, w, h = [int(v) for v in bbox]
119
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
120
- cv2.putText(frame, 'Frame: 1', (10, 30),
121
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
122
  writer.write(frame)
123
 
124
- # Process remaining frames
125
  frame_count = 1
126
 
 
127
  while True:
128
  ret, frame = cap.read()
129
  if not ret:
@@ -133,98 +101,357 @@ def process_video_tracking(video_path: str, bbox_x: int, bbox_y: int,
133
 
134
  # Update tracker
135
  bbox = tracker_instance.update(frame)
136
-
137
- # Draw tracking result
138
  x, y, w, h = [int(v) for v in bbox]
 
 
139
  x = max(0, min(x, width - 1))
140
  y = max(0, min(y, height - 1))
141
  w = max(1, min(w, width - x))
142
  h = max(1, min(h, height - y))
143
 
144
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
145
- cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
146
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
147
 
148
  writer.write(frame)
149
 
 
150
  if frame_count % 30 == 0:
151
- logger.info(f"Processed {frame_count}/{total_frames} frames")
152
 
153
  cap.release()
154
  writer.release()
155
 
156
- # Re-encode with H.264 for browser compatibility
 
 
157
  final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
158
  final_output.close()
159
 
160
  try:
161
- logger.info("Re-encoding video for browser compatibility...")
162
- subprocess.run([
163
  'ffmpeg', '-i', temp_output.name,
164
- '-c:v', 'libx264',
165
- '-preset', 'fast',
166
- '-crf', '23',
167
- '-pix_fmt', 'yuv420p',
168
  '-movflags', '+faststart',
169
- '-y',
170
- final_output.name
171
- ], check=True, capture_output=True, text=True)
172
 
173
  os.unlink(temp_output.name)
174
- logger.info("✓ Video re-encoded successfully")
175
 
176
- except (subprocess.CalledProcessError, FileNotFoundError) as e:
177
- logger.warning(f"FFmpeg encoding failed: {e}, using original")
 
 
 
178
  shutil.move(temp_output.name, final_output.name)
179
 
180
- metadata = {
181
- 'frames_processed': frame_count,
182
- 'resolution': f"{width}x{height}",
183
- 'fps': fps,
184
- 'device': str(device)
185
- }
186
-
187
- return final_output.name, f"Successfully tracked {frame_count} frames", metadata
188
 
189
  except Exception as e:
190
- logger.error(f"Tracking error: {str(e)}")
191
- return None, f"Error: {str(e)}", None
192
-
193
 
194
- @app.get("/health")
195
- async def health_check():
196
- """
197
- Health check endpoint (required by HF Spaces)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  """
199
- return JSONResponse({
200
- 'status': 'healthy',
201
- 'gpu_available': torch.cuda.is_available(),
202
- 'gpu_name': torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
203
- 'model_loaded': tracker is not None
204
- })
205
 
 
 
 
 
 
 
 
206
 
207
  @app.post("/track")
208
  async def track_video(
209
- video: UploadFile = File(..., description="Video file to process"),
210
- bbox_x: int = Form(..., description="X coordinate of bounding box"),
211
- bbox_y: int = Form(..., description="Y coordinate of bounding box"),
212
- bbox_w: int = Form(..., description="Width of bounding box"),
213
- bbox_h: int = Form(..., description="Height of bounding box")
214
  ):
215
- """
216
- Main tracking endpoint
217
-
218
- Upload a video and bounding box coordinates to track an object.
219
- Returns the processed video with tracking visualization.
220
- """
221
  temp_input = None
222
  output_path = None
223
 
224
  try:
225
- # Validate file type
226
- if not video.content_type.startswith('video/'):
227
- raise HTTPException(status_code=400, detail="File must be a video")
228
 
229
  # Save uploaded video
230
  temp_input = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
@@ -232,97 +459,66 @@ async def track_video(
232
  temp_input.write(content)
233
  temp_input.close()
234
 
235
- logger.info(f"Processing video: {video.filename}")
236
- logger.info(f"Bounding box: ({bbox_x}, {bbox_y}, {bbox_w}, {bbox_h})")
237
 
238
- # Process video
239
- output_path, message, metadata = process_video_tracking(
240
- temp_input.name, bbox_x, bbox_y, bbox_w, bbox_h
 
 
 
 
 
 
 
241
  )
242
 
243
  if output_path is None:
244
- raise HTTPException(status_code=400, detail=message)
245
 
246
- # Return processed video
 
 
247
  return FileResponse(
248
  output_path,
249
  media_type='video/mp4',
250
  filename='tracked_video.mp4',
251
  headers={
252
- 'X-Frames-Processed': str(metadata['frames_processed']),
253
- 'X-Resolution': metadata['resolution'],
254
- 'X-FPS': str(metadata['fps'])
255
- }
256
  )
257
 
258
  except HTTPException:
259
  raise
260
  except Exception as e:
261
- logger.error(f"Error: {str(e)}")
262
- raise HTTPException(status_code=500, detail=str(e))
263
-
264
  finally:
265
- # Cleanup temporary files
266
  if temp_input and os.path.exists(temp_input.name):
267
  try:
268
  os.unlink(temp_input.name)
269
  except:
270
  pass
271
 
272
-
273
- @app.get("/info")
274
- async def get_info():
275
- """
276
- Get API information and usage instructions
277
- """
278
- return {
279
- 'name': 'VisioTrack API',
280
- 'version': '1.0.0',
281
- 'description': 'Object tracking API using SiamRPN',
282
- 'endpoints': {
283
- '/health': 'Health check',
284
- '/track': 'Track object in video (POST with multipart/form-data)',
285
- '/info': 'API information',
286
- '/': 'Interactive API documentation (Swagger UI)'
287
- },
288
- 'usage': {
289
- 'method': 'POST',
290
- 'endpoint': '/track',
291
- 'content_type': 'multipart/form-data',
292
- 'parameters': {
293
- 'video': 'Video file',
294
- 'bbox_x': 'X coordinate (int)',
295
- 'bbox_y': 'Y coordinate (int)',
296
- 'bbox_w': 'Width (int)',
297
- 'bbox_h': 'Height (int)'
298
- }
299
- },
300
- 'example_curl': '''
301
- curl -X POST "https://your-space.hf.space/track" \\
302
- -F "video=@video.mp4" \\
303
- -F "bbox_x=100" \\
304
- -F "bbox_y=100" \\
305
- -F "bbox_w=200" \\
306
- -F "bbox_h=200" \\
307
- -o tracked_video.mp4
308
- '''
309
- }
310
-
311
 
312
  @app.on_event("startup")
313
- async def startup_event():
314
- """Load model on startup"""
315
  logger.info("=" * 50)
316
- logger.info("VisioTrack FastAPI Server Starting...")
317
  logger.info("=" * 50)
318
- try:
319
- load_tracker()
320
- logger.info("✓ Model loaded successfully")
321
- except Exception as e:
322
- logger.error(f"✗ Failed to load model: {e}")
323
  logger.info("=" * 50)
324
 
325
-
326
  if __name__ == "__main__":
327
  import uvicorn
328
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks
2
+ from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 
 
 
 
 
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import cv2
5
  import torch
 
6
  import tempfile
7
  import os
8
  import subprocess
9
  import shutil
 
10
  from siamrpn import TrackerSiamRPN
11
  import logging
12
+ import asyncio
13
+ from concurrent.futures import ThreadPoolExecutor
14
 
 
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
+ # Thread pool for blocking operations
19
+ executor = ThreadPoolExecutor(max_workers=2)
20
+
21
+ # Initialize FastAPI
22
+ app = FastAPI(title="VisioTrack API", version="1.0.0")
 
 
 
23
 
24
+ # CORS
25
  app.add_middleware(
26
  CORSMiddleware,
27
  allow_origins=["*"],
 
30
  allow_headers=["*"],
31
  )
32
 
 
33
  MODEL_PATH = "model.pth"
34
  tracker = None
35
  device = None
36
 
37
def load_tracker():
    """Return the shared SiamRPN tracker, constructing it on first use.

    The tracker instance and the selected torch device are cached in the
    module-level ``tracker`` and ``device`` globals, so ``TrackerSiamRPN`` is
    only constructed once per process.

    Returns:
        The cached ``TrackerSiamRPN`` instance.
    """
    global tracker, device

    # Fast path: a previous call already built the tracker.
    if tracker is not None:
        return tracker

    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)
    # NOTE(review): ``device`` is only recorded and logged here; it is not
    # passed to TrackerSiamRPN -- presumably the tracker picks its own device.
    tracker = TrackerSiamRPN(net_path=MODEL_PATH)
    logger.info(f"✓ Tracker loaded on {device}")
    return tracker
44
 
45
+ def process_video_sync(video_path: str, bbox_x: int, bbox_y: int, bbox_w: int, bbox_h: int):
46
+ """Synchronous video processing function"""
 
 
 
 
 
 
 
 
 
 
47
  try:
48
  tracker_instance = load_tracker()
49
 
50
  cap = cv2.VideoCapture(video_path)
51
  if not cap.isOpened():
52
+ return None, "Cannot open video"
53
 
54
+ fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
 
 
 
55
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
56
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
57
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
58
 
59
+ logger.info(f"Video: {width}x{height}, {fps}fps, {total_frames} frames")
60
 
61
  ret, frame = cap.read()
62
  if not ret:
63
+ return None, "Cannot read first frame"
64
 
65
+ # Validate bbox
66
  if bbox_w <= 0 or bbox_h <= 0:
67
+ return None, "Invalid bounding box"
68
 
69
+ if bbox_x < 0 or bbox_y < 0 or bbox_x + bbox_w > width or bbox_y + bbox_h > height:
70
+ return None, f"Bounding box out of bounds. Video size: {width}x{height}"
 
71
 
72
  bbox = [bbox_x, bbox_y, bbox_w, bbox_h]
 
 
73
  tracker_instance.init(frame, bbox)
74
 
75
+ # Create output
76
  temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
77
  temp_output.close()
78
 
79
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
 
80
  writer = cv2.VideoWriter(temp_output.name, fourcc, fps, (width, height))
81
 
82
  if not writer.isOpened():
83
+ return None, "Cannot create video writer"
84
 
85
+ # Draw first frame
86
  x, y, w, h = [int(v) for v in bbox]
87
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
88
+ cv2.putText(frame, 'Frame: 1', (10, 30),
89
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
90
  writer.write(frame)
91
 
 
92
  frame_count = 1
93
 
94
+ # Process remaining frames
95
  while True:
96
  ret, frame = cap.read()
97
  if not ret:
 
101
 
102
  # Update tracker
103
  bbox = tracker_instance.update(frame)
 
 
104
  x, y, w, h = [int(v) for v in bbox]
105
+
106
+ # Clamp values
107
  x = max(0, min(x, width - 1))
108
  y = max(0, min(y, height - 1))
109
  w = max(1, min(w, width - x))
110
  h = max(1, min(h, height - y))
111
 
112
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
113
+ cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
114
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
115
 
116
  writer.write(frame)
117
 
118
+ # Log progress
119
  if frame_count % 30 == 0:
120
+ logger.info(f"Progress: {frame_count}/{total_frames}")
121
 
122
  cap.release()
123
  writer.release()
124
 
125
+ logger.info(f"✓ Processed {frame_count} frames")
126
+
127
+ # Re-encode with ffmpeg
128
  final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
129
  final_output.close()
130
 
131
  try:
132
+ logger.info("Re-encoding video...")
133
+ result = subprocess.run([
134
  'ffmpeg', '-i', temp_output.name,
135
+ '-c:v', 'libx264', '-preset', 'ultrafast',
136
+ '-crf', '23', '-pix_fmt', 'yuv420p',
 
 
137
  '-movflags', '+faststart',
138
+ '-y', final_output.name
139
+ ], check=True, capture_output=True, timeout=300) # 5 min timeout
 
140
 
141
  os.unlink(temp_output.name)
142
+ logger.info("✓ Video re-encoded")
143
 
144
+ except subprocess.TimeoutExpired:
145
+ logger.warning("FFmpeg timeout, using original")
146
+ shutil.move(temp_output.name, final_output.name)
147
+ except Exception as e:
148
+ logger.warning(f"FFmpeg failed: {e}, using original")
149
  shutil.move(temp_output.name, final_output.name)
150
 
151
+ return final_output.name, frame_count
 
 
 
 
 
 
 
152
 
153
  except Exception as e:
154
+ logger.error(f"Processing error: {e}")
155
+ return None, str(e)
 
156
 
157
+ @app.get("/", response_class=HTMLResponse)
158
+ async def home():
159
+ return """
160
+ <!DOCTYPE html>
161
+ <html>
162
+ <head>
163
+ <title>VisioTrack API</title>
164
+ <style>
165
+ * { margin: 0; padding: 0; box-sizing: border-box; }
166
+ body {
167
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
168
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
169
+ min-height: 100vh;
170
+ padding: 20px;
171
+ }
172
+ .container {
173
+ max-width: 900px;
174
+ margin: 0 auto;
175
+ background: white;
176
+ padding: 40px;
177
+ border-radius: 15px;
178
+ box-shadow: 0 10px 40px rgba(0,0,0,0.2);
179
+ }
180
+ h1 { color: #667eea; margin-bottom: 10px; }
181
+ .status {
182
+ background: #d4edda;
183
+ color: #155724;
184
+ padding: 15px;
185
+ border-radius: 8px;
186
+ margin: 20px 0;
187
+ border: 1px solid #c3e6cb;
188
+ }
189
+ .form-group {
190
+ margin-bottom: 15px;
191
+ }
192
+ label {
193
+ display: block;
194
+ margin-bottom: 5px;
195
+ font-weight: 600;
196
+ color: #333;
197
+ }
198
+ input[type="file"] {
199
+ width: 100%;
200
+ padding: 10px;
201
+ border: 2px dashed #667eea;
202
+ border-radius: 8px;
203
+ background: #f8f9fa;
204
+ }
205
+ input[type="number"] {
206
+ width: 100%;
207
+ padding: 10px;
208
+ border: 2px solid #e0e0e0;
209
+ border-radius: 8px;
210
+ font-size: 16px;
211
+ }
212
+ .bbox-grid {
213
+ display: grid;
214
+ grid-template-columns: repeat(2, 1fr);
215
+ gap: 15px;
216
+ margin: 15px 0;
217
+ }
218
+ button {
219
+ width: 100%;
220
+ padding: 15px;
221
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
222
+ color: white;
223
+ border: none;
224
+ border-radius: 8px;
225
+ font-size: 18px;
226
+ font-weight: 600;
227
+ cursor: pointer;
228
+ margin-top: 10px;
229
+ }
230
+ button:hover { opacity: 0.9; }
231
+ button:disabled {
232
+ opacity: 0.6;
233
+ cursor: not-allowed;
234
+ }
235
+ .loading {
236
+ display: none;
237
+ text-align: center;
238
+ padding: 30px;
239
+ background: #e3f2fd;
240
+ border-radius: 8px;
241
+ margin-top: 20px;
242
+ }
243
+ .loading.active { display: block; }
244
+ .spinner {
245
+ border: 4px solid #f3f3f3;
246
+ border-top: 4px solid #667eea;
247
+ border-radius: 50%;
248
+ width: 50px;
249
+ height: 50px;
250
+ animation: spin 1s linear infinite;
251
+ margin: 0 auto 15px;
252
+ }
253
+ @keyframes spin {
254
+ 0% { transform: rotate(0deg); }
255
+ 100% { transform: rotate(360deg); }
256
+ }
257
+ .result {
258
+ display: none;
259
+ margin-top: 30px;
260
+ padding: 20px;
261
+ background: #f8f9fa;
262
+ border-radius: 8px;
263
+ }
264
+ .result.active { display: block; }
265
+ .success {
266
+ background: #d4edda;
267
+ color: #155724;
268
+ padding: 15px;
269
+ border-radius: 8px;
270
+ margin-bottom: 15px;
271
+ }
272
+ video {
273
+ width: 100%;
274
+ border-radius: 8px;
275
+ margin-top: 15px;
276
+ }
277
+ .error {
278
+ display: none;
279
+ background: #f8d7da;
280
+ color: #721c24;
281
+ padding: 15px;
282
+ border-radius: 8px;
283
+ margin-top: 20px;
284
+ }
285
+ .error.active { display: block; }
286
+ .download-btn {
287
+ background: #28a745;
288
+ margin-top: 10px;
289
+ }
290
+ .progress-text {
291
+ font-size: 16px;
292
+ color: #333;
293
+ margin-top: 10px;
294
+ }
295
+ </style>
296
+ </head>
297
+ <body>
298
+ <div class="container">
299
+ <h1>🎯 VisioTrack - Object Tracker</h1>
300
+ <div class="status">
301
+ ✅ API is running! Upload a video to track objects.
302
+ </div>
303
+
304
+ <form id="trackForm">
305
+ <div class="form-group">
306
+ <label>📹 Upload Video</label>
307
+ <input type="file" id="video" accept="video/*" required>
308
+ </div>
309
+
310
+ <label>🎯 Bounding Box (first frame)</label>
311
+ <div class="bbox-grid">
312
+ <div class="form-group">
313
+ <label>X (left)</label>
314
+ <input type="number" id="x" value="100" required>
315
+ </div>
316
+ <div class="form-group">
317
+ <label>Y (top)</label>
318
+ <input type="number" id="y" value="100" required>
319
+ </div>
320
+ <div class="form-group">
321
+ <label>Width</label>
322
+ <input type="number" id="w" value="200" required>
323
+ </div>
324
+ <div class="form-group">
325
+ <label>Height</label>
326
+ <input type="number" id="h" value="200" required>
327
+ </div>
328
+ </div>
329
+
330
+ <button type="submit" id="submitBtn">🚀 Start Tracking</button>
331
+ </form>
332
+
333
+ <div class="loading" id="loading">
334
+ <div class="spinner"></div>
335
+ <div class="progress-text" id="progressText">Uploading and processing video... This may take a few minutes.</div>
336
+ </div>
337
+
338
+ <div class="error" id="error"></div>
339
+
340
+ <div class="result" id="result">
341
+ <div class="success">
342
+ ✅ <strong>Tracking Complete!</strong>
343
+ <p id="info"></p>
344
+ </div>
345
+ <video id="resultVideo" controls></video>
346
+ <button class="download-btn" onclick="downloadVideo()">⬇️ Download Tracked Video</button>
347
+ </div>
348
+ </div>
349
+
350
+ <script>
351
+ let videoBlob = null;
352
+
353
+ document.getElementById('trackForm').onsubmit = async (e) => {
354
+ e.preventDefault();
355
+
356
+ const loading = document.getElementById('loading');
357
+ const result = document.getElementById('result');
358
+ const error = document.getElementById('error');
359
+ const submitBtn = document.getElementById('submitBtn');
360
+ const progressText = document.getElementById('progressText');
361
+
362
+ // Reset UI
363
+ loading.classList.add('active');
364
+ result.classList.remove('active');
365
+ error.classList.remove('active');
366
+ submitBtn.disabled = true;
367
+
368
+ const formData = new FormData();
369
+ const videoFile = document.getElementById('video').files[0];
370
+
371
+ formData.append('video', videoFile);
372
+ formData.append('bbox_x', document.getElementById('x').value);
373
+ formData.append('bbox_y', document.getElementById('y').value);
374
+ formData.append('bbox_w', document.getElementById('w').value);
375
+ formData.append('bbox_h', document.getElementById('h').value);
376
+
377
+ progressText.textContent = `Uploading ${videoFile.name}... Please wait.`;
378
+
379
+ try {
380
+ const startTime = Date.now();
381
+
382
+ const response = await fetch('/track', {
383
+ method: 'POST',
384
+ body: formData
385
+ });
386
+
387
+ if (!response.ok) {
388
+ const errorData = await response.json();
389
+ throw new Error(errorData.detail || 'Tracking failed');
390
+ }
391
+
392
+ progressText.textContent = 'Downloading result...';
393
+
394
+ videoBlob = await response.blob();
395
+ const url = URL.createObjectURL(videoBlob);
396
+
397
+ const frames = response.headers.get('X-Frames-Processed') || 'N/A';
398
+ const processingTime = ((Date.now() - startTime) / 1000).toFixed(1);
399
+
400
+ document.getElementById('resultVideo').src = url;
401
+ document.getElementById('info').innerHTML =
402
+ `Processed <strong>${frames}</strong> frames in <strong>${processingTime}s</strong>`;
403
+
404
+ loading.classList.remove('active');
405
+ result.classList.add('active');
406
+
407
+ } catch (err) {
408
+ loading.classList.remove('active');
409
+ error.textContent = '❌ Error: ' + err.message;
410
+ error.classList.add('active');
411
+ } finally {
412
+ submitBtn.disabled = false;
413
+ }
414
+ };
415
+
416
+ function downloadVideo() {
417
+ if (videoBlob) {
418
+ const url = URL.createObjectURL(videoBlob);
419
+ const a = document.createElement('a');
420
+ a.href = url;
421
+ a.download = 'tracked_video.mp4';
422
+ document.body.appendChild(a);
423
+ a.click();
424
+ document.body.removeChild(a);
425
+ }
426
+ }
427
+ </script>
428
+ </body>
429
+ </html>
430
  """
 
 
 
 
 
 
431
 
432
@app.get("/health")
async def health():
    """Report liveness, CUDA availability and whether the tracker is loaded."""
    gpu_available = torch.cuda.is_available()
    model_ready = tracker is not None
    return {
        "status": "healthy",
        "gpu": gpu_available,
        "model_loaded": model_ready,
    }
439
 
440
@app.post("/track")
async def track_video(
    video: UploadFile = File(...),
    bbox_x: int = Form(...),
    bbox_y: int = Form(...),
    bbox_w: int = Form(...),
    bbox_h: int = Form(...)
):
    """Track an object through an uploaded video and return the annotated file.

    The upload is spooled to a temporary ``.mp4`` file, processed by
    ``process_video_sync`` on the module thread pool (so the event loop is
    not blocked), and streamed back as ``video/mp4``.

    Args:
        video: Uploaded file; its content type must start with ``video/``.
        bbox_x, bbox_y, bbox_w, bbox_h: Initial bounding box on the first frame.

    Returns:
        A ``FileResponse`` for the tracked video, with the number of processed
        frames in the ``X-Frames-Processed`` header.

    Raises:
        HTTPException: 400 if the upload is not a video or processing reports
            a failure; 500 on any unexpected error.
    """
    temp_input = None
    output_path = None
    handed_off = False  # True once the response owns deletion of output_path

    try:
        # Validate file
        if not video.content_type or not video.content_type.startswith('video/'):
            raise HTTPException(400, "File must be a video")

        # Save uploaded video in chunks so large uploads are never held in
        # memory in one piece; the ``with`` closes the handle even on error.
        temp_input = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        with temp_input:
            while True:
                chunk = await video.read(1024 * 1024)
                if not chunk:
                    break
                temp_input.write(chunk)

        logger.info(f"Processing: {video.filename}, bbox: ({bbox_x},{bbox_y},{bbox_w},{bbox_h})")

        # Process video in thread pool (non-blocking)
        loop = asyncio.get_running_loop()
        # On success the second item is the frame count; on failure the first
        # item is None and the second item is an error message.
        output_path, outcome = await loop.run_in_executor(
            executor,
            process_video_sync,
            temp_input.name,
            bbox_x,
            bbox_y,
            bbox_w,
            bbox_h
        )

        if output_path is None:
            raise HTTPException(400, f"Processing failed: {outcome}")

        logger.info(f"✓ Returning result: {outcome} frames")

        # ``BackgroundTasks.add_task`` returns None, so it must not be passed
        # inline as ``background=``; build the task container first, otherwise
        # the output file is never deleted after the response is sent.
        cleanup_tasks = BackgroundTasks()
        cleanup_tasks.add_task(cleanup_file, output_path)

        response = FileResponse(
            output_path,
            media_type='video/mp4',
            filename='tracked_video.mp4',
            headers={
                'X-Frames-Processed': str(outcome),
                'Cache-Control': 'no-cache'
            },
            background=cleanup_tasks
        )
        handed_off = True
        return response

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Error: {e}")
        raise HTTPException(500, str(e)) from e
    finally:
        # The input copy is no longer needed once processing has finished.
        if temp_input is not None:
            try:
                os.unlink(temp_input.name)
            except OSError:
                pass
        # If we produced an output but failed before handing it to the
        # response, nobody else will delete it.
        if output_path is not None and not handed_off:
            cleanup_file(output_path)
504
 
505
def cleanup_file(path: str):
    """Best-effort removal of a temporary file.

    Intended to run as a background task after a response has been sent, so
    it never raises for filesystem problems: a file that is already gone is
    ignored, and any other ``OSError`` is logged as a warning.

    Args:
        path: Path of the file to delete. Empty or ``None`` is a no-op.
    """
    if not path:
        return
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already removed -- nothing to clean up.
        pass
    except OSError as exc:
        # Only filesystem errors are swallowed; KeyboardInterrupt/SystemExit
        # and programming errors still propagate.
        logging.getLogger(__name__).warning(
            "Could not remove temp file %s: %s", path, exc
        )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
@app.on_event("startup")
async def startup():
    """Log a start-up banner and load the tracker when the app starts."""
    rule = "=" * 50

    logger.info(rule)
    logger.info("VisioTrack Starting...")
    logger.info(rule)

    # Build the tracker now rather than on the first request; an exception
    # here propagates out of the startup handler.
    load_tracker()

    logger.info("✓ Ready on port 7860")
    logger.info(rule)
521
 
 
522
  if __name__ == "__main__":
523
  import uvicorn
524
  uvicorn.run(app, host="0.0.0.0", port=7860)