azizerorahman committed on
Commit
74e694d
·
verified ·
1 Parent(s): 61ed6d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -369
app.py CHANGED
@@ -1,27 +1,37 @@
1
- from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks
2
- from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 
 
 
 
 
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import cv2
5
  import torch
 
6
  import tempfile
7
  import os
8
  import subprocess
9
  import shutil
 
10
  from siamrpn import TrackerSiamRPN
11
  import logging
12
- import asyncio
13
- from concurrent.futures import ThreadPoolExecutor
14
 
 
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
- # Thread pool for blocking operations
19
- executor = ThreadPoolExecutor(max_workers=2)
20
-
21
- # Initialize FastAPI
22
- app = FastAPI(title="VisioTrack API", version="1.0.0")
 
 
 
23
 
24
- # CORS
25
  app.add_middleware(
26
  CORSMiddleware,
27
  allow_origins=["*"],
@@ -30,68 +40,90 @@ app.add_middleware(
30
  allow_headers=["*"],
31
  )
32
 
 
33
  MODEL_PATH = "model.pth"
34
  tracker = None
35
  device = None
36
 
37
  def load_tracker():
 
38
  global tracker, device
39
  if tracker is None:
 
 
 
40
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
41
  tracker = TrackerSiamRPN(net_path=MODEL_PATH)
42
  logger.info(f"✓ Tracker loaded on {device}")
43
  return tracker
44
 
45
- def process_video_sync(video_path: str, bbox_x: int, bbox_y: int, bbox_w: int, bbox_h: int):
46
- """Synchronous video processing function"""
 
 
 
 
 
 
 
 
 
 
47
  try:
48
  tracker_instance = load_tracker()
49
 
50
  cap = cv2.VideoCapture(video_path)
51
  if not cap.isOpened():
52
- return None, "Cannot open video"
53
 
54
- fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
 
 
 
55
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
56
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
57
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
58
 
59
- logger.info(f"Video: {width}x{height}, {fps}fps, {total_frames} frames")
60
 
61
  ret, frame = cap.read()
62
  if not ret:
63
- return None, "Cannot read first frame"
64
 
65
- # Validate bbox
66
  if bbox_w <= 0 or bbox_h <= 0:
67
- return None, "Invalid bounding box"
68
 
69
- if bbox_x < 0 or bbox_y < 0 or bbox_x + bbox_w > width or bbox_y + bbox_h > height:
70
- return None, f"Bounding box out of bounds. Video size: {width}x{height}"
 
71
 
72
  bbox = [bbox_x, bbox_y, bbox_w, bbox_h]
 
 
73
  tracker_instance.init(frame, bbox)
74
 
75
- # Create output
76
  temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
77
  temp_output.close()
78
 
79
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
 
80
  writer = cv2.VideoWriter(temp_output.name, fourcc, fps, (width, height))
81
 
82
  if not writer.isOpened():
83
- return None, "Cannot create video writer"
84
 
85
- # Draw first frame
86
  x, y, w, h = [int(v) for v in bbox]
87
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
88
- cv2.putText(frame, 'Frame: 1', (10, 30),
89
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
90
  writer.write(frame)
91
 
 
92
  frame_count = 1
93
 
94
- # Process remaining frames
95
  while True:
96
  ret, frame = cap.read()
97
  if not ret:
@@ -101,357 +133,98 @@ def process_video_sync(video_path: str, bbox_x: int, bbox_y: int, bbox_w: int, b
101
 
102
  # Update tracker
103
  bbox = tracker_instance.update(frame)
104
- x, y, w, h = [int(v) for v in bbox]
105
 
106
- # Clamp values
 
107
  x = max(0, min(x, width - 1))
108
  y = max(0, min(y, height - 1))
109
  w = max(1, min(w, width - x))
110
  h = max(1, min(h, height - y))
111
 
112
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
113
- cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
114
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
115
 
116
  writer.write(frame)
117
 
118
- # Log progress
119
  if frame_count % 30 == 0:
120
- logger.info(f"Progress: {frame_count}/{total_frames}")
121
 
122
  cap.release()
123
  writer.release()
124
 
125
- logger.info(f"✓ Processed {frame_count} frames")
126
-
127
- # Re-encode with ffmpeg
128
  final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
129
  final_output.close()
130
 
131
  try:
132
- logger.info("Re-encoding video...")
133
- result = subprocess.run([
134
  'ffmpeg', '-i', temp_output.name,
135
- '-c:v', 'libx264', '-preset', 'ultrafast',
136
- '-crf', '23', '-pix_fmt', 'yuv420p',
 
 
137
  '-movflags', '+faststart',
138
- '-y', final_output.name
139
- ], check=True, capture_output=True, timeout=300) # 5 min timeout
 
140
 
141
  os.unlink(temp_output.name)
142
- logger.info("✓ Video re-encoded")
143
 
144
- except subprocess.TimeoutExpired:
145
- logger.warning("FFmpeg timeout, using original")
146
- shutil.move(temp_output.name, final_output.name)
147
- except Exception as e:
148
- logger.warning(f"FFmpeg failed: {e}, using original")
149
  shutil.move(temp_output.name, final_output.name)
150
 
151
- return final_output.name, frame_count
 
 
 
 
 
 
 
152
 
153
  except Exception as e:
154
- logger.error(f"Processing error: {e}")
155
- return None, str(e)
156
 
157
- @app.get("/", response_class=HTMLResponse)
158
- async def home():
159
- return """
160
- <!DOCTYPE html>
161
- <html>
162
- <head>
163
- <title>VisioTrack API</title>
164
- <style>
165
- * { margin: 0; padding: 0; box-sizing: border-box; }
166
- body {
167
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
168
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
169
- min-height: 100vh;
170
- padding: 20px;
171
- }
172
- .container {
173
- max-width: 900px;
174
- margin: 0 auto;
175
- background: white;
176
- padding: 40px;
177
- border-radius: 15px;
178
- box-shadow: 0 10px 40px rgba(0,0,0,0.2);
179
- }
180
- h1 { color: #667eea; margin-bottom: 10px; }
181
- .status {
182
- background: #d4edda;
183
- color: #155724;
184
- padding: 15px;
185
- border-radius: 8px;
186
- margin: 20px 0;
187
- border: 1px solid #c3e6cb;
188
- }
189
- .form-group {
190
- margin-bottom: 15px;
191
- }
192
- label {
193
- display: block;
194
- margin-bottom: 5px;
195
- font-weight: 600;
196
- color: #333;
197
- }
198
- input[type="file"] {
199
- width: 100%;
200
- padding: 10px;
201
- border: 2px dashed #667eea;
202
- border-radius: 8px;
203
- background: #f8f9fa;
204
- }
205
- input[type="number"] {
206
- width: 100%;
207
- padding: 10px;
208
- border: 2px solid #e0e0e0;
209
- border-radius: 8px;
210
- font-size: 16px;
211
- }
212
- .bbox-grid {
213
- display: grid;
214
- grid-template-columns: repeat(2, 1fr);
215
- gap: 15px;
216
- margin: 15px 0;
217
- }
218
- button {
219
- width: 100%;
220
- padding: 15px;
221
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
222
- color: white;
223
- border: none;
224
- border-radius: 8px;
225
- font-size: 18px;
226
- font-weight: 600;
227
- cursor: pointer;
228
- margin-top: 10px;
229
- }
230
- button:hover { opacity: 0.9; }
231
- button:disabled {
232
- opacity: 0.6;
233
- cursor: not-allowed;
234
- }
235
- .loading {
236
- display: none;
237
- text-align: center;
238
- padding: 30px;
239
- background: #e3f2fd;
240
- border-radius: 8px;
241
- margin-top: 20px;
242
- }
243
- .loading.active { display: block; }
244
- .spinner {
245
- border: 4px solid #f3f3f3;
246
- border-top: 4px solid #667eea;
247
- border-radius: 50%;
248
- width: 50px;
249
- height: 50px;
250
- animation: spin 1s linear infinite;
251
- margin: 0 auto 15px;
252
- }
253
- @keyframes spin {
254
- 0% { transform: rotate(0deg); }
255
- 100% { transform: rotate(360deg); }
256
- }
257
- .result {
258
- display: none;
259
- margin-top: 30px;
260
- padding: 20px;
261
- background: #f8f9fa;
262
- border-radius: 8px;
263
- }
264
- .result.active { display: block; }
265
- .success {
266
- background: #d4edda;
267
- color: #155724;
268
- padding: 15px;
269
- border-radius: 8px;
270
- margin-bottom: 15px;
271
- }
272
- video {
273
- width: 100%;
274
- border-radius: 8px;
275
- margin-top: 15px;
276
- }
277
- .error {
278
- display: none;
279
- background: #f8d7da;
280
- color: #721c24;
281
- padding: 15px;
282
- border-radius: 8px;
283
- margin-top: 20px;
284
- }
285
- .error.active { display: block; }
286
- .download-btn {
287
- background: #28a745;
288
- margin-top: 10px;
289
- }
290
- .progress-text {
291
- font-size: 16px;
292
- color: #333;
293
- margin-top: 10px;
294
- }
295
- </style>
296
- </head>
297
- <body>
298
- <div class="container">
299
- <h1>🎯 VisioTrack - Object Tracker</h1>
300
- <div class="status">
301
- ✅ API is running! Upload a video to track objects.
302
- </div>
303
-
304
- <form id="trackForm">
305
- <div class="form-group">
306
- <label>📹 Upload Video</label>
307
- <input type="file" id="video" accept="video/*" required>
308
- </div>
309
-
310
- <label>🎯 Bounding Box (first frame)</label>
311
- <div class="bbox-grid">
312
- <div class="form-group">
313
- <label>X (left)</label>
314
- <input type="number" id="x" value="100" required>
315
- </div>
316
- <div class="form-group">
317
- <label>Y (top)</label>
318
- <input type="number" id="y" value="100" required>
319
- </div>
320
- <div class="form-group">
321
- <label>Width</label>
322
- <input type="number" id="w" value="200" required>
323
- </div>
324
- <div class="form-group">
325
- <label>Height</label>
326
- <input type="number" id="h" value="200" required>
327
- </div>
328
- </div>
329
-
330
- <button type="submit" id="submitBtn">🚀 Start Tracking</button>
331
- </form>
332
-
333
- <div class="loading" id="loading">
334
- <div class="spinner"></div>
335
- <div class="progress-text" id="progressText">Uploading and processing video... This may take a few minutes.</div>
336
- </div>
337
-
338
- <div class="error" id="error"></div>
339
-
340
- <div class="result" id="result">
341
- <div class="success">
342
- ✅ <strong>Tracking Complete!</strong>
343
- <p id="info"></p>
344
- </div>
345
- <video id="resultVideo" controls></video>
346
- <button class="download-btn" onclick="downloadVideo()">⬇️ Download Tracked Video</button>
347
- </div>
348
- </div>
349
-
350
- <script>
351
- let videoBlob = null;
352
-
353
- document.getElementById('trackForm').onsubmit = async (e) => {
354
- e.preventDefault();
355
-
356
- const loading = document.getElementById('loading');
357
- const result = document.getElementById('result');
358
- const error = document.getElementById('error');
359
- const submitBtn = document.getElementById('submitBtn');
360
- const progressText = document.getElementById('progressText');
361
-
362
- // Reset UI
363
- loading.classList.add('active');
364
- result.classList.remove('active');
365
- error.classList.remove('active');
366
- submitBtn.disabled = true;
367
-
368
- const formData = new FormData();
369
- const videoFile = document.getElementById('video').files[0];
370
-
371
- formData.append('video', videoFile);
372
- formData.append('bbox_x', document.getElementById('x').value);
373
- formData.append('bbox_y', document.getElementById('y').value);
374
- formData.append('bbox_w', document.getElementById('w').value);
375
- formData.append('bbox_h', document.getElementById('h').value);
376
-
377
- progressText.textContent = `Uploading ${videoFile.name}... Please wait.`;
378
-
379
- try {
380
- const startTime = Date.now();
381
-
382
- const response = await fetch('/track', {
383
- method: 'POST',
384
- body: formData
385
- });
386
-
387
- if (!response.ok) {
388
- const errorData = await response.json();
389
- throw new Error(errorData.detail || 'Tracking failed');
390
- }
391
-
392
- progressText.textContent = 'Downloading result...';
393
-
394
- videoBlob = await response.blob();
395
- const url = URL.createObjectURL(videoBlob);
396
-
397
- const frames = response.headers.get('X-Frames-Processed') || 'N/A';
398
- const processingTime = ((Date.now() - startTime) / 1000).toFixed(1);
399
-
400
- document.getElementById('resultVideo').src = url;
401
- document.getElementById('info').innerHTML =
402
- `Processed <strong>${frames}</strong> frames in <strong>${processingTime}s</strong>`;
403
-
404
- loading.classList.remove('active');
405
- result.classList.add('active');
406
-
407
- } catch (err) {
408
- loading.classList.remove('active');
409
- error.textContent = '❌ Error: ' + err.message;
410
- error.classList.add('active');
411
- } finally {
412
- submitBtn.disabled = false;
413
- }
414
- };
415
-
416
- function downloadVideo() {
417
- if (videoBlob) {
418
- const url = URL.createObjectURL(videoBlob);
419
- const a = document.createElement('a');
420
- a.href = url;
421
- a.download = 'tracked_video.mp4';
422
- document.body.appendChild(a);
423
- a.click();
424
- document.body.removeChild(a);
425
- }
426
- }
427
- </script>
428
- </body>
429
- </html>
430
- """
431
 
432
  @app.get("/health")
433
- async def health():
434
- return {
435
- "status": "healthy",
436
- "gpu": torch.cuda.is_available(),
437
- "model_loaded": tracker is not None
438
- }
 
 
 
 
 
439
 
440
  @app.post("/track")
441
  async def track_video(
442
- video: UploadFile = File(...),
443
- bbox_x: int = Form(...),
444
- bbox_y: int = Form(...),
445
- bbox_w: int = Form(...),
446
- bbox_h: int = Form(...)
447
  ):
 
 
 
 
 
 
448
  temp_input = None
449
  output_path = None
450
 
451
  try:
452
- # Validate file
453
- if not video.content_type or not video.content_type.startswith('video/'):
454
- raise HTTPException(400, "File must be a video")
455
 
456
  # Save uploaded video
457
  temp_input = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
@@ -459,66 +232,97 @@ async def track_video(
459
  temp_input.write(content)
460
  temp_input.close()
461
 
462
- logger.info(f"Processing: {video.filename}, bbox: ({bbox_x},{bbox_y},{bbox_w},{bbox_h})")
 
463
 
464
- # Process video in thread pool (non-blocking)
465
- loop = asyncio.get_event_loop()
466
- output_path, frame_count = await loop.run_in_executor(
467
- executor,
468
- process_video_sync,
469
- temp_input.name,
470
- bbox_x,
471
- bbox_y,
472
- bbox_w,
473
- bbox_h
474
  )
475
 
476
  if output_path is None:
477
- raise HTTPException(400, f"Processing failed: {frame_count}")
478
 
479
- logger.info(f"✓ Returning result: {frame_count} frames")
480
-
481
- # Return video file
482
  return FileResponse(
483
  output_path,
484
  media_type='video/mp4',
485
  filename='tracked_video.mp4',
486
  headers={
487
- 'X-Frames-Processed': str(frame_count),
488
- 'Cache-Control': 'no-cache'
489
- },
490
- background=BackgroundTasks().add_task(cleanup_file, output_path)
491
  )
492
 
493
  except HTTPException:
494
  raise
495
  except Exception as e:
496
- logger.error(f"Error: {e}")
497
- raise HTTPException(500, str(e))
 
498
  finally:
 
499
  if temp_input and os.path.exists(temp_input.name):
500
  try:
501
  os.unlink(temp_input.name)
502
  except:
503
  pass
504
 
505
- def cleanup_file(path: str):
506
- """Background task to cleanup temp files"""
507
- try:
508
- if os.path.exists(path):
509
- os.unlink(path)
510
- except:
511
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
  @app.on_event("startup")
514
- async def startup():
 
515
  logger.info("=" * 50)
516
- logger.info("VisioTrack Starting...")
517
  logger.info("=" * 50)
518
- load_tracker()
519
- logger.info("✓ Ready on port 7860")
 
 
 
520
  logger.info("=" * 50)
521
 
 
522
  if __name__ == "__main__":
523
  import uvicorn
524
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ #!/usr/bin/env python
2
+ """
3
+ FastAPI Server for VisioTrack on Hugging Face Spaces
4
+ REST API for object tracking in videos
5
+ """
6
+
7
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
8
+ from fastapi.responses import FileResponse, JSONResponse
9
  from fastapi.middleware.cors import CORSMiddleware
10
  import cv2
11
  import torch
12
+ import numpy as np
13
  import tempfile
14
  import os
15
  import subprocess
16
  import shutil
17
+ from pathlib import Path
18
  from siamrpn import TrackerSiamRPN
19
  import logging
 
 
20
 
# Logging setup: set the root logger level to INFO and create the
# module-level logger used throughout this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Application object. The interactive Swagger UI is served from the root
# path ("/") and the ReDoc rendering from "/redoc".
app = FastAPI(
    docs_url="/",
    redoc_url="/redoc",
    title="VisioTrack API",
    version="1.0.0",
    description="Object tracking API using SiamRPN",
)
33
 
34
+ # Enable CORS for frontend integration
35
  app.add_middleware(
36
  CORSMiddleware,
37
  allow_origins=["*"],
 
40
  allow_headers=["*"],
41
  )
42
 
# Model configuration
MODEL_PATH = "model.pth"  # weights file, looked up relative to the working directory
tracker = None            # shared tracker instance, created lazily by load_tracker()
device = None             # torch.device chosen when the tracker was loaded


def load_tracker():
    """Return the shared SiamRPN tracker, creating it on first use.

    The first successful call checks that ``MODEL_PATH`` exists, picks
    ``cuda`` when ``torch.cuda.is_available()`` is true (``cpu`` otherwise),
    builds ``TrackerSiamRPN(net_path=MODEL_PATH)`` and stores both the
    tracker and the chosen device in the module-level globals. Later calls
    return the cached tracker unchanged.

    NOTE(review): the selected device is only recorded and logged here; it is
    not passed to ``TrackerSiamRPN``. Presumably the tracker picks its own
    device internally -- confirm against the ``siamrpn`` module.

    Returns:
        The module-level tracker instance.

    Raises:
        FileNotFoundError: if ``MODEL_PATH`` does not exist.
    """
    global tracker, device
    if tracker is not None:
        return tracker

    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file '{MODEL_PATH}' not found!")

    selected_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    loaded_tracker = TrackerSiamRPN(net_path=MODEL_PATH)

    # Publish both globals only once construction has succeeded, so a failed
    # load never leaves `device` set while `tracker` is still None.
    device = selected_device
    tracker = loaded_tracker
    logger.info("✓ Tracker loaded on %s", device)
    return tracker
59
 
60
+ def process_video_tracking(video_path: str, bbox_x: int, bbox_y: int,
61
+ bbox_w: int, bbox_h: int):
62
+ """
63
+ Process video with object tracking
64
+
65
+ Args:
66
+ video_path: Path to input video
67
+ bbox_x, bbox_y, bbox_w, bbox_h: Bounding box coordinates
68
+
69
+ Returns:
70
+ tuple: (output_path, message, metadata)
71
+ """
72
  try:
73
  tracker_instance = load_tracker()
74
 
75
  cap = cv2.VideoCapture(video_path)
76
  if not cap.isOpened():
77
+ return None, "Could not open video file", None
78
 
79
+ # Get video properties
80
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
81
+ if fps == 0:
82
+ fps = 30
83
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
84
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
85
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
86
 
87
+ logger.info(f"Video: {width}x{height} @ {fps}fps, {total_frames} frames")
88
 
89
  ret, frame = cap.read()
90
  if not ret:
91
+ return None, "Could not read first frame", None
92
 
93
+ # Validate bounding box
94
  if bbox_w <= 0 or bbox_h <= 0:
95
+ return None, "Invalid bounding box dimensions", None
96
 
97
+ if (bbox_x < 0 or bbox_y < 0 or
98
+ bbox_x + bbox_w > width or bbox_y + bbox_h > height):
99
+ return None, f"Bounding box out of bounds (frame: {width}x{height})", None
100
 
101
  bbox = [bbox_x, bbox_y, bbox_w, bbox_h]
102
+
103
+ # Initialize tracker
104
  tracker_instance.init(frame, bbox)
105
 
106
+ # Create temporary output file
107
  temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='_temp.mp4')
108
  temp_output.close()
109
 
110
+ # Use XVID codec for initial write
111
+ fourcc = cv2.VideoWriter_fourcc(*'XVID')
112
  writer = cv2.VideoWriter(temp_output.name, fourcc, fps, (width, height))
113
 
114
  if not writer.isOpened():
115
+ return None, "Could not create video writer", None
116
 
117
+ # Draw first frame with initial bbox
118
  x, y, w, h = [int(v) for v in bbox]
119
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
120
+ cv2.putText(frame, 'Frame: 1', (10, 30),
121
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
122
  writer.write(frame)
123
 
124
+ # Process remaining frames
125
  frame_count = 1
126
 
 
127
  while True:
128
  ret, frame = cap.read()
129
  if not ret:
 
133
 
134
  # Update tracker
135
  bbox = tracker_instance.update(frame)
 
136
 
137
+ # Draw tracking result
138
+ x, y, w, h = [int(v) for v in bbox]
139
  x = max(0, min(x, width - 1))
140
  y = max(0, min(y, height - 1))
141
  w = max(1, min(w, width - x))
142
  h = max(1, min(h, height - y))
143
 
144
  cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
145
+ cv2.putText(frame, f'Frame: {frame_count}', (10, 30),
146
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
147
 
148
  writer.write(frame)
149
 
 
150
  if frame_count % 30 == 0:
151
+ logger.info(f"Processed {frame_count}/{total_frames} frames")
152
 
153
  cap.release()
154
  writer.release()
155
 
156
+ # Re-encode with H.264 for browser compatibility
 
 
157
  final_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
158
  final_output.close()
159
 
160
  try:
161
+ logger.info("Re-encoding video for browser compatibility...")
162
+ subprocess.run([
163
  'ffmpeg', '-i', temp_output.name,
164
+ '-c:v', 'libx264',
165
+ '-preset', 'fast',
166
+ '-crf', '23',
167
+ '-pix_fmt', 'yuv420p',
168
  '-movflags', '+faststart',
169
+ '-y',
170
+ final_output.name
171
+ ], check=True, capture_output=True, text=True)
172
 
173
  os.unlink(temp_output.name)
174
+ logger.info("✓ Video re-encoded successfully")
175
 
176
+ except (subprocess.CalledProcessError, FileNotFoundError) as e:
177
+ logger.warning(f"FFmpeg encoding failed: {e}, using original")
 
 
 
178
  shutil.move(temp_output.name, final_output.name)
179
 
180
+ metadata = {
181
+ 'frames_processed': frame_count,
182
+ 'resolution': f"{width}x{height}",
183
+ 'fps': fps,
184
+ 'device': str(device)
185
+ }
186
+
187
+ return final_output.name, f"Successfully tracked {frame_count} frames", metadata
188
 
189
  except Exception as e:
190
+ logger.error(f"Tracking error: {str(e)}")
191
+ return None, f"Error: {str(e)}", None
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
@app.get("/health")
async def health_check():
    """Report service status together with GPU and model availability.

    The response is a JSON object with the keys ``status``,
    ``gpu_available``, ``gpu_name`` (``None`` when CUDA is unavailable) and
    ``model_loaded`` (whether the module-level tracker has been created).
    The original note on this endpoint says it is required by HF Spaces.
    """
    has_gpu = torch.cuda.is_available()
    gpu_name = torch.cuda.get_device_name(0) if has_gpu else None
    model_ready = tracker is not None

    payload = {
        'status': 'healthy',
        'gpu_available': has_gpu,
        'gpu_name': gpu_name,
        'model_loaded': model_ready,
    }
    return JSONResponse(payload)
205
+
206
 
207
  @app.post("/track")
208
  async def track_video(
209
+ video: UploadFile = File(..., description="Video file to process"),
210
+ bbox_x: int = Form(..., description="X coordinate of bounding box"),
211
+ bbox_y: int = Form(..., description="Y coordinate of bounding box"),
212
+ bbox_w: int = Form(..., description="Width of bounding box"),
213
+ bbox_h: int = Form(..., description="Height of bounding box")
214
  ):
215
+ """
216
+ Main tracking endpoint
217
+
218
+ Upload a video and bounding box coordinates to track an object.
219
+ Returns the processed video with tracking visualization.
220
+ """
221
  temp_input = None
222
  output_path = None
223
 
224
  try:
225
+ # Validate file type
226
+ if not video.content_type.startswith('video/'):
227
+ raise HTTPException(status_code=400, detail="File must be a video")
228
 
229
  # Save uploaded video
230
  temp_input = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
 
232
  temp_input.write(content)
233
  temp_input.close()
234
 
235
+ logger.info(f"Processing video: {video.filename}")
236
+ logger.info(f"Bounding box: ({bbox_x}, {bbox_y}, {bbox_w}, {bbox_h})")
237
 
238
+ # Process video
239
+ output_path, message, metadata = process_video_tracking(
240
+ temp_input.name, bbox_x, bbox_y, bbox_w, bbox_h
 
 
 
 
 
 
 
241
  )
242
 
243
  if output_path is None:
244
+ raise HTTPException(status_code=400, detail=message)
245
 
246
+ # Return processed video
 
 
247
  return FileResponse(
248
  output_path,
249
  media_type='video/mp4',
250
  filename='tracked_video.mp4',
251
  headers={
252
+ 'X-Frames-Processed': str(metadata['frames_processed']),
253
+ 'X-Resolution': metadata['resolution'],
254
+ 'X-FPS': str(metadata['fps'])
255
+ }
256
  )
257
 
258
  except HTTPException:
259
  raise
260
  except Exception as e:
261
+ logger.error(f"Error: {str(e)}")
262
+ raise HTTPException(status_code=500, detail=str(e))
263
+
264
  finally:
265
+ # Cleanup temporary files
266
  if temp_input and os.path.exists(temp_input.name):
267
  try:
268
  os.unlink(temp_input.name)
269
  except:
270
  pass
271
 
272
+
@app.get("/info")
async def get_info():
    """Return API metadata and usage instructions as a JSON object.

    The name, version and description are read from the ``app`` object so
    they cannot drift from the values given to ``FastAPI(...)``. The
    ``example_curl`` entry is assembled line by line so the response carries
    a clean, copy-pasteable command rather than the leading/trailing newlines
    and source indentation a triple-quoted literal would embed.
    """
    # One argument per line, joined with a shell line continuation.
    example_curl = " \\\n  ".join([
        'curl -X POST "https://your-space.hf.space/track"',
        '-F "video=@video.mp4"',
        '-F "bbox_x=100"',
        '-F "bbox_y=100"',
        '-F "bbox_w=200"',
        '-F "bbox_h=200"',
        '-o tracked_video.mp4',
    ])

    return {
        'name': app.title,
        'version': app.version,
        'description': app.description,
        'endpoints': {
            '/health': 'Health check',
            '/track': 'Track object in video (POST with multipart/form-data)',
            '/info': 'API information',
            '/': 'Interactive API documentation (Swagger UI)',
            '/redoc': 'Alternative API documentation (ReDoc)',
        },
        'usage': {
            'method': 'POST',
            'endpoint': '/track',
            'content_type': 'multipart/form-data',
            'parameters': {
                'video': 'Video file',
                'bbox_x': 'X coordinate (int)',
                'bbox_y': 'Y coordinate (int)',
                'bbox_w': 'Width (int)',
                'bbox_h': 'Height (int)',
            },
        },
        'example_curl': example_curl,
    }
310
+
311
 
@app.on_event("startup")
async def startup_event():
    """Log a startup banner and try to load the tracker model.

    A failure inside ``load_tracker()`` is logged and swallowed, so the
    exception does not propagate out of this startup hook.
    """
    separator = "=" * 50
    for banner_line in (separator, "VisioTrack FastAPI Server Starting...", separator):
        logger.info(banner_line)

    try:
        load_tracker()
        logger.info("✓ Model loaded successfully")
    except Exception as load_error:
        logger.error(f"✗ Failed to load model: {load_error}")

    logger.info(separator)
324
 
325
+
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860. uvicorn is imported here so it is only needed when the
    # module is run directly.
    from uvicorn import run as run_server

    run_server(app, host="0.0.0.0", port=7860)