bahaeddinmselmi committed on
Commit
4b16a94
·
1 Parent(s): c9f75f9

fix(analyzer): robust FFmpeg streaming with user-agent and reconnection

Browse files
Files changed (3) hide show
  1. app/services/downloader.py +20 -5
  2. app/services/pipeline.py +9 -10
  3. main.py +1 -1
app/services/downloader.py CHANGED
@@ -462,9 +462,17 @@ def stream_extract_frames(url: str, job_id: str, max_frames: int = 5, duration:
462
  fps = max_frames / duration
463
 
464
  # FFmpeg reads directly from the URL (no disk write for video)
465
- # Optimized: Scale down to max 1280px width to save RAM/Time
466
  cmd = [
467
  'ffmpeg',
 
 
 
 
 
 
 
 
468
  '-t', str(duration), # Only process first N seconds
469
  '-i', video_url, # Input directly from URL
470
  '-vf', f'scale=min(1280,iw):-1,fps={fps}', # Extract at calculated fps + Scaled
@@ -477,14 +485,13 @@ def stream_extract_frames(url: str, job_id: str, max_frames: int = 5, duration:
477
  result = subprocess.run(
478
  cmd,
479
  capture_output=True,
480
- timeout=90,
481
- env={**os.environ, 'FFMPEG_HTTP_SEEKABLE': '1'}
482
  )
483
 
484
  if result.returncode != 0:
485
  stderr = result.stderr.decode()
486
  print(f"[{job_id}] FFmpeg Error (Return Code {result.returncode}):")
487
- print(f"[{job_id}] FFmpeg stderr (first 500 chars): {stderr[:500]}")
488
  else:
489
  print(f"[{job_id}] FFmpeg extraction successful")
490
 
@@ -521,6 +528,14 @@ def stream_extract_audio(url: str, job_id: str, duration: int = 30) -> str:
521
  try:
522
  cmd = [
523
  'ffmpeg',
 
 
 
 
 
 
 
 
524
  '-t', str(duration), # Only first N seconds
525
  '-i', source_url, # Input from URL
526
  '-vn', # No video
@@ -536,7 +551,7 @@ def stream_extract_audio(url: str, job_id: str, duration: int = 30) -> str:
536
  if result.returncode != 0:
537
  stderr = result.stderr.decode()
538
  print(f"[{job_id}] Audio extraction Error (Return Code {result.returncode}):")
539
- print(f"[{job_id}] Audio stderr (first 300 chars): {stderr[:300]}")
540
  else:
541
  print(f"[{job_id}] Audio extraction successful")
542
 
 
462
  fps = max_frames / duration
463
 
464
  # FFmpeg reads directly from the URL (no disk write for video)
465
+ # Added User-Agent and reconnection flags to be more robust
466
  cmd = [
467
  'ffmpeg',
468
+ '-hide_banner',
469
+ '-loglevel', 'error', # Only log errors
470
+ '-user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
471
+ '-timeout', '10000000', # 10s timeout (FFmpeg takes this value in microseconds)
472
+ '-reconnect', '1',
473
+ '-reconnect_at_eof', '1',
474
+ '-reconnect_streamed', '1',
475
+ '-reconnect_delay_max', '2',
476
  '-t', str(duration), # Only process first N seconds
477
  '-i', video_url, # Input directly from URL
478
  '-vf', f'scale=min(1280,iw):-1,fps={fps}', # Extract at calculated fps + Scaled
 
485
  result = subprocess.run(
486
  cmd,
487
  capture_output=True,
488
+ timeout=90
 
489
  )
490
 
491
  if result.returncode != 0:
492
  stderr = result.stderr.decode()
493
  print(f"[{job_id}] FFmpeg Error (Return Code {result.returncode}):")
494
+ print(f"[{job_id}] FFmpeg stderr (first 1000 chars): {stderr[:1000]}")
495
  else:
496
  print(f"[{job_id}] FFmpeg extraction successful")
497
 
 
528
  try:
529
  cmd = [
530
  'ffmpeg',
531
+ '-hide_banner',
532
+ '-loglevel', 'error',
533
+ '-user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
534
+ '-timeout', '10000000',
535
+ '-reconnect', '1',
536
+ '-reconnect_at_eof', '1',
537
+ '-reconnect_streamed', '1',
538
+ '-reconnect_delay_max', '2',
539
  '-t', str(duration), # Only first N seconds
540
  '-i', source_url, # Input from URL
541
  '-vn', # No video
 
551
  if result.returncode != 0:
552
  stderr = result.stderr.decode()
553
  print(f"[{job_id}] Audio extraction Error (Return Code {result.returncode}):")
554
+ print(f"[{job_id}] Audio stderr (first 1000 chars): {stderr[:1000]}")
555
  else:
556
  print(f"[{job_id}] Audio extraction successful")
557
 
app/services/pipeline.py CHANGED
@@ -85,28 +85,27 @@ async def run_analysis_pipeline(job_id: str, url: str, uploaded_file_path: str,
85
 
86
  # PATH A: URL
87
  if url and not uploaded_file_path:
88
- print(f"[{job_id}] Path A: URL analysis")
89
  frame_paths = stream_extract_frames(url, job_id, max_frames=8, duration=30)
90
 
91
  if not frame_paths:
92
- print(f"[{job_id}] Streaming failed, falling back to full download")
93
  video_path = download_video(url, job_id)
94
  if video_path and os.path.exists(video_path):
95
- print(f"[{job_id}] Full download successful, extracting frames")
96
  frame_paths = extract_frames(video_path, job_id, fps=0.5, max_frames=8)
97
  audio_path = extract_audio(video_path, job_id)
98
  elif is_youtube_url(url):
99
- print(f"[{job_id}] Full download failed for YouTube, trying thumbnail fallback")
100
  frame_paths = download_youtube_thumbnail(url, job_id)
101
  thumbnail_only = True
102
-
103
- if not frame_paths:
104
- error_msg = "Could not download video or extract frames (All layers failed)"
105
- print(f"[{job_id}] ERROR: {error_msg}")
106
- jobs_db[job_id] = {"status": "failed", "error": error_msg}
107
  return
108
  else:
109
- print(f"[{job_id}] Streaming successful, extracting audio")
110
  audio_path = stream_extract_audio(url, job_id, duration=30)
111
 
112
  # PATH B: Upload
 
85
 
86
  # PATH A: URL
87
  if url and not uploaded_file_path:
88
+ print(f"[{job_id}] Attempting stream extraction...")
89
  frame_paths = stream_extract_frames(url, job_id, max_frames=8, duration=30)
90
 
91
  if not frame_paths:
92
+ print(f"[{job_id}] Stream extraction failed, attempting full download...")
93
  video_path = download_video(url, job_id)
94
  if video_path and os.path.exists(video_path):
95
+ print(f"[{job_id}] Download successful, extracting frames from file...")
96
  frame_paths = extract_frames(video_path, job_id, fps=0.5, max_frames=8)
97
  audio_path = extract_audio(video_path, job_id)
98
  elif is_youtube_url(url):
99
+ print(f"[{job_id}] YouTube video blocked, attempting thumbnail fallback...")
100
  frame_paths = download_youtube_thumbnail(url, job_id)
101
  thumbnail_only = True
102
+ else:
103
+ msg = "Could not download video or extract frames (All layers failed)"
104
+ print(f"[{job_id}] ERROR: {msg}")
105
+ jobs_db[job_id] = {"status": "failed", "error": msg}
 
106
  return
107
  else:
108
+ print(f"[{job_id}] Stream extraction successful, extracting audio stream...")
109
  audio_path = stream_extract_audio(url, job_id, duration=30)
110
 
111
  # PATH B: Upload
main.py CHANGED
@@ -61,7 +61,7 @@ app.add_middleware(
61
  async def add_security_headers(request: Request, call_next):
62
  response = await call_next(request)
63
  response.headers["X-Content-Type-Options"] = "nosniff"
64
- response.headers["X-Frame-Options"] = "DENY"
65
  response.headers["X-XSS-Protection"] = "1; mode=block"
66
  response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
67
  return response
 
61
  async def add_security_headers(request: Request, call_next):
62
  response = await call_next(request)
63
  response.headers["X-Content-Type-Options"] = "nosniff"
64
+ response.headers["X-Frame-Options"] = "SAMEORIGIN"
65
  response.headers["X-XSS-Protection"] = "1; mode=block"
66
  response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
67
  return response