Spaces:
Sleeping
Sleeping
bahaeddinmselmi committed on
Commit ·
4b16a94
1
Parent(s): c9f75f9
fix(analyzer): robust FFmpeg streaming with user-agent and reconnection
Browse files
- app/services/downloader.py +20 -5
- app/services/pipeline.py +9 -10
- main.py +1 -1
app/services/downloader.py
CHANGED
|
@@ -462,9 +462,17 @@ def stream_extract_frames(url: str, job_id: str, max_frames: int = 5, duration:
|
|
| 462 |
fps = max_frames / duration
|
| 463 |
|
| 464 |
# FFmpeg reads directly from the URL (no disk write for video)
|
| 465 |
-
#
|
| 466 |
cmd = [
|
| 467 |
'ffmpeg',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
'-t', str(duration), # Only process first N seconds
|
| 469 |
'-i', video_url, # Input directly from URL
|
| 470 |
'-vf', f'scale=min(1280,iw):-1,fps={fps}', # Extract at calculated fps + Scaled
|
|
@@ -477,14 +485,13 @@ def stream_extract_frames(url: str, job_id: str, max_frames: int = 5, duration:
|
|
| 477 |
result = subprocess.run(
|
| 478 |
cmd,
|
| 479 |
capture_output=True,
|
| 480 |
-
timeout=90
|
| 481 |
-
env={**os.environ, 'FFMPEG_HTTP_SEEKABLE': '1'}
|
| 482 |
)
|
| 483 |
|
| 484 |
if result.returncode != 0:
|
| 485 |
stderr = result.stderr.decode()
|
| 486 |
print(f"[{job_id}] FFmpeg Error (Return Code {result.returncode}):")
|
| 487 |
-
print(f"[{job_id}] FFmpeg stderr (first
|
| 488 |
else:
|
| 489 |
print(f"[{job_id}] FFmpeg extraction successful")
|
| 490 |
|
|
@@ -521,6 +528,14 @@ def stream_extract_audio(url: str, job_id: str, duration: int = 30) -> str:
|
|
| 521 |
try:
|
| 522 |
cmd = [
|
| 523 |
'ffmpeg',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
'-t', str(duration), # Only first N seconds
|
| 525 |
'-i', source_url, # Input from URL
|
| 526 |
'-vn', # No video
|
|
@@ -536,7 +551,7 @@ def stream_extract_audio(url: str, job_id: str, duration: int = 30) -> str:
|
|
| 536 |
if result.returncode != 0:
|
| 537 |
stderr = result.stderr.decode()
|
| 538 |
print(f"[{job_id}] Audio extraction Error (Return Code {result.returncode}):")
|
| 539 |
-
print(f"[{job_id}] Audio stderr (first
|
| 540 |
else:
|
| 541 |
print(f"[{job_id}] Audio extraction successful")
|
| 542 |
|
|
|
|
| 462 |
fps = max_frames / duration
|
| 463 |
|
| 464 |
# FFmpeg reads directly from the URL (no disk write for video)
|
| 465 |
+
# Added User-Agent and reconnection flags to be more robust
|
| 466 |
cmd = [
|
| 467 |
'ffmpeg',
|
| 468 |
+
'-hide_banner',
|
| 469 |
+
'-loglevel', 'error', # Only log errors
|
| 470 |
+
'-user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 471 |
+
'-timeout', '10000000', # 10s timeout
|
| 472 |
+
'-reconnect', '1',
|
| 473 |
+
'-reconnect_at_eof', '1',
|
| 474 |
+
'-reconnect_streamed', '1',
|
| 475 |
+
'-reconnect_delay_max', '2',
|
| 476 |
'-t', str(duration), # Only process first N seconds
|
| 477 |
'-i', video_url, # Input directly from URL
|
| 478 |
'-vf', f'scale=min(1280,iw):-1,fps={fps}', # Extract at calculated fps + Scaled
|
|
|
|
| 485 |
result = subprocess.run(
|
| 486 |
cmd,
|
| 487 |
capture_output=True,
|
| 488 |
+
timeout=90
|
|
|
|
| 489 |
)
|
| 490 |
|
| 491 |
if result.returncode != 0:
|
| 492 |
stderr = result.stderr.decode()
|
| 493 |
print(f"[{job_id}] FFmpeg Error (Return Code {result.returncode}):")
|
| 494 |
+
print(f"[{job_id}] FFmpeg stderr (first 1000 chars): {stderr[:1000]}")
|
| 495 |
else:
|
| 496 |
print(f"[{job_id}] FFmpeg extraction successful")
|
| 497 |
|
|
|
|
| 528 |
try:
|
| 529 |
cmd = [
|
| 530 |
'ffmpeg',
|
| 531 |
+
'-hide_banner',
|
| 532 |
+
'-loglevel', 'error',
|
| 533 |
+
'-user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 534 |
+
'-timeout', '10000000',
|
| 535 |
+
'-reconnect', '1',
|
| 536 |
+
'-reconnect_at_eof', '1',
|
| 537 |
+
'-reconnect_streamed', '1',
|
| 538 |
+
'-reconnect_delay_max', '2',
|
| 539 |
'-t', str(duration), # Only first N seconds
|
| 540 |
'-i', source_url, # Input from URL
|
| 541 |
'-vn', # No video
|
|
|
|
| 551 |
if result.returncode != 0:
|
| 552 |
stderr = result.stderr.decode()
|
| 553 |
print(f"[{job_id}] Audio extraction Error (Return Code {result.returncode}):")
|
| 554 |
+
print(f"[{job_id}] Audio stderr (first 1000 chars): {stderr[:1000]}")
|
| 555 |
else:
|
| 556 |
print(f"[{job_id}] Audio extraction successful")
|
| 557 |
|
app/services/pipeline.py
CHANGED
|
@@ -85,28 +85,27 @@ async def run_analysis_pipeline(job_id: str, url: str, uploaded_file_path: str,
|
|
| 85 |
|
| 86 |
# PATH A: URL
|
| 87 |
if url and not uploaded_file_path:
|
| 88 |
-
print(f"[{job_id}]
|
| 89 |
frame_paths = stream_extract_frames(url, job_id, max_frames=8, duration=30)
|
| 90 |
|
| 91 |
if not frame_paths:
|
| 92 |
-
print(f"[{job_id}]
|
| 93 |
video_path = download_video(url, job_id)
|
| 94 |
if video_path and os.path.exists(video_path):
|
| 95 |
-
print(f"[{job_id}]
|
| 96 |
frame_paths = extract_frames(video_path, job_id, fps=0.5, max_frames=8)
|
| 97 |
audio_path = extract_audio(video_path, job_id)
|
| 98 |
elif is_youtube_url(url):
|
| 99 |
-
print(f"[{job_id}]
|
| 100 |
frame_paths = download_youtube_thumbnail(url, job_id)
|
| 101 |
thumbnail_only = True
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
jobs_db[job_id] = {"status": "failed", "error": error_msg}
|
| 107 |
return
|
| 108 |
else:
|
| 109 |
-
print(f"[{job_id}]
|
| 110 |
audio_path = stream_extract_audio(url, job_id, duration=30)
|
| 111 |
|
| 112 |
# PATH B: Upload
|
|
|
|
| 85 |
|
| 86 |
# PATH A: URL
|
| 87 |
if url and not uploaded_file_path:
|
| 88 |
+
print(f"[{job_id}] Attempting stream extraction...")
|
| 89 |
frame_paths = stream_extract_frames(url, job_id, max_frames=8, duration=30)
|
| 90 |
|
| 91 |
if not frame_paths:
|
| 92 |
+
print(f"[{job_id}] Stream extraction failed, attempting full download...")
|
| 93 |
video_path = download_video(url, job_id)
|
| 94 |
if video_path and os.path.exists(video_path):
|
| 95 |
+
print(f"[{job_id}] Download successful, extracting frames from file...")
|
| 96 |
frame_paths = extract_frames(video_path, job_id, fps=0.5, max_frames=8)
|
| 97 |
audio_path = extract_audio(video_path, job_id)
|
| 98 |
elif is_youtube_url(url):
|
| 99 |
+
print(f"[{job_id}] YouTube video blocked, attempting thumbnail fallback...")
|
| 100 |
frame_paths = download_youtube_thumbnail(url, job_id)
|
| 101 |
thumbnail_only = True
|
| 102 |
+
else:
|
| 103 |
+
msg = "Could not download video or extract frames (All layers failed)"
|
| 104 |
+
print(f"[{job_id}] ERROR: {msg}")
|
| 105 |
+
jobs_db[job_id] = {"status": "failed", "error": msg}
|
|
|
|
| 106 |
return
|
| 107 |
else:
|
| 108 |
+
print(f"[{job_id}] Stream extraction successful, extracting audio stream...")
|
| 109 |
audio_path = stream_extract_audio(url, job_id, duration=30)
|
| 110 |
|
| 111 |
# PATH B: Upload
|
main.py
CHANGED
|
@@ -61,7 +61,7 @@ app.add_middleware(
|
|
| 61 |
async def add_security_headers(request: Request, call_next):
|
| 62 |
response = await call_next(request)
|
| 63 |
response.headers["X-Content-Type-Options"] = "nosniff"
|
| 64 |
-
response.headers["X-Frame-Options"] = "
|
| 65 |
response.headers["X-XSS-Protection"] = "1; mode=block"
|
| 66 |
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
|
| 67 |
return response
|
|
|
|
| 61 |
async def add_security_headers(request: Request, call_next):
|
| 62 |
response = await call_next(request)
|
| 63 |
response.headers["X-Content-Type-Options"] = "nosniff"
|
| 64 |
+
response.headers["X-Frame-Options"] = "SAMEORIGIN"
|
| 65 |
response.headers["X-XSS-Protection"] = "1; mode=block"
|
| 66 |
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
|
| 67 |
return response
|