Spaces:

bahaeddinms
/

verivid

Sleeping

App Files Community

bahaeddinmselmi committed on Feb 12

Commit

4b16a94

1 Parent(s): c9f75f9

fix(analyzer): robust FFmpeg streaming with user-agent and reconnection

Browse files

Files changed (3) hide show

app/services/downloader.py +20 -5
app/services/pipeline.py +9 -10
main.py +1 -1

app/services/downloader.py CHANGED Viewed

@@ -462,9 +462,17 @@ def stream_extract_frames(url: str, job_id: str, max_frames: int = 5, duration:
         fps = max_frames / duration
         # FFmpeg reads directly from the URL (no disk write for video)
-        # Optimized: Scale down to max 1280px width to save RAM/Time
         cmd = [
             'ffmpeg',
             '-t', str(duration),           # Only process first N seconds
             '-i', video_url,               # Input directly from URL
             '-vf', f'scale=min(1280,iw):-1,fps={fps}', # Extract at calculated fps + Scaled
@@ -477,14 +485,13 @@ def stream_extract_frames(url: str, job_id: str, max_frames: int = 5, duration:
         result = subprocess.run(
             cmd,
             capture_output=True,
-            timeout=90,
-            env={**os.environ, 'FFMPEG_HTTP_SEEKABLE': '1'}
         )
         if result.returncode != 0:
             stderr = result.stderr.decode()
             print(f"[{job_id}] FFmpeg Error (Return Code {result.returncode}):")
-            print(f"[{job_id}] FFmpeg stderr (first 500 chars): {stderr[:500]}")
         else:
             print(f"[{job_id}] FFmpeg extraction successful")
@@ -521,6 +528,14 @@ def stream_extract_audio(url: str, job_id: str, duration: int = 30) -> str:
     try:
         cmd = [
             'ffmpeg',
             '-t', str(duration),       # Only first N seconds
             '-i', source_url,          # Input from URL
             '-vn',                     # No video
@@ -536,7 +551,7 @@ def stream_extract_audio(url: str, job_id: str, duration: int = 30) -> str:
         if result.returncode != 0:
             stderr = result.stderr.decode()
             print(f"[{job_id}] Audio extraction Error (Return Code {result.returncode}):")
-            print(f"[{job_id}] Audio stderr (first 300 chars): {stderr[:300]}")
         else:
             print(f"[{job_id}] Audio extraction successful")

         fps = max_frames / duration
         # FFmpeg reads directly from the URL (no disk write for video)
+        # Added User-Agent and reconnection flags to be more robust
         cmd = [
             'ffmpeg',
+            '-hide_banner',
+            '-loglevel', 'error',          # Only log errors
+            '-user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            '-timeout', '10000000',        # 10s timeout
+            '-reconnect', '1',
+            '-reconnect_at_eof', '1',
+            '-reconnect_streamed', '1',
+            '-reconnect_delay_max', '2',
             '-t', str(duration),           # Only process first N seconds
             '-i', video_url,               # Input directly from URL
             '-vf', f'scale=min(1280,iw):-1,fps={fps}', # Extract at calculated fps + Scaled
         result = subprocess.run(
             cmd,
             capture_output=True,
+            timeout=90
         )
         if result.returncode != 0:
             stderr = result.stderr.decode()
             print(f"[{job_id}] FFmpeg Error (Return Code {result.returncode}):")
+            print(f"[{job_id}] FFmpeg stderr (first 1000 chars): {stderr[:1000]}")
         else:
             print(f"[{job_id}] FFmpeg extraction successful")
     try:
         cmd = [
             'ffmpeg',
+            '-hide_banner',
+            '-loglevel', 'error',
+            '-user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            '-timeout', '10000000',
+            '-reconnect', '1',
+            '-reconnect_at_eof', '1',
+            '-reconnect_streamed', '1',
+            '-reconnect_delay_max', '2',
             '-t', str(duration),       # Only first N seconds
             '-i', source_url,          # Input from URL
             '-vn',                     # No video
         if result.returncode != 0:
             stderr = result.stderr.decode()
             print(f"[{job_id}] Audio extraction Error (Return Code {result.returncode}):")
+            print(f"[{job_id}] Audio stderr (first 1000 chars): {stderr[:1000]}")
         else:
             print(f"[{job_id}] Audio extraction successful")

app/services/pipeline.py CHANGED Viewed

@@ -85,28 +85,27 @@ async def run_analysis_pipeline(job_id: str, url: str, uploaded_file_path: str,
         # PATH A: URL
         if url and not uploaded_file_path:
-            print(f"[{job_id}] Path A: URL analysis")
             frame_paths = stream_extract_frames(url, job_id, max_frames=8, duration=30)
             if not frame_paths:
-                print(f"[{job_id}] Streaming failed, falling back to full download")
                 video_path = download_video(url, job_id)
                 if video_path and os.path.exists(video_path):
-                    print(f"[{job_id}] Full download successful, extracting frames")
                     frame_paths = extract_frames(video_path, job_id, fps=0.5, max_frames=8)
                     audio_path = extract_audio(video_path, job_id)
                 elif is_youtube_url(url):
-                    print(f"[{job_id}] Full download failed for YouTube, trying thumbnail fallback")
                     frame_paths = download_youtube_thumbnail(url, job_id)
                     thumbnail_only = True
-                if not frame_paths:
-                    error_msg = "Could not download video or extract frames (All layers failed)"
-                    print(f"[{job_id}] ERROR: {error_msg}")
-                    jobs_db[job_id] = {"status": "failed", "error": error_msg}
                     return
             else:
-                print(f"[{job_id}] Streaming successful, extracting audio")
                 audio_path = stream_extract_audio(url, job_id, duration=30)
         # PATH B: Upload

         # PATH A: URL
         if url and not uploaded_file_path:
+            print(f"[{job_id}] Attempting stream extraction...")
             frame_paths = stream_extract_frames(url, job_id, max_frames=8, duration=30)
             if not frame_paths:
+                print(f"[{job_id}] Stream extraction failed, attempting full download...")
                 video_path = download_video(url, job_id)
                 if video_path and os.path.exists(video_path):
+                    print(f"[{job_id}] Download successful, extracting frames from file...")
                     frame_paths = extract_frames(video_path, job_id, fps=0.5, max_frames=8)
                     audio_path = extract_audio(video_path, job_id)
                 elif is_youtube_url(url):
+                    print(f"[{job_id}] YouTube video blocked, attempting thumbnail fallback...")
                     frame_paths = download_youtube_thumbnail(url, job_id)
                     thumbnail_only = True
+                else:
+                    msg = "Could not download video or extract frames (All layers failed)"
+                    print(f"[{job_id}] ERROR: {msg}")
+                    jobs_db[job_id] = {"status": "failed", "error": msg}
                     return
             else:
+                print(f"[{job_id}] Stream extraction successful, extracting audio stream...")
                 audio_path = stream_extract_audio(url, job_id, duration=30)
         # PATH B: Upload

main.py CHANGED Viewed

@@ -61,7 +61,7 @@ app.add_middleware(
 async def add_security_headers(request: Request, call_next):
     response = await call_next(request)
     response.headers["X-Content-Type-Options"] = "nosniff"
-    response.headers["X-Frame-Options"] = "DENY"
     response.headers["X-XSS-Protection"] = "1; mode=block"
     response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
     return response

 async def add_security_headers(request: Request, call_next):
     response = await call_next(request)
     response.headers["X-Content-Type-Options"] = "nosniff"
+    response.headers["X-Frame-Options"] = "SAMEORIGIN"
     response.headers["X-XSS-Protection"] = "1; mode=block"
     response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
     return response