Spaces:

devusman
/

fb-dl

Sleeping

App Files Files Community

devusman committed on Nov 11, 2025

Commit

c187eb7

1 Parent(s): a2e646a

feat: only ffmpeg

Browse files

Files changed (1) hide show

app.py +86 -111

app.py CHANGED Viewed

@@ -1,132 +1,107 @@
-from flask import Flask, render_template, request, jsonify, Response, stream_with_context
-import yt_dlp
 import os
 import shutil
-import urllib.parse
-import requests # We now use requests to stream the final content
-app = Flask(__name__)
-def get_ffmpeg_path():
-    """Finds the full path to the ffmpeg executable."""
-    return shutil.which('ffmpeg')
-def sanitize_facebook_url(url):
-    """Cleans up Facebook URLs that are wrapped in a redirect link."""
-    try:
-        parsed_url = urllib.parse.urlparse(url)
-        if 'l.facebook.com' in parsed_url.netloc:
-            query_params = urllib.parse.parse_qs(parsed_url.query)
-            if 'u' in query_params:
-                clean_url = query_params['u'][0]
-                print(f"Sanitized URL: {url} -> {clean_url}", flush=True)
-                return clean_url
-    except Exception as e:
-        print(f"Could not sanitize URL, using original. Error: {e}", flush=True)
-    return url
-@app.route('/', methods=['GET'])
-def index():
-    """Renders the main page."""
-    return render_template('index.html')
-@app.route('/download', methods=['POST'])
-def download():
     """
-    This function now acts as a streaming proxy.
-    1. Extracts the direct media URL using yt-dlp.
-    2. Streams the content from that URL directly to the user's browser.
     """
-    url = request.form.get('url')
-    output_format = request.form.get('format', 'mp4')
-    if not url:
-        return jsonify({'error': 'URL is required'}), 400
-    # --- Pre-flight checks ---
-    if not os.path.exists('cookies.txt'):
-        return jsonify({'error': '`cookies.txt` not found. Required for Facebook downloads.'}), 500
-    ffmpeg_path = get_ffmpeg_path()
-    if not ffmpeg_path:
-        return jsonify({'error': 'FFmpeg not found. It is required for format processing.'}), 500
     try:
-        clean_url = sanitize_facebook_url(url)
-        # --- yt-dlp Options to EXTRACT INFO, NOT DOWNLOAD ---
-        ydl_opts = {
-            'quiet': True, 'cookiefile': 'cookies.txt', 'noplaylist': True,
-            'ffmpeg_location': os.path.dirname(ffmpeg_path)
-        }
-        # Determine format selection for yt-dlp
-        audio_formats = ['mp3', 'm4a', 'wav']
-        if output_format in audio_formats:
-            ydl_opts['format'] = 'bestaudio/best'
-            # We request a postprocessor but won't run it; this helps select the right stream
-            ydl_opts['postprocessors'] = [{'key': 'FFmpegExtractAudio', 'preferredcodec': output_format}]
-        else: # Video formats
-             ydl_opts['format'] = f'bestvideo[ext={output_format}]+bestaudio[ext=m4a]/best[ext={output_format}]/best'
-        # --- Stage 1: Get direct media URL from Facebook ---
-        print("Extracting media information from URL...")
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info_dict = ydl.extract_info(clean_url, download=False)
-        # Find the best URL to stream from
-        stream_url = info_dict.get('url')
-        if not stream_url:
-            # Handle cases where video and audio are separate
-            requested_formats = info_dict.get('requested_formats')
-            if requested_formats:
-                # Typically, the first URL is the most relevant one (video or combined)
-                stream_url = requested_formats[0].get('url')
-            else:
-                 return jsonify({'error': 'Could not extract a downloadable URL from the provided link.'}), 500
-        title = info_dict.get('title', 'facebook_content')
-        safe_title = "".join([c for c in title if c.isalpha() or c.isdigit() or c in (' ', '-', '_')]).rstrip()
-        download_name = f'{safe_title}.{output_format}'
-        # --- Stage 2: Stream the content from the direct URL to the user ---
-        print(f"Starting to stream from direct URL for: {download_name}")
-        # Make a HEAD request first to get the total size for the progress bar
-        head_req = requests.head(stream_url, allow_redirects=True, timeout=10)
-        total_size = int(head_req.headers.get('content-length', 0))
-        # Make the streaming GET request
-        stream_req = requests.get(stream_url, stream=True, allow_redirects=True, timeout=15)
-        # Check if the request was successful
-        if not stream_req.ok:
-            return jsonify({'error': f'Failed to fetch media. Status: {stream_req.status_code}'}), 500
-        def generate_content():
-            """A generator function that yields chunks of the download."""
-            for chunk in stream_req.iter_content(chunk_size=8192):
-                if chunk:
-                    yield chunk
-        # Prepare and return the streaming response
-        mime_types = {'mp4': 'video/mp4', 'webm': 'video/webm', 'mp3': 'audio/mpeg', 'm4a': 'audio/mp4', 'wav': 'audio/wav'}
-        mimetype = mime_types.get(output_format, 'application/octet-stream')
-        response = Response(stream_with_context(generate_content()), mimetype=mimetype)
-        response.headers['Content-Disposition'] = f'attachment; filename="{download_name}"'
-        # Crucially, we provide the Content-Length for the progress bar
-        if total_size > 0:
-            response.headers['Content-Length'] = total_size
-        return response
     except Exception as e:
-        error_message = str(e)
-        print(f"An unexpected error occurred: {error_message}")
-        if "private" in error_message.lower() or "login" in error_message.lower():
-            return jsonify({'error': 'This content is private or requires login. Please check `cookies.txt`.'}), 403
-        return jsonify({'error': 'An unknown error occurred. The link may be invalid or private.'}), 500
-# For local testing
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=8080, debug=True)

+# This is a NEW, separate app deployed to a server that has FFmpeg installed (e.g., Hugging Face Space with Docker)
+from flask import Flask, request, Response, stream_with_context
+import requests
+import subprocess
+import tempfile
 import os
 import shutil
+import time
+ffmpeg_app = Flask(__name__)
+# NOTE: Ensure the 'ffmpeg' binary is available in the environment's PATH
+# Supported targets: format name -> (FFmpeg audio encoder, FFmpeg muxer, response MIME type).
+# FFmpeg has no muxer named 'm4a'; .m4a files are written by the 'ipod' muxer.
+_AUDIO_TARGETS = {
+    'mp3': ('libmp3lame', 'mp3', 'audio/mpeg'),
+    'm4a': ('aac', 'ipod', 'audio/mp4'),
+    'wav': ('pcm_s16le', 'wav', 'audio/wav'),  # uncompressed 16-bit little-endian PCM
+}
+
+
+def _text_error(message, status):
+    """Build a plain-text error response so echoed details are never rendered as HTML."""
+    return Response(message, status=status, mimetype='text/plain')
+
+
+def _input_suffix(media_url):
+    """Return the '.ext' suffix of the URL's last path segment, or '' if there is no clean one."""
+    # Drop the query string and fragment, then look only at the final path segment so the
+    # suffix can never contain '/' (which would point outside the temp directory).
+    url_path = media_url.split('?', 1)[0].split('#', 1)[0]
+    suffix = os.path.splitext(os.path.basename(url_path))[1]
+    return suffix if suffix[1:].isalnum() else ''
+
+
+def _remove_temp_dir(temp_dir):
+    """Delete the per-request temp directory; a directory that is already gone is not an error."""
+    shutil.rmtree(temp_dir, ignore_errors=True)
+    print(f"Cleaned up temp directory: {temp_dir}")
+
+
+@ffmpeg_app.route('/convert', methods=['GET'])
+def convert_media():
     """
+    Download the media at the given URL, transcode its audio with FFmpeg and stream the result.
+
+    Query parameters:
+        url: http(s) URL of the raw media file.
+        format: target audio format; one of 'mp3', 'm4a', 'wav'.
+
+    Returns:
+        On success, a streamed response carrying the converted audio. On failure, a plain-text
+        error with status 400 (missing/invalid parameters), 504 (download or conversion timed
+        out) or 500 (FFmpeg failure or any other error).
     """
+    media_url = request.args.get('url')
+    target_format = request.args.get('format')  # e.g., 'mp3', 'm4a', 'wav'
+    if not media_url or not target_format:
+        return _text_error("Missing 'url' or 'format' parameter", 400)
+    # 'format' ends up in a file name and on the FFmpeg command line, so only known values pass.
+    if target_format not in _AUDIO_TARGETS:
+        return _text_error("Unsupported 'format'; expected one of: " + ", ".join(sorted(_AUDIO_TARGETS)), 400)
+    if not media_url.lower().startswith(('http://', 'https://')):
+        return _text_error("'url' must be an http(s) URL", 400)
+    # NOTE(review): this endpoint fetches arbitrary caller-supplied URLs (SSRF risk); restrict
+    # who can reach it, or which hosts it may contact, at deployment time.
+    codec, muxer, mime_type = _AUDIO_TARGETS[target_format]
+    temp_dir = tempfile.mkdtemp()
+    # Keep the source's extension (when it has a clean one) as a hint for FFmpeg's format probing.
+    input_file_path = os.path.join(temp_dir, 'input' + _input_suffix(media_url))
+    output_file_path = os.path.join(temp_dir, f"output.{target_format}")
+    # The response body is generated lazily, after this function has returned. Once a streaming
+    # response has been built it owns the temp directory and removes it when it is closed.
+    response_owns_temp_dir = False
     try:
+        start_time = time.time()
+        print(f"Starting conversion for {target_format} from URL: {media_url[:100]}...")
+        # 1. Download the raw media file to a temporary file
+        with requests.get(media_url, stream=True, timeout=120) as r:
+            r.raise_for_status()
+            content_length = int(r.headers.get('content-length', 0))
+            print(f"Raw media size: {content_length/1024/1024:.2f} MB")
+            with open(input_file_path, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        download_time = time.time()
+        print(f"Download complete. Time taken: {download_time - start_time:.2f} seconds.")
+        # 2. Run FFmpeg conversion
+        command = [
+            'ffmpeg',
+            '-y',  # Overwrite output files without asking
+            '-i', input_file_path,
+            '-q:a', '0',  # Variable bitrate, highest quality
+            '-map', 'a',  # Only include audio streams
+            '-c:a', codec,
+            '-f', muxer,
+            output_file_path,
+        ]
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)  # 3-minute timeout
+        conversion_time = time.time()
+        print(f"FFmpeg conversion complete. Time taken: {conversion_time - download_time:.2f} seconds.")
+        # 3. Stream the converted file back to the caller
+        def stream_output_file():
+            with open(output_file_path, 'rb') as f:
+                # iter() with a sentinel stops at EOF without yielding a trailing empty chunk.
+                for chunk in iter(lambda: f.read(8192), b''):
+                    yield chunk
+        response = Response(stream_with_context(stream_output_file()), mimetype=mime_type)
+        # The conversion has finished, so the exact size is known before streaming starts.
+        response.headers['Content-Length'] = str(os.path.getsize(output_file_path))
+        # 4. Cleanup happens only after the body has been sent (or the client went away).
+        response.call_on_close(lambda: _remove_temp_dir(temp_dir))
+        response_owns_temp_dir = True
+        return response
+    except subprocess.CalledProcessError as e:
+        ffmpeg_log = e.stderr.decode(errors='replace')
+        print(f"FFmpeg command failed with error: {ffmpeg_log}")
+        return _text_error(f"Conversion failed: {ffmpeg_log}", 500)
+    except subprocess.TimeoutExpired:
+        return _text_error("Conversion timed out.", 504)
+    except requests.exceptions.Timeout:
+        return _text_error("Media download timed out.", 504)
     except Exception as e:
+        print(f"An error occurred in the FFmpeg service: {e}")
+        return _text_error(f"Internal Server Error: {e}", 500)
+    finally:
+        # Error paths never hand the directory to a response, so it is removed right here.
+        if not response_owns_temp_dir:
+            _remove_temp_dir(temp_dir)
 if __name__ == '__main__':
+    # You would typically use a WSGI server like Gunicorn in a production Docker deployment.
+    # The Werkzeug debugger lets anyone who can reach the port execute code, so it is not
+    # enabled implicitly while listening on all interfaces; set FLASK_DEBUG=1 to opt in locally.
+    ffmpeg_app.run(host='0.0.0.0', port=5001, debug=os.environ.get('FLASK_DEBUG') == '1')