devusman committed on
Commit
c187eb7
·
1 Parent(s): a2e646a

feat: only ffmpeg

Browse files
Files changed (1) hide show
  1. app.py +86 -111
app.py CHANGED
@@ -1,132 +1,107 @@
1
- from flask import Flask, render_template, request, jsonify, Response, stream_with_context
2
- import yt_dlp
 
 
 
 
3
  import os
4
  import shutil
5
- import urllib.parse
6
- import requests # We now use requests to stream the final content
7
-
8
- app = Flask(__name__)
9
 
10
def get_ffmpeg_path():
    """Return the full path of the ``ffmpeg`` executable on PATH, or None if absent."""
    located = shutil.which('ffmpeg')
    return located
13
 
14
def sanitize_facebook_url(url):
    """Unwrap a Facebook redirect link and return the real target URL.

    Facebook wraps outbound links as ``https://l.facebook.com/l.php?u=<target>``.
    When ``url`` points at one of the redirector hosts and carries a ``u`` query
    parameter, the decoded value of that parameter is returned. Any other URL,
    and any URL that cannot be parsed, is returned unchanged.

    Args:
        url: The URL supplied by the user.

    Returns:
        The unwrapped target URL, or ``url`` itself when no unwrapping applies.
    """
    redirect_hosts = {'l.facebook.com', 'lm.facebook.com'}
    try:
        parsed_url = urllib.parse.urlparse(url)
        # Compare the normalised hostname (lower-cased, port stripped) exactly.
        # A substring test on netloc would also match look-alike hosts such as
        # "evil-l.facebook.com.example.net" and would miss "L.Facebook.com".
        if parsed_url.hostname in redirect_hosts:
            query_params = urllib.parse.parse_qs(parsed_url.query)
            if 'u' in query_params:
                clean_url = query_params['u'][0]
                print(f"Sanitized URL: {url} -> {clean_url}", flush=True)
                return clean_url
    except Exception as e:
        # Deliberately best-effort: a URL we cannot parse is passed through as-is
        # so the downstream extractor can report a meaningful error.
        print(f"Could not sanitize URL, using original. Error: {e}", flush=True)
    return url
27
 
28
@app.route('/', methods=['GET'])
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
32
-
33
@app.route('/download', methods=['POST'])
def download():
    """
    Act as a streaming proxy for a single media download.

    1. Extracts the direct media URL using yt-dlp (nothing is downloaded here).
    2. Streams the content from that URL directly to the user's browser.

    Form fields:
        url: Page URL of the content (required).
        format: Requested output format, one of mp4, webm, mp3, m4a, wav
            (defaults to mp4).

    Returns:
        A streaming ``Response`` sent as an attachment on success, otherwise a
        JSON error body with status 400 (bad input), 403 (private content or
        login required) or 500 (missing prerequisites, extraction or fetch
        failure).
    """
    # Supported output formats and the MIME type advertised for each.
    mime_types = {
        'mp4': 'video/mp4',
        'webm': 'video/webm',
        'mp3': 'audio/mpeg',
        'm4a': 'audio/mp4',
        'wav': 'audio/wav',
    }
    audio_formats = ['mp3', 'm4a', 'wav']

    url = request.form.get('url')
    output_format = request.form.get('format', 'mp4')

    if not url:
        return jsonify({'error': 'URL is required'}), 400

    # The format is untrusted input that ends up inside the yt-dlp format
    # selector and the Content-Disposition header, so accept known values only.
    if output_format not in mime_types:
        return jsonify({'error': 'Unsupported format requested.'}), 400

    # --- Pre-flight checks ---
    if not os.path.exists('cookies.txt'):
        return jsonify({'error': '`cookies.txt` not found. Required for Facebook downloads.'}), 500

    ffmpeg_path = get_ffmpeg_path()
    if not ffmpeg_path:
        return jsonify({'error': 'FFmpeg not found. It is required for format processing.'}), 500

    try:
        clean_url = sanitize_facebook_url(url)

        # --- yt-dlp options to EXTRACT INFO, NOT DOWNLOAD ---
        ydl_opts = {
            'quiet': True,
            'cookiefile': 'cookies.txt',
            'noplaylist': True,
            'ffmpeg_location': os.path.dirname(ffmpeg_path),
        }

        # Determine format selection for yt-dlp
        if output_format in audio_formats:
            ydl_opts['format'] = 'bestaudio/best'
            # The postprocessor is requested but never run (download=False below);
            # it is only there to steer stream selection.
            ydl_opts['postprocessors'] = [{'key': 'FFmpegExtractAudio', 'preferredcodec': output_format}]
        else:  # Video formats
            ydl_opts['format'] = f'bestvideo[ext={output_format}]+bestaudio[ext=m4a]/best[ext={output_format}]/best'

        # --- Stage 1: Get direct media URL from Facebook ---
        print("Extracting media information from URL...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(clean_url, download=False)

        # Find the best URL to stream from
        stream_url = info_dict.get('url')
        if not stream_url:
            # Video and audio may be separate; the first entry is the video (or
            # combined) stream.
            requested_formats = info_dict.get('requested_formats')
            if requested_formats:
                stream_url = requested_formats[0].get('url')
        if not stream_url:
            return jsonify({'error': 'Could not extract a downloadable URL from the provided link.'}), 500

        title = info_dict.get('title', 'facebook_content')
        safe_title = "".join([c for c in title if c.isalpha() or c.isdigit() or c in (' ', '-', '_')]).rstrip()
        if not safe_title:
            # A title made only of stripped characters would leave a bare ".ext" name.
            safe_title = 'facebook_content'
        download_name = f'{safe_title}.{output_format}'

        # --- Stage 2: Stream the content from the direct URL to the user ---
        print(f"Starting to stream from direct URL for: {download_name}")
        # Make a HEAD request first to get the total size for the progress bar
        head_req = requests.head(stream_url, allow_redirects=True, timeout=10)
        total_size = int(head_req.headers.get('content-length', 0))

        # Make the streaming GET request
        stream_req = requests.get(stream_url, stream=True, allow_redirects=True, timeout=15)

        # Check if the request was successful
        if not stream_req.ok:
            status_code = stream_req.status_code
            stream_req.close()  # release the pooled connection before bailing out
            return jsonify({'error': f'Failed to fetch media. Status: {status_code}'}), 500

        def generate_content():
            """Yield the upstream body in chunks and always release the connection."""
            try:
                for chunk in stream_req.iter_content(chunk_size=8192):
                    if chunk:
                        yield chunk
            finally:
                stream_req.close()

        # Prepare and return the streaming response
        response = Response(stream_with_context(generate_content()), mimetype=mime_types[output_format])
        response.headers['Content-Disposition'] = f'attachment; filename="{download_name}"'
        # Crucially, we provide the Content-Length for the progress bar
        if total_size > 0:
            response.headers['Content-Length'] = str(total_size)

        return response

    except Exception as e:
        error_message = str(e)
        print(f"An unexpected error occurred: {error_message}")
        if "private" in error_message.lower() or "login" in error_message.lower():
            return jsonify({'error': 'This content is private or requires login. Please check `cookies.txt`.'}), 403
        return jsonify({'error': 'An unknown error occurred. The link may be invalid or private.'}), 500
 
 
 
129
 
130
# For local testing
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach it execute arbitrary code,
    # and this server binds to every interface, so debug mode must be an explicit
    # opt-in (FLASK_DEBUG=1) rather than always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '0') == '1'
    app.run(host='0.0.0.0', port=8080, debug=debug_enabled)
 
 
1
+ # This is a NEW, separate app deployed to a server that has FFmpeg installed (e.g., Hugging Face Space with Docker)
2
+
3
+ from flask import Flask, request, Response, stream_with_context
4
+ import requests
5
+ import subprocess
6
+ import tempfile
7
  import os
8
  import shutil
9
+ import time
 
 
 
10
 
11
+ ffmpeg_app = Flask(__name__)
 
 
12
 
13
+ # NOTE: Ensure the 'ffmpeg' binary is available in the environment's PATH
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
@ffmpeg_app.route('/convert', methods=['GET'])
def convert_media():
    """
    Download the raw media file from the provided URL and convert it with FFmpeg.

    Query parameters:
        url: Direct URL of the media file to fetch (required).
        format: Target audio format, one of 'mp3', 'm4a' or 'wav' (required).

    Returns:
        A streaming response carrying the converted audio on success; otherwise
        a plain-text error with status 400 (missing or unsupported parameters),
        504 (download or conversion timed out) or 500 (any other failure).
    """
    # target format -> (FFmpeg audio codec, FFmpeg muxer name, response MIME type).
    # FFmpeg has no muxer named 'm4a'; .m4a files are produced by the 'ipod' muxer.
    format_table = {
        'mp3': ('libmp3lame', 'mp3', 'audio/mpeg'),
        'm4a': ('aac', 'ipod', 'audio/mp4'),
        'wav': ('pcm_s16le', 'wav', 'audio/wav'),
    }

    media_url = request.args.get('url')
    target_format = request.args.get('format')  # e.g., 'mp3', 'm4a', 'wav'

    if not media_url or not target_format:
        return "Missing 'url' or 'format' parameter", 400

    # The format is untrusted and is used in a file path and on the FFmpeg
    # command line, so only whitelisted values are accepted.
    if target_format not in format_table:
        return f"Unsupported format. Use one of: {', '.join(sorted(format_table))}", 400
    audio_codec, muxer, mime_type = format_table[target_format]

    temp_dir = tempfile.mkdtemp()

    def cleanup_temp_dir():
        """Remove the per-request working directory."""
        shutil.rmtree(temp_dir, ignore_errors=True)
        print(f"Cleaned up temp directory: {temp_dir}")

    # Keep the source extension as a hint for FFmpeg, but only when the heuristic
    # yields a plain short token; URLs without a file extension would otherwise
    # produce values containing '/' and an invalid path.
    input_ext = media_url.split('.')[-1].split('?')[0]
    if input_ext.isalnum() and len(input_ext) <= 5:
        input_name = f"input.{input_ext}"
    else:
        input_name = "input"
    input_file_path = os.path.join(temp_dir, input_name)
    output_file_path = os.path.join(temp_dir, f"output.{target_format}")

    # Becomes True once the response object owns the temp-dir cleanup.
    handed_off = False

    try:
        start_time = time.time()
        print(f"Starting conversion for {target_format} from URL: {media_url[:100]}...")

        # 1. Download the raw media file to a temporary file
        with requests.get(media_url, stream=True, timeout=120) as r:
            r.raise_for_status()
            content_length = int(r.headers.get('content-length', 0))
            print(f"Raw media size: {content_length/1024/1024:.2f} MB")
            with open(input_file_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        download_time = time.time()
        print(f"Download complete. Time taken: {download_time - start_time:.2f} seconds.")

        # 2. Run FFmpeg conversion
        command = [
            'ffmpeg',
            '-y',  # Overwrite output files without asking
            '-i', input_file_path,
            '-q:a', '0',  # Variable bitrate, highest quality
            '-map', 'a',  # Only include audio streams
            '-c:a', audio_codec,
            '-f', muxer,
            output_file_path,
        ]
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=180)  # 3-minute timeout

        conversion_time = time.time()
        print(f"FFmpeg conversion complete. Time taken: {conversion_time - download_time:.2f} seconds.")

        # 3. Stream the converted file back to the caller
        def stream_output_file():
            """Yield the converted file in 8 KiB chunks."""
            with open(output_file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(8192), b''):
                    yield chunk

        response = Response(stream_with_context(stream_output_file()), mimetype=mime_type)
        # The file is fully written at this point, so its size is known.
        response.headers['Content-Length'] = str(os.path.getsize(output_file_path))
        # The body is produced lazily after this function returns, so the temp
        # directory must outlive the function and be removed when the response
        # is closed, not in the `finally` below.
        response.call_on_close(cleanup_temp_dir)
        handed_off = True
        return response

    except subprocess.CalledProcessError as e:
        ffmpeg_error = e.stderr.decode(errors='replace')
        print(f"FFmpeg command failed with error: {ffmpeg_error}")
        return f"Conversion failed: {ffmpeg_error}", 500
    except subprocess.TimeoutExpired:
        return "Conversion timed out.", 504
    except requests.exceptions.Timeout:
        return "Media download timed out.", 504
    except Exception as e:
        print(f"An error occurred in the FFmpeg service: {e}")
        return f"Internal Server Error: {e}", 500

    finally:
        # 4. Clean up temporary files on every path that did not hand the
        # directory over to the streaming response.
        if not handed_off:
            cleanup_temp_dir()
104
 
 
105
if __name__ == '__main__':
    # You would typically use a WSGI server like Gunicorn in a production Docker deployment.
    # The Werkzeug debugger lets anyone who can reach it execute arbitrary code,
    # and this server binds to every interface, so debug mode must be an explicit
    # opt-in (FLASK_DEBUG=1) rather than always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '0') == '1'
    ffmpeg_app.run(host='0.0.0.0', port=5001, debug=debug_enabled)