"""Gradio front end for downloading a single video with yt-dlp.

Workflow: paste a URL, fetch the list of available formats, pick one (or
"Auto"), let the server download it, then save it from the browser through
the download button.
"""

import glob
import io
import os
import re
import shutil
import tempfile

import gradio as gr
import pandas as pd
from yt_dlp import YoutubeDL

# Largest file (in MB / bytes) that will be offered for download.  The names
# are historical; the value now simply caps the size of the delivered file.
MAX_IN_MEMORY_MB = 250
MAX_IN_MEMORY_BYTES = MAX_IN_MEMORY_MB * 1024 * 1024

# First dropdown entry; prepare_download() recognises it by its prefix.
_AUTO_LABEL = "Auto (best) — best video+audio (will merge if needed)"

# Column order of the read-only formats table.
_TABLE_COLUMNS = ["#", "type", "id", "ext", "res", "fps", "vcodec", "acodec", "size", "note"]

# Fixed file stem used inside the temporary download directory, so the output
# template and the lookup pattern never depend on the (arbitrary) video id.
_DOWNLOAD_STEM = "download"


def human_size(n):
    """Return *n* bytes as a short human-readable string, or "-" if unknown."""
    if not n or n <= 0:
        return "-"
    units = ["B", "KB", "MB", "GB", "TB"]
    i = 0
    n = float(n)
    while n >= 1024 and i < len(units) - 1:
        n /= 1024.0
        i += 1
    return f"{n:.1f} {units[i]}"


def kind_of(fmt):
    """Classify a yt-dlp format dict by which streams it carries.

    A codec field equal to the string "none" means the stream is absent; any
    other value (including a missing key) is treated as present.
    """
    v = fmt.get("vcodec")
    a = fmt.get("acodec")
    if v != "none" and a != "none":
        return "video+audio"
    if v != "none" and a == "none":
        return "video-only"
    if v == "none" and a != "none":
        return "audio-only"
    return "other"


def build_menu(info):
    """Build the sorted list of selectable formats from a yt-dlp info dict.

    Each entry is a plain dict with normalised fields (missing numbers become
    0, a missing note becomes ""). Entries are ordered by stream type, then by
    descending height, fps, total bitrate and audio bitrate.
    """
    menu = [
        {
            "type": kind_of(f),
            "format_id": f.get("format_id"),
            "ext": f.get("ext"),
            "height": f.get("height") or 0,
            "width": f.get("width") or 0,
            "fps": f.get("fps") or 0,
            "vcodec": f.get("vcodec"),
            "acodec": f.get("acodec"),
            "tbr": f.get("tbr") or 0,
            "abr": f.get("abr") or 0,
            "filesize": f.get("filesize") or f.get("filesize_approx"),
            "format_note": f.get("format_note") or "",
        }
        for f in info.get("formats", [])
    ]
    type_order = {"video+audio": 0, "video-only": 1, "audio-only": 2, "other": 3}
    menu.sort(key=lambda m: (
        type_order.get(m["type"], 9),
        -(m["height"] or 0),
        -(m["fps"] or 0),
        -(m["tbr"] or 0),
        -(m["abr"] or 0),
    ))
    return menu


def slugify(text, fallback="file"):
    """Turn *text* into a file-name-safe stem, or return *fallback* if empty.

    Characters other than word characters, whitespace, "." and "-" are
    removed, whitespace runs become "_", and leading/trailing "." and "_" are
    stripped.
    """
    text = (text or "").strip()
    if not text:
        return fallback
    text = re.sub(r"[^\w\s.-]", "", text)
    text = re.sub(r"\s+", "_", text).strip("._")
    return text or fallback


def _format_duration(duration):
    """Return *duration* seconds as "M:SS", or "-" when it is not usable."""
    # Some extractors report float seconds; the range test also rejects
    # NaN and infinity, which int() cannot convert.
    if isinstance(duration, (int, float)) and 0 <= duration < float("inf"):
        total = int(duration)
        return f"{total // 60}:{total % 60:02d}"
    return "-"


def _fetch_failure(message):
    """Return the fetch_formats() output tuple that clears the UI and shows *message*."""
    return (
        gr.update(choices=[], value=None),
        gr.update(value=None),
        {},
        [],
        message,
    )


def fetch_formats(url):
    """Probe *url* with yt-dlp and populate the format picker.

    Returns a 5-tuple matching the click handler's outputs: dropdown update,
    table update, info state dict, menu state list, status text.
    """
    url = (url or "").strip()
    if not url:
        return _fetch_failure("Paste a YouTube URL and click Fetch.")

    probe_opts = {"quiet": True, "skip_download": True, "noplaylist": True}
    try:
        with YoutubeDL(probe_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:  # UI boundary: report any probe failure to the user
        return _fetch_failure(f"Error: {e}")

    # A playlist URL still yields a playlist wrapper; use its first entry.
    if info.get("_type") == "playlist" and info.get("entries"):
        info = info["entries"][0]

    title = info.get("title", "Untitled")
    video_id = info.get("id", "video")
    dur_str = _format_duration(info.get("duration"))
    menu = build_menu(info)

    # Dropdown choices. The leading "#<index>" is parsed back by prepare_download().
    choices = [_AUTO_LABEL]
    table_rows = []
    for i, m in enumerate(menu):
        res = f"{m['height']}p" if m["height"] else "-"
        fps = f"{m['fps']}fps" if m["fps"] else "-"
        size = human_size(m["filesize"])
        v = (m["vcodec"] or "-").split(".")[0][:10]
        a = (m["acodec"] or "-").split(".")[0][:10]
        # str() guards the width spec below, which None does not support.
        fmt_id = str(m["format_id"] or "-")
        choices.append(
            f"#{i} {m['type']:<11} | id={fmt_id:<6} | {m['ext']} | {res}@{fps} "
            f"| v:{v} a:{a} | ~{size} | {m['format_note']}"
        )
        # Table row for reference
        table_rows.append({
            "#": i,
            "type": m["type"],
            "id": m["format_id"],
            "ext": m["ext"],
            "res": res,
            "fps": m["fps"] or "-",
            "vcodec": m["vcodec"] or "-",
            "acodec": m["acodec"] or "-",
            "size": size,
            "note": m["format_note"],
        })

    df = pd.DataFrame(table_rows, columns=_TABLE_COLUMNS)
    info_state = {"video_id": video_id, "title": title, "url": url}
    status = f"Title: {title}\nDuration: {dur_str}\nFound {len(menu)} formats."
    return (
        gr.update(choices=choices, value=choices[0]),
        gr.update(value=df),
        info_state,
        menu,
        status,
    )


def prepare_download(url, selection, info_state, menu_state, progress=gr.Progress()):
    """Download the selected format and hand the file to the download button.

    Returns a 2-tuple ``(download button update, status text)``. On any
    failure the button is hidden and the status text explains why.

    The file is downloaded into a temporary directory, then moved to a
    separate staging directory under a name derived from the video title.
    The staged path (not raw bytes) is what the download button receives.
    """
    url = (url or "").strip()
    if not url:
        return gr.update(visible=False), "Paste a URL and click Fetch first."
    if not selection:
        return gr.update(visible=False), "Please select a format."

    info_state = info_state or {}
    # Format ids and the title belong to the URL that was fetched; refuse to
    # apply them to a different URL typed in afterwards.
    if info_state.get("url") != url:
        return gr.update(visible=False), (
            "The URL changed since formats were fetched. Please fetch formats again."
        )

    video_id = info_state.get("video_id", "video")
    title = info_state.get("title", "Untitled")
    safe_base = slugify(title, fallback=video_id)

    # Determine format selector
    if selection.startswith("Auto (best)"):
        fmt_selector = "bestvideo*+bestaudio/best"
        chosen_label = "Auto (best)"
    else:
        m = re.match(r"#(\d+)\s", selection)
        if not m:
            return gr.update(visible=False), "Invalid selection label. Please fetch formats again."
        idx = int(m.group(1))
        menu = menu_state or []
        if idx < 0 or idx >= len(menu):
            return gr.update(visible=False), "Selected index is out of range. Please fetch formats again."
        sel = menu[idx]
        if sel["type"] == "video-only":
            # "+" binds tighter than "/", so every alternative must repeat the
            # video id; otherwise the fallback would fetch audio only.
            fid = sel["format_id"]
            fmt_selector = f"{fid}+bestaudio[ext=m4a]/{fid}+bestaudio/{fid}"
        else:
            fmt_selector = sel["format_id"]
        res = f"{sel['height']}p" if sel["height"] else "-"
        chosen_label = f"{sel['type']} id={sel['format_id']} ({sel['ext']}, {res}@{sel['fps']}fps)"

    progress(0.05, desc="Starting download...")

    # Download into a temp dir that is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        ydl_opts = {
            "format": fmt_selector,
            "noplaylist": True,
            # "%" is the template marker, so a literal one in the directory
            # path must be doubled.
            "outtmpl": os.path.join(tmpdir.replace("%", "%%"), f"{_DOWNLOAD_STEM}.%(ext)s"),
            "merge_output_format": "mp4",  # fallback to mkv if incompatible
            "concurrent_fragment_downloads": 4,
            "quiet": True,
            "no_warnings": True,
        }
        try:
            with YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        except Exception as e:  # UI boundary: report any download failure
            return gr.update(visible=False), f"Download error: {e}"

        # Find the resulting file (newest first, in case several remain).
        pattern = os.path.join(glob.escape(tmpdir), f"{_DOWNLOAD_STEM}.*")
        candidates = sorted(glob.glob(pattern), key=os.path.getmtime, reverse=True)
        if not candidates:
            return gr.update(visible=False), "Download finished, but output file not found."

        final_path = candidates[0]
        size = os.path.getsize(final_path)
        if size > MAX_IN_MEMORY_BYTES:
            return gr.update(visible=False), (
                f"Downloaded {human_size(size)} which exceeds the in-memory limit of "
                f"{MAX_IN_MEMORY_MB} MB for direct download. "
                "Please choose a lower quality or modify MAX_IN_MEMORY_MB in the app."
            )

        progress(0.9, desc="Packaging file...")

        # Decide final filename
        ext = os.path.splitext(final_path)[1].lstrip(".") or "mp4"
        file_name = f"{safe_base}.{ext}"

        # Move the file out before the temp dir is deleted. The staging dir
        # is not removed here because the file must outlive this call.
        staging_dir = tempfile.mkdtemp(prefix="ytdl_")
        delivery_path = os.path.join(staging_dir, file_name)
        shutil.move(final_path, delivery_path)

    status = (
        f"Ready: {title}\n"
        f"Selected: {chosen_label}\n"
        f"Format selector: {fmt_selector}\n"
        f"Size: {human_size(size)}\n"
        "Click the Download button to save to your computer."
    )
    # DownloadButton takes a file path; the browser saves it under its basename.
    return gr.update(value=delivery_path, visible=True), status


with gr.Blocks(title="YouTube Downloader (yt-dlp)", theme="soft") as demo:
    gr.Markdown(
        "## YouTube Downloader (yt-dlp + Gradio on Hugging Face)\n"
        "- Paste a YouTube URL, Fetch formats, choose a quality, then Prepare download.\n"
        "- The Download button sends the file directly to your browser.\n\n"
        "Only download content you have rights to."
    )
    with gr.Row():
        url_in = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...", lines=1)
        fetch_btn = gr.Button("Fetch formats", variant="primary")
    status_box = gr.Textbox(label="Info / Status", interactive=False, lines=4)
    formats_table = gr.DataFrame(label="Available formats (read-only)", interactive=False, wrap=True)
    format_dd = gr.Dropdown(label="Choose a format", choices=[], value=None)
    with gr.Row():
        prep_btn = gr.Button("Prepare download", variant="primary")
        dl_btn = gr.DownloadButton("Download file", visible=False)

    # Hidden state shared between the two handlers
    info_state = gr.State({})
    menu_state = gr.State([])

    fetch_btn.click(
        fetch_formats,
        inputs=[url_in],
        outputs=[format_dd, formats_table, info_state, menu_state, status_box],
    )
    prep_btn.click(
        prepare_download,
        inputs=[url_in, format_dd, info_state, menu_state],
        outputs=[dl_btn, status_box],
    )

# On Spaces, just running this script is enough
if __name__ == "__main__":
    demo.queue().launch()