Spaces:
Paused
Paused
| """ | |
| YouTube video downloader module using yt-dlp | |
| """ | |
| import os | |
| import random | |
| import subprocess | |
| import yt_dlp | |
def cleanup_video_file(video_path):
    """
    Remove a single processed video file from disk.

    Args:
        video_path (str): Path to the video file to delete

    Returns:
        bool: True when the file existed and was removed; False when it was
        missing or when the removal raised an error
    """
    try:
        # Guard clause: nothing to do when the file is already gone.
        if not os.path.exists(video_path):
            print(f"Video file not found for cleanup: {video_path}")
            return False

        os.remove(video_path)
        print(f"Cleaned up video file: {video_path}")
        return True
    except Exception as exc:
        # Best-effort cleanup: report the problem instead of propagating it.
        print(f"Error cleaning up video file {video_path}: {str(exc)}")
        return False
def cleanup_downloads_directory(output_dir="downloads", keep_annotated=True):
    """
    Clean up downloaded videos from the downloads directory.

    Only regular files directly inside ``output_dir`` are considered;
    sub-directories are left alone. Files whose name contains
    ``pro_reference`` are always kept, and files whose name contains
    ``_annotated`` are kept when ``keep_annotated`` is True.

    Args:
        output_dir (str): Directory containing downloaded videos
        keep_annotated (bool): Whether to keep annotated videos (default: True)

    Returns:
        dict: ``{"files_removed": int, "space_freed_mb": float}`` on success
        (zeros when the directory does not exist), or ``{"error": str}`` when
        the directory could not be processed at all
    """
    try:
        if not os.path.exists(output_dir):
            return {"files_removed": 0, "space_freed_mb": 0}

        files_removed = 0
        space_freed = 0

        for filename in os.listdir(output_dir):
            file_path = os.path.join(output_dir, filename)

            # Skip if not a file
            if not os.path.isfile(file_path):
                continue

            # Skip annotated videos if keep_annotated is True
            if keep_annotated and "_annotated" in filename:
                continue

            # Skip pro reference videos (they can be reused)
            if "pro_reference" in filename:
                continue

            try:
                # Size must be read before deletion ...
                file_size = os.path.getsize(file_path)
                os.remove(file_path)
            except Exception as e:
                print(f"Error removing {filename}: {str(e)}")
                continue

            # ... but is only counted once the removal actually succeeded,
            # so a failed delete no longer inflates the freed-space total.
            space_freed += file_size
            files_removed += 1
            print(f"Cleaned up: {filename}")

        # Convert bytes to MB
        space_freed_mb = space_freed / (1024 * 1024)

        return {
            "files_removed": files_removed,
            "space_freed_mb": round(space_freed_mb, 2),
        }
    except Exception as e:
        print(f"Error during cleanup: {str(e)}")
        return {"error": str(e)}
def get_user_agents():
    """Return a fresh list of common desktop browser User-Agent strings to rotate between."""
    chrome_engine = "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    firefox_engine = "Gecko/20100101 Firefox/120.0"

    chrome_platforms = (
        "Windows NT 10.0; Win64; x64",
        "Macintosh; Intel Mac OS X 10_15_7",
        "X11; Linux x86_64",
    )
    firefox_platforms = (
        "Windows NT 10.0; Win64; x64",
        "Macintosh; Intel Mac OS X 10.15",
    )

    # Chrome entries come first, followed by the Firefox ones.
    agents = [f"Mozilla/5.0 ({platform}) {chrome_engine}" for platform in chrome_platforms]
    agents.extend(
        f"Mozilla/5.0 ({platform}; rv:120.0) {firefox_engine}" for platform in firefox_platforms
    )
    return agents
def try_extract_browser_cookies():
    """
    Try to extract cookies from an installed browser automatically.

    Runs the ``yt-dlp`` command-line tool once per browser (chrome, firefox,
    safari, edge - in that order) and asks it to load that browser's cookies
    and dump the cookie jar to ``~/.config/yt-dlp/cookies_<browser>.txt``.

    Returns:
        str | None: Path to the extracted cookies file for the first browser
        that succeeds, None when no browser yields a cookies file or when the
        ``yt-dlp`` executable is not available
    """
    browsers = ['chrome', 'firefox', 'safari', 'edge']

    for browser in browsers:
        cookies_path = os.path.expanduser(f"~/.config/yt-dlp/cookies_{browser}.txt")
        try:
            # The target directory does not exist on a fresh machine.
            os.makedirs(os.path.dirname(cookies_path), exist_ok=True)

            # `--cookies FILE` is the option that dumps the cookie jar;
            # `--print-to-file` only writes an output template to a file and
            # never produces a usable cookies file.
            cmd = [
                'yt-dlp',
                '--cookies-from-browser', browser,
                '--cookies', cookies_path,
                '--no-download',
                'https://www.youtube.com/',
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        except FileNotFoundError:
            # The yt-dlp executable itself is missing, so trying the
            # remaining browsers cannot succeed either.
            print("yt-dlp executable not found; cannot extract browser cookies")
            return None
        except Exception as e:
            # Best effort: a timeout or OS error for one browser must not
            # prevent trying the next one.
            print(f"Could not extract cookies from {browser}: {str(e)}")
            continue

        if result.returncode == 0 and os.path.exists(cookies_path):
            print(f"Successfully extracted cookies from {browser}")
            return cookies_path

    return None
def find_cookies_file():
    """
    Locate a cookies file usable for YouTube authentication.

    Well-known locations are checked in order (yt-dlp config directory, home
    directory, relative ``cookies.txt``, current working directory). When
    none of them exists, extraction from an installed browser is attempted.

    Returns:
        str | None: Path to the cookies file if found or extracted, None otherwise
    """
    candidates = (
        os.path.expanduser("~/.config/yt-dlp/cookies.txt"),
        os.path.expanduser("~/cookies.txt"),
        "cookies.txt",
        os.path.join(os.getcwd(), "cookies.txt"),
    )

    # The first existing candidate wins.
    existing = next((candidate for candidate in candidates if os.path.exists(candidate)), None)
    if existing is not None:
        print(f"Found existing cookies file: {existing}")
        return existing

    # Nothing on disk yet, so fall back to pulling cookies from a browser.
    print("No existing cookies found, trying to extract from browser...")
    return try_extract_browser_cookies() or None
def print_cookie_help():
    """
    Print instructions for setting up cookies to bypass YouTube bot detection.

    The text is written to standard output; nothing is returned.
    """
    help_lines = [
        "",
        "🔧 YouTube Bot Detection Fix - Cookie Setup Instructions:",
        "Method 1 - Automatic (Recommended):",
        "The system will try to automatically extract cookies from your browser.",
        "Method 2 - Manual Cookie Export:",
        '1. Install a browser extension like "Get cookies.txt LOCALLY"',
        "2. Go to youtube.com and make sure you're logged in",
        "3. Use the extension to export cookies as 'cookies.txt'",
        "4. Save the file in one of these locations:",
        "• ~/cookies.txt (your home directory)",
        "• ~/.config/yt-dlp/cookies.txt",
        "• In the same folder as this script",
        "Method 3 - Command Line (Advanced):",
        # `--cookies FILE` is the option that dumps the cookie jar; the
        # previously advertised `--print-to-file cookies ...` does not
        # produce a cookies file.
        "Run: yt-dlp --cookies-from-browser chrome --cookies ~/cookies.txt --no-download https://youtube.com",
        "(Replace 'chrome' with your browser: firefox, safari, edge)",
        "Method 4 - Alternative Video Sources:",
        "• Try using a different YouTube video URL",
        "• Consider using videos that don't require authentication",
        "Note: YouTube's bot detection is sometimes temporary - you can also try again later.",
        "",
    ]
    print("\n".join(help_lines))
def get_fallback_configs():
    """
    Build the ordered list of yt-dlp option sets to try in sequence.

    Returns:
        list[dict]: Entries of the form ``{'name': str, 'opts': dict}``. The
        cookie-based strategy is only present (and first) when a cookies file
        could be located; it is followed by the Android client, the web
        client with full browser headers, and a basic fallback.
    """
    agents = get_user_agents()
    cookie_path = find_cookies_file()

    def youtube_clients(*clients):
        # extractor_args fragment selecting the YouTube player client(s).
        return {'youtube': {'player_client': list(clients)}}

    strategies = []

    # Strategy 1: Use cookies if available
    if cookie_path:
        cookie_opts = {
            'cookiefile': cookie_path,
            'http_headers': {'User-Agent': random.choice(agents)},
            'extractor_args': youtube_clients('android', 'web'),
        }
        strategies.append({'name': 'with_cookies', 'opts': cookie_opts})

    # Strategy 2: Android client (often works better)
    android_opts = {
        'http_headers': {
            'User-Agent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
        },
        'extractor_args': youtube_clients('android'),
    }
    strategies.append({'name': 'android_client', 'opts': android_opts})

    # Strategy 3: Web client with full headers
    browser_headers = {'User-Agent': random.choice(agents)}
    browser_headers.update({
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Encoding': 'gzip,deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
    })
    web_opts = {
        'http_headers': browser_headers,
        'extractor_args': youtube_clients('web'),
    }
    strategies.append({'name': 'web_client_full', 'opts': web_opts})

    # Strategy 4: Basic configuration (fallback)
    basic_opts = {'http_headers': {'User-Agent': random.choice(agents)}}
    strategies.append({'name': 'basic', 'opts': basic_opts})

    return strategies
def _resolve_downloaded_path(ydl, info, output_dir):
    """Return the on-disk path of the file yt-dlp just downloaded, or None if it cannot be found."""
    candidates = []

    # Preferred: the final path(s) yt-dlp recorded for this download.
    for item in info.get('requested_downloads') or []:
        candidates.append(item.get('filepath'))

    # Next: the name yt-dlp derives from the output template, which already
    # accounts for its own filename sanitisation.
    try:
        candidates.append(ydl.prepare_filename(info))
    except Exception as e:
        # Best effort only - the guesses below can still succeed.
        print(f"Could not derive filename from yt-dlp: {str(e)}")

    # Legacy guesses based on the raw and a hand-sanitised title.
    title = info.get('title') or 'video'
    ext = info.get('ext') or 'mp4'
    candidates.append(os.path.join(output_dir, f"{title}.{ext}"))
    sanitized_title = ''.join(c for c in title if c.isalnum() or c in ' ._-')
    candidates.append(os.path.join(output_dir, f"{sanitized_title}.{ext}"))

    for candidate in candidates:
        if candidate and os.path.exists(candidate):
            return candidate

    # Last resort: the most recently modified mp4 in the directory (rather
    # than an arbitrary one, which could be an unrelated older download).
    mp4_paths = [
        os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith('.mp4')
    ]
    if mp4_paths:
        return max(mp4_paths, key=os.path.getmtime)
    return None


def download_youtube_video(url, output_dir="downloads"):
    """
    Download a YouTube video from the provided URL using yt-dlp with fallback strategies.

    Each configuration returned by ``get_fallback_configs()`` is tried in
    order until one of them downloads the video.

    Args:
        url (str): YouTube video URL
        output_dir (str): Directory to save the downloaded video (created if missing)

    Returns:
        str: Path to the downloaded video file

    Raises:
        ValueError: If every download strategy failed (invalid URL, video
            unavailable, bot detection, playlist URL, file not found, ...)
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Set output template for the downloaded file
    output_template = os.path.join(output_dir, "%(title)s.%(ext)s")

    last_error = None

    # Try each configuration strategy
    for config in get_fallback_configs():
        print(f"Trying download strategy: {config['name']}")

        # Base yt-dlp options
        ydl_opts = {
            'format': 'best[ext=mp4]/best',  # Prefer mp4 format
            'outtmpl': output_template,
            'noplaylist': True,
            'quiet': False,
            'no_warnings': False,
            'ignoreerrors': False,
            'sleep_interval': 1,
            'max_sleep_interval': 5,
        }
        # Merge strategy-specific options
        ydl_opts.update(config['opts'])

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                if info is None:
                    raise ValueError("yt-dlp returned no video information")

                print(f"Download successful with strategy: {config['name']}")

                if 'entries' in info:
                    # Playlist (should not happen with noplaylist=True)
                    raise ValueError("Playlists are not supported")

                video_path = _resolve_downloaded_path(ydl, info, output_dir)

            if video_path is None:
                raise ValueError("Downloaded file not found")
            return video_path

        except yt_dlp.utils.DownloadError as e:
            last_error = str(e)
            print(f"Strategy '{config['name']}' failed: {last_error}")
            if "Sign in to confirm you're not a bot" in last_error:
                print("Bot detection encountered, trying next strategy...")
        except Exception as e:
            last_error = str(e)
            print(f"Strategy '{config['name']}' failed with error: {last_error}")

    # If all strategies failed, provide helpful error message
    error_msg = f"All download strategies failed. Last error: {last_error}"
    if "Sign in to confirm you're not a bot" in (last_error or ""):
        print_cookie_help()
        error_msg += "\n\n⚠️ YouTube bot detection encountered. See the instructions above to fix this issue."
    raise ValueError(error_msg)
def download_youtube_video_simple(url, output_dir="downloads"):
    """
    Thin convenience wrapper around ``download_youtube_video``.

    Announces the download and, when the failure is caused by YouTube's bot
    detection, prints a short list of quick fixes before re-raising.

    Args:
        url (str): YouTube video URL
        output_dir (str): Directory to save the downloaded video

    Returns:
        str: Path to the downloaded video file

    Raises:
        ValueError: Propagated unchanged from ``download_youtube_video``
    """
    print(f"📥 Starting download from: {url}")

    bot_marker = "Sign in to confirm you're not a bot"
    quick_fix_lines = (
        "\n🤖 YouTube bot detection encountered!",
        "💡 Quick fixes to try:",
        " • Wait a few minutes and try again",
        " • Try a different YouTube video",
        " • Use a different network/VPN",
        "\n📋 For persistent issues, run print_cookie_help() for detailed setup instructions",
    )

    try:
        video_path = download_youtube_video(url, output_dir)
    except ValueError as err:
        # Only bot-detection failures get the extra hints; every failure is
        # re-raised to the caller.
        if bot_marker in str(err):
            for line in quick_fix_lines:
                print(line)
        raise err
    return video_path
def download_pro_reference(url="https://www.youtube.com/shorts/geR666LWSHg", output_dir="downloads"):
    """
    Download a professional golfer reference video, reusing a cached copy when present.

    The video is stored in ``output_dir`` as ``pro_reference.<ext>``. An
    existing ``pro_reference`` file with an ``mp4``, ``webm`` or ``mkv``
    extension is returned without downloading. Otherwise the regular
    ``download_youtube_video`` path is tried first and its result is renamed;
    if that fails, each fallback strategy is retried with a fixed output name.

    Args:
        url (str): YouTube video URL of professional golfer (default: provided reference)
        output_dir (str): Directory to save the downloaded video

    Returns:
        str: Path to the downloaded pro reference video file

    Raises:
        ValueError: If the reference video could not be obtained by any strategy
    """
    known_exts = ('mp4', 'webm', 'mkv')

    def find_existing():
        # mp4 is checked first so it stays the preferred format.
        for ext in known_exts:
            candidate = os.path.join(output_dir, f"pro_reference.{ext}")
            if os.path.exists(candidate):
                return candidate
        return None

    try:
        os.makedirs(output_dir, exist_ok=True)

        # Reuse a cached copy in any format this function can itself produce;
        # checking only the mp4 name re-downloaded webm/mkv copies every time.
        cached_path = find_existing()
        if cached_path:
            print("Pro reference video already exists, using cached version")
            return cached_path

        # Try to download using the improved download function first
        try:
            print("Downloading pro reference video...")
            video_path = download_youtube_video(url, output_dir)

            # Rename to pro_reference, keeping the real extension.
            # os.replace overwrites an existing target on every platform.
            ext = os.path.splitext(video_path)[1]
            new_path = os.path.join(output_dir, f"pro_reference{ext}")
            os.replace(video_path, new_path)
            print(f"Pro reference downloaded and saved as: {new_path}")
            return new_path
        except Exception as download_error:
            print(f"Standard download failed: {download_error}")
            print("Trying direct download with fixed name...")

        # Fallback: try direct download with fixed filename
        output_template = os.path.join(output_dir, "pro_reference.%(ext)s")

        for config in get_fallback_configs():
            print(f"Trying pro reference download with strategy: {config['name']}")
            ydl_opts = {
                'format': 'best[ext=mp4]/best',
                'outtmpl': output_template,
                'noplaylist': True,
                'quiet': False,
                'no_warnings': False,
                'ignoreerrors': False,
            }
            ydl_opts.update(config['opts'])

            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    ydl.extract_info(url, download=True)
            except Exception as e:
                print(f"Pro reference strategy '{config['name']}' failed: {str(e)}")
                continue

            downloaded_path = find_existing()
            if downloaded_path is None:
                # The download reported success but left no recognisable file.
                print(f"Pro reference strategy '{config['name']}' produced no pro_reference file")
                continue

            if downloaded_path.endswith('.mp4'):
                print(f"Pro reference downloaded successfully with strategy: {config['name']}")
            else:
                actual_ext = os.path.splitext(downloaded_path)[1].lstrip('.')
                print(f"Pro reference downloaded as {actual_ext} format")
            return downloaded_path

        raise ValueError("All pro reference download strategies failed")
    except Exception as e:
        # Keep the original exception as the cause for easier debugging.
        raise ValueError(f"Error downloading pro reference: {str(e)}") from e