Par-ity_Project / app /utils /video_downloader.py
chenemii's picture
Refactor and improve core application modules
34aaec8
"""
YouTube video downloader module using yt-dlp
"""
import os
import random
import subprocess
import yt_dlp
def cleanup_video_file(video_path):
    """
    Delete a specific video file after processing.

    The file is removed directly rather than being checked for existence
    first, so a file that vanishes between a check and the removal cannot
    be misreported as an unexpected error.

    Args:
        video_path (str): Path to the video file to delete

    Returns:
        bool: True if file was deleted successfully, False otherwise
            (empty/None path, missing file, directory, permission error, ...)
    """
    # An empty or None path can never name a file; report it as "not found"
    if not video_path:
        print(f"Video file not found for cleanup: {video_path}")
        return False

    try:
        os.remove(video_path)
    except FileNotFoundError:
        print(f"Video file not found for cleanup: {video_path}")
        return False
    except (OSError, TypeError, ValueError) as e:
        # OSError covers directories and permission problems; TypeError and
        # ValueError cover arguments that are not usable as a path at all
        print(f"Error cleaning up video file {video_path}: {str(e)}")
        return False

    print(f"Cleaned up video file: {video_path}")
    return True
def cleanup_downloads_directory(output_dir="downloads", keep_annotated=True):
    """
    Clean up downloaded videos from the downloads directory.

    Only regular files directly inside ``output_dir`` are considered;
    sub-directories are left alone. Files whose name contains
    ``pro_reference`` are always kept, and files whose name contains
    ``_annotated`` are kept when ``keep_annotated`` is True.

    Args:
        output_dir (str): Directory containing downloaded videos
        keep_annotated (bool): Whether to keep annotated videos (default: True)

    Returns:
        dict: Cleanup results with ``files_removed`` (int) and
            ``space_freed_mb`` (float, rounded to 2 decimals). If the
            directory cannot be processed the dict additionally carries an
            ``error`` message, with the counts gathered up to that point.
    """
    files_removed = 0
    space_freed = 0

    try:
        if not os.path.exists(output_dir):
            return {"files_removed": 0, "space_freed_mb": 0}

        for filename in os.listdir(output_dir):
            file_path = os.path.join(output_dir, filename)

            # Skip if not a file
            if not os.path.isfile(file_path):
                continue

            # Skip annotated videos if keep_annotated is True
            if keep_annotated and "_annotated" in filename:
                continue

            # Skip pro reference videos (they can be reused)
            if "pro_reference" in filename:
                continue

            try:
                # Size must be read before the file is gone
                file_size = os.path.getsize(file_path)
                os.remove(file_path)
            except OSError as e:
                print(f"Error removing {filename}: {str(e)}")
                continue

            # Count the space only once the removal has actually succeeded
            space_freed += file_size
            files_removed += 1
            print(f"Cleaned up: {filename}")

        return {
            "files_removed": files_removed,
            # Convert bytes to MB
            "space_freed_mb": round(space_freed / (1024 * 1024), 2),
        }

    except (OSError, TypeError, ValueError) as e:
        print(f"Error during cleanup: {str(e)}")
        return {
            "files_removed": files_removed,
            "space_freed_mb": round(space_freed / (1024 * 1024), 2),
            "error": str(e),
        }
def get_user_agents():
    """Return a fresh list of common desktop User-Agent strings to rotate between."""
    # Chrome 120 on Windows, macOS and Linux
    chrome_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    ]
    # Firefox 120 on Windows and macOS
    firefox_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:120.0) Gecko/20100101 Firefox/120.0',
    ]
    return chrome_agents + firefox_agents
def try_extract_browser_cookies():
    """
    Try to extract cookies from browser automatically.

    For each supported browser the ``yt-dlp`` command-line tool is run with
    ``--cookies-from-browser`` (load the browser's cookies) together with
    ``--cookies FILE`` (dump the cookie jar to FILE). The first browser for
    which the command succeeds and the file exists wins.

    Returns:
        str | None: Path to extracted cookies file if successful,
            None otherwise
    """
    browsers = ['chrome', 'firefox', 'safari', 'edge']

    # yt-dlp will not create missing parent directories for the cookie file
    cookies_dir = os.path.expanduser("~/.config/yt-dlp")
    try:
        os.makedirs(cookies_dir, exist_ok=True)
    except OSError as e:
        print(f"Could not create cookies directory {cookies_dir}: {e}")
        return None

    for browser in browsers:
        cookies_path = os.path.expanduser(f"~/.config/yt-dlp/cookies_{browser}.txt")
        # `--cookies` is the option that writes the cookie jar to a file;
        # `--print-to-file` would only write an output-template string
        cmd = [
            'yt-dlp',
            '--cookies-from-browser', browser,
            '--cookies', cookies_path,
            '--no-download',
            'https://www.youtube.com/',
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        except FileNotFoundError:
            # The yt-dlp executable itself is missing, so no browser can work
            print("yt-dlp executable not found; cannot extract browser cookies")
            return None
        except (subprocess.TimeoutExpired, OSError):
            # Best effort: this browser did not work, try the next one
            continue

        if result.returncode == 0 and os.path.exists(cookies_path):
            print(f"Successfully extracted cookies from {browser}")
            return cookies_path

    return None
def find_cookies_file():
    """
    Look for browser cookies file that can be used for YouTube authentication.

    Well-known locations are checked in order; only regular files count, so
    a directory that happens to be named ``cookies.txt`` is ignored. If none
    is found, automatic extraction from an installed browser is attempted.

    Returns:
        str | None: Path to cookies file if found, None otherwise
    """
    possible_paths = [
        os.path.expanduser("~/.config/yt-dlp/cookies.txt"),
        os.path.expanduser("~/cookies.txt"),
        "cookies.txt",
        os.path.join(os.getcwd(), "cookies.txt"),
    ]

    # First check for existing cookies files
    for path in possible_paths:
        if os.path.isfile(path):
            print(f"Found existing cookies file: {path}")
            return path

    # If no existing cookies found, try to extract from browser
    print("No existing cookies found, trying to extract from browser...")
    return try_extract_browser_cookies() or None
def print_cookie_help():
    """
    Print helpful instructions for setting up cookies to bypass YouTube bot detection.

    The manual command shown under "Method 3" uses ``--cookies FILE``, which
    is the yt-dlp option that writes the cookie jar to a file.
    """
    # Kept as one entry per output line; the leading and trailing empty
    # entries produce the blank line before and after the text.
    help_lines = (
        "",
        "🔧 YouTube Bot Detection Fix - Cookie Setup Instructions:",
        "Method 1 - Automatic (Recommended):",
        "The system will try to automatically extract cookies from your browser.",
        "Method 2 - Manual Cookie Export:",
        '1. Install a browser extension like "Get cookies.txt LOCALLY"',
        "2. Go to youtube.com and make sure you're logged in",
        "3. Use the extension to export cookies as 'cookies.txt'",
        "4. Save the file in one of these locations:",
        "• ~/cookies.txt (your home directory)",
        "• ~/.config/yt-dlp/cookies.txt",
        "• In the same folder as this script",
        "Method 3 - Command Line (Advanced):",
        "Run: yt-dlp --cookies-from-browser chrome --cookies ~/cookies.txt --no-download https://youtube.com",
        "(Replace 'chrome' with your browser: firefox, safari, edge)",
        "Method 4 - Alternative Video Sources:",
        "• Try using a different YouTube video URL",
        "• Consider using videos that don't require authentication",
        "Note: YouTube's bot detection is sometimes temporary - you can also try again later.",
        "",
    )
    print("\n".join(help_lines))
def get_fallback_configs():
    """
    Build the ordered list of yt-dlp option sets to try one after another.

    Returns:
        list[dict]: Entries with a ``name`` label and an ``opts`` dict of
            yt-dlp options. A cookie-based entry comes first, but only when
            a cookies file could be located.
    """
    agents = get_user_agents()
    cookie_path = find_cookies_file()

    def youtube_clients(*clients):
        # extractor_args fragment selecting the YouTube player client(s)
        return {'youtube': {'player_client': list(clients)}}

    strategies = []

    # Strategy 1: Use cookies if available
    if cookie_path:
        strategies.append({
            'name': 'with_cookies',
            'opts': {
                'cookiefile': cookie_path,
                'http_headers': {'User-Agent': random.choice(agents)},
                'extractor_args': youtube_clients('android', 'web'),
            },
        })

    strategies += [
        # Strategy 2: Android client (often works better)
        {
            'name': 'android_client',
            'opts': {
                'http_headers': {
                    'User-Agent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
                },
                'extractor_args': youtube_clients('android'),
            },
        },
        # Strategy 3: Web client with full headers
        {
            'name': 'web_client_full',
            'opts': {
                'http_headers': {
                    'User-Agent': random.choice(agents),
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language': 'en-us,en;q=0.5',
                    'Accept-Encoding': 'gzip,deflate',
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                    'Sec-Fetch-Dest': 'document',
                    'Sec-Fetch-Mode': 'navigate',
                    'Sec-Fetch-Site': 'none',
                    'Sec-Fetch-User': '?1',
                },
                'extractor_args': youtube_clients('web'),
            },
        },
        # Strategy 4: Basic configuration (fallback)
        {
            'name': 'basic',
            'opts': {
                'http_headers': {'User-Agent': random.choice(agents)},
            },
        },
    ]
    return strategies
def _find_downloaded_file(info, prepared_path, output_dir):
    """Return the path of the file yt-dlp just wrote, or None if it cannot be located."""
    candidates = []

    # Paths reported by yt-dlp itself come first: it applies its own
    # filename sanitisation, so guessing from the raw title is unreliable.
    for item in info.get('requested_downloads') or []:
        if item.get('filepath'):
            candidates.append(item['filepath'])
    if prepared_path:
        candidates.append(prepared_path)

    # Legacy guesses based on the raw and the stripped-down title
    title = info.get('title') or 'video'
    ext = info.get('ext') or 'mp4'
    candidates.append(os.path.join(output_dir, f"{title}.{ext}"))
    sanitized_title = ''.join(c for c in title if c.isalnum() or c in ' ._-')
    candidates.append(os.path.join(output_dir, f"{sanitized_title}.{ext}"))

    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate

    # Last resort: the most recently modified mp4, which is the likeliest
    # match when the directory also holds older downloads
    mp4_files = [
        os.path.join(output_dir, f)
        for f in os.listdir(output_dir) if f.endswith('.mp4')
    ]
    if mp4_files:
        return max(mp4_files, key=os.path.getmtime)
    return None


def download_youtube_video(url, output_dir="downloads"):
    """
    Download a YouTube video from the provided URL using yt-dlp with fallback strategies.

    Each configuration from ``get_fallback_configs()`` is tried in order
    until one download succeeds. Download failures move on to the next
    strategy; problems detected after a successful download (playlist
    result, file not found on disk) are raised immediately, because
    retrying would only download the same content again.

    Args:
        url (str): YouTube video URL
        output_dir (str): Directory to save the downloaded video

    Returns:
        str: Path to the downloaded video file

    Raises:
        ValueError: If every strategy fails, the URL resolves to a playlist,
            or the downloaded file cannot be found
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Set output template for the downloaded file
    output_template = os.path.join(output_dir, "%(title)s.%(ext)s")

    last_error = None

    # Try each configuration strategy
    for config in get_fallback_configs():
        print(f"Trying download strategy: {config['name']}")

        # Base yt-dlp options
        ydl_opts = {
            'format': 'best[ext=mp4]/best',  # Prefer mp4 format
            'outtmpl': output_template,
            'noplaylist': True,
            'quiet': False,
            'no_warnings': False,
            'ignoreerrors': False,
            'sleep_interval': 1,
            'max_sleep_interval': 5,
        }
        # Merge strategy-specific options
        ydl_opts.update(config['opts'])

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                # Ask yt-dlp which filename it derived from the template
                is_single_video = bool(info) and 'entries' not in info
                prepared_path = ydl.prepare_filename(info) if is_single_video else None
        except yt_dlp.utils.DownloadError as e:
            last_error = str(e)
            print(f"Strategy '{config['name']}' failed: {last_error}")
            if "Sign in to confirm you're not a bot" in last_error:
                print("Bot detection encountered, trying next strategy...")
            continue
        except Exception as e:
            # Best effort: any other failure also moves on to the next strategy
            last_error = str(e)
            print(f"Strategy '{config['name']}' failed with error: {last_error}")
            continue

        if not info:
            last_error = "yt-dlp returned no video information"
            print(f"Strategy '{config['name']}' failed: {last_error}")
            continue

        # If we get here, download was successful
        print(f"Download successful with strategy: {config['name']}")

        if 'entries' in info:
            # Playlist (should not happen with noplaylist=True)
            raise ValueError("Playlists are not supported")

        video_path = _find_downloaded_file(info, prepared_path, output_dir)
        if video_path is None:
            raise ValueError("Downloaded file not found")
        return video_path

    # If all strategies failed, provide helpful error message
    error_msg = f"All download strategies failed. Last error: {last_error}"
    if "Sign in to confirm you're not a bot" in (last_error or ""):
        print_cookie_help()
        error_msg += "\n\n⚠️ YouTube bot detection encountered. See the instructions above to fix this issue."
    raise ValueError(error_msg)
def download_youtube_video_simple(url, output_dir="downloads"):
    """
    Thin wrapper around ``download_youtube_video`` that adds user-facing hints.

    Announces the download, delegates to ``download_youtube_video`` and,
    when the failure message indicates YouTube bot detection, prints a
    short list of quick fixes before re-raising the error.

    Args:
        url (str): YouTube video URL
        output_dir (str): Directory to save the downloaded video

    Returns:
        str: Path to the downloaded video file

    Raises:
        ValueError: Re-raised unchanged from ``download_youtube_video``
    """
    print(f"📥 Starting download from: {url}")
    try:
        video_path = download_youtube_video(url, output_dir)
    except ValueError as err:
        bot_blocked = "Sign in to confirm you're not a bot" in str(err)
        if bot_blocked:
            hints = (
                "\n🤖 YouTube bot detection encountered!",
                "💡 Quick fixes to try:",
                " • Wait a few minutes and try again",
                " • Try a different YouTube video",
                " • Use a different network/VPN",
                "\n📋 For persistent issues, run print_cookie_help() for detailed setup instructions",
            )
            for hint in hints:
                print(hint)
        raise err
    return video_path
def download_pro_reference(url="https://www.youtube.com/shorts/geR666LWSHg", output_dir="downloads"):
    """
    Download a professional golfer reference video using improved download methods.

    A previously saved ``pro_reference.<ext>`` (mp4, webm or mkv) in
    ``output_dir`` is reused without touching the network. Otherwise the
    regular ``download_youtube_video`` path is tried first and the result
    renamed; if that fails, each fallback strategy is retried with a fixed
    ``pro_reference`` output name.

    Args:
        url (str): YouTube video URL of professional golfer (default: provided reference)
        output_dir (str): Directory to save the downloaded video

    Returns:
        str: Path to the downloaded pro reference video file

    Raises:
        ValueError: If the reference video could not be obtained
    """
    # Extensions this function may end up saving, in order of preference
    known_exts = ('mp4', 'webm', 'mkv')

    def _existing_reference():
        # Return the first pro_reference.<ext> present on disk, else None
        for ext in known_exts:
            candidate = os.path.join(output_dir, f"pro_reference.{ext}")
            if os.path.exists(candidate):
                return candidate
        return None

    try:
        os.makedirs(output_dir, exist_ok=True)

        # Check if pro reference already exists to avoid re-downloading
        cached_path = _existing_reference()
        if cached_path:
            print("Pro reference video already exists, using cached version")
            return cached_path

        # Try to download using the improved download function first
        try:
            print("Downloading pro reference video...")
            video_path = download_youtube_video(url, output_dir)

            # Rename to pro_reference, keeping the original extension;
            # os.replace overwrites a stale target on every platform
            ext = os.path.splitext(video_path)[1]
            new_path = os.path.join(output_dir, f"pro_reference{ext}")
            os.replace(video_path, new_path)
            print(f"Pro reference downloaded and saved as: {new_path}")
            return new_path
        except Exception as download_error:
            # Best effort: fall through to the fixed-filename strategies
            print(f"Standard download failed: {download_error}")
            print("Trying direct download with fixed name...")

        # Fallback: try direct download with fixed filename
        output_template = os.path.join(output_dir, "pro_reference.%(ext)s")

        for config in get_fallback_configs():
            print(f"Trying pro reference download with strategy: {config['name']}")
            ydl_opts = {
                'format': 'best[ext=mp4]/best',
                'outtmpl': output_template,
                'noplaylist': True,
                'quiet': False,
                'no_warnings': False,
                'ignoreerrors': False,
            }
            ydl_opts.update(config['opts'])

            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    ydl.extract_info(url, download=True)
            except Exception as e:
                print(f"Pro reference strategy '{config['name']}' failed: {str(e)}")
                continue

            downloaded_path = _existing_reference()
            if downloaded_path is None:
                # Download reported success but no known file appeared
                continue
            if downloaded_path.endswith('.mp4'):
                print(f"Pro reference downloaded successfully with strategy: {config['name']}")
            else:
                found_ext = os.path.splitext(downloaded_path)[1].lstrip('.')
                print(f"Pro reference downloaded as {found_ext} format")
            return downloaded_path

        raise ValueError("All pro reference download strategies failed")

    except Exception as e:
        # Normalise every failure to ValueError while keeping the cause chain
        raise ValueError(f"Error downloading pro reference: {str(e)}") from e