Spaces:

renesistech
/

Notes

Build error

App Files Files Community

Notes / download.py

noumanjavaid

Update download.py

1e32418 verified 10 months ago

raw

history blame contribute delete

11.9 kB

	from __future__ import unicode_literals
	import yt_dlp
	import os
	import time
	import shutil
	import logging
	import re
	import tempfile
	from pathlib import Path
	from typing import Optional, Callable, Dict, Any, Union

	# Configuration
	# Upper bound, in bytes, on the estimated size of a download.
	MAX_FILE_SIZE = 40 * 1024 ** 2  # 40 MB
	# Message used when the estimated size exceeds MAX_FILE_SIZE.
	FILE_TOO_LARGE_MESSAGE = (
	    "The audio file exceeds the 40MB size limit. "
	    "Please try a shorter video clip or select a lower quality option."
	)
	# Number of download attempts and the base pause between them.
	MAX_RETRIES = 3
	RETRY_DELAY = 2  # seconds
	# Defaults applied when the caller does not choose a format/quality.
	DEFAULT_AUDIO_FORMAT = "mp3"
	DEFAULT_AUDIO_QUALITY = "192"  # kbps
	# Audio codecs accepted as the extraction target.
	SUPPORTED_FORMATS = ["mp3", "m4a", "wav", "aac", "flac", "opus"]

	# Setup logging
	# Configure the root logger once at import time; every record is
	# prefixed with timestamp, logger name and level.
	logging.basicConfig(
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	# Module-wide logger shared by all functions below.
	logger = logging.getLogger("youtube_downloader")


	class DownloadLogger:
	    """Logger object passed to yt-dlp that also relays progress lines.

	    Every message is forwarded to the module-level ``logger``. Debug
	    messages that start with ``[download]`` and contain a ``%`` sign are
	    additionally handed to the progress callback.
	    """

	    def __init__(self, progress_callback: Optional[Callable[[str], None]] = None):
	        """Store the callback, substituting a no-op when none is given.

	        Args:
	            progress_callback: Function to call with progress messages
	        """
	        if progress_callback:
	            self.progress_callback = progress_callback
	        else:
	            self.progress_callback = lambda x: None

	    def debug(self, msg: str) -> None:
	        """Relay percentage-bearing download lines, then log at DEBUG."""
	        is_progress_line = msg.startswith('[download]') and '%' in msg
	        if is_progress_line:
	            self.progress_callback(msg)
	        logger.debug(msg)

	    def warning(self, msg: str) -> None:
	        """Log the message at WARNING level."""
	        logger.warning(msg)

	    def error(self, msg: str) -> None:
	        """Log the message at ERROR level."""
	        logger.error(msg)


	class DownloadError(Exception):
	    """Custom exception for download errors.

	    Raised by this module for unsupported URLs, oversized files,
	    exhausted retries, and failed video-information lookups.
	    """


	def validate_url(url: str) -> bool:
	    """
	    Validate if the URL is a supported video platform URL

	    The check is a case-insensitive search for any of the known platform
	    domains anywhere in the string.

	    Args:
	        url: URL to check

	    Returns:
	        True if the URL mentions a supported platform, False otherwise
	        (including for empty or non-string input)
	    """
	    if not url or not isinstance(url, str):
	        return False

	    video_platforms = [
	        r'youtube\.com',
	        r'youtu\.be',
	        r'vimeo\.com',
	        r'dailymotion\.com',
	        r'twitch\.tv',
	        r'soundcloud\.com',
	        r'instagram\.com',
	    ]

	    # Join with a bare "|" so the regex is a real alternation. An escaped
	    # "\|" would match a literal pipe character and reject every URL.
	    pattern = '|'.join(f'(?:{platform})' for platform in video_platforms)
	    return re.search(pattern, url, re.IGNORECASE) is not None


	def ensure_download_directory(directory: str) -> str:
	    """
	    Create the download directory if needed and return its absolute path

	    Args:
	        directory: Directory path, relative or absolute

	    Returns:
	        Absolute path of the directory as a string
	    """
	    target = Path(directory)
	    # parents=True builds missing ancestors; exist_ok=True tolerates reuse.
	    target.mkdir(parents=True, exist_ok=True)
	    absolute_target = target.absolute()
	    return str(absolute_target)


	def get_download_options(
	    output_dir: str = "./downloads/audio",
	    audio_format: str = DEFAULT_AUDIO_FORMAT,
	    audio_quality: str = DEFAULT_AUDIO_QUALITY,
	    progress_callback: Optional[Callable[[str], None]] = None
	) -> Dict[str, Any]:
	    """
	    Build the yt-dlp options dictionary for an audio-only download

	    An unsupported ``audio_format`` is replaced by the default format
	    (with a warning), and the output directory is created if missing.

	    Args:
	        output_dir: Directory to save downloaded files
	        audio_format: Audio format (mp3, m4a, wav, etc.)
	        audio_quality: Audio quality in kbps
	        progress_callback: Function to call with progress updates

	    Returns:
	        Dictionary of yt-dlp options
	    """
	    chosen_format = audio_format
	    if chosen_format not in SUPPORTED_FORMATS:
	        logger.warning(f"Unsupported format '{audio_format}', falling back to {DEFAULT_AUDIO_FORMAT}")
	        chosen_format = DEFAULT_AUDIO_FORMAT

	    # Ensure download directory exists
	    target_dir = ensure_download_directory(output_dir)

	    def _forward_progress(status_info):
	        # Bind the caller's callback into the progress hook.
	        return download_progress_hook(status_info, progress_callback)

	    # Post-processing step that converts the download to the chosen codec.
	    extract_audio = {
	        "key": "FFmpegExtractAudio",
	        "preferredcodec": chosen_format,
	        "preferredquality": audio_quality,
	    }

	    options: Dict[str, Any] = {
	        "format": "bestaudio/best",
	        "postprocessors": [extract_audio],
	        "logger": DownloadLogger(progress_callback),
	        "outtmpl": f"{target_dir}/%(title)s.%(ext)s",
	        "noplaylist": True,
	        "quiet": False,
	        "no_warnings": False,
	        "progress_hooks": [_forward_progress],
	        "overwrites": True,
	    }
	    return options


	def download_progress_hook(d: Dict[str, Any], callback: Optional[Callable[[str], None]] = None) -> None:
	    """
	    Turn a yt-dlp progress dictionary into a human-readable message

	    Args:
	        d: Download information dictionary; must contain a 'status' key
	        callback: Function to call with progress updates
	    """
	    notify = callback if callback is not None else (lambda x: None)
	    status = d['status']

	    if status == 'downloading':
	        # Missing fields fall back to placeholder text.
	        percent = d.get('_percent_str', 'unknown progress')
	        rate = d.get('_speed_str', 'unknown speed')
	        remaining = d.get('_eta_str', 'unknown ETA')
	        notify(f"Downloading: {percent} at {rate}, ETA: {remaining}")
	        return

	    if status == 'finished':
	        full_path = d['filename']
	        # Callers see only the file name; the log keeps the full path.
	        notify(f"Download complete: {os.path.basename(full_path)}")
	        logger.info(f"Download finished: {full_path}")


	def estimate_file_size(info: Dict[str, Any]) -> int:
	    """
	    Better estimate file size from video info

	    Uses, in order: the exact ``filesize``, the approximate
	    ``filesize_approx``, then an estimate from duration and bitrate
	    (``abr`` preferred over ``tbr``). When none is available, returns
	    ``MAX_FILE_SIZE``.

	    Args:
	        info: Video information dictionary

	    Returns:
	        Estimated file size in bytes
	    """
	    # Try different fields that might contain size information
	    for size_key in ("filesize", "filesize_approx"):
	        reported = info.get(size_key)
	        if reported is not None:
	            # Coerce so the declared int return type always holds.
	            return int(reported)

	    # If we have duration and a bitrate, we can estimate
	    duration = info.get("duration")
	    bitrate = info.get("abr") or info.get("tbr")

	    if duration and bitrate:
	        # yt-dlp bitrates are in kbit/s (1000 bits per second), so
	        # bytes = kbit/s * 1000 / 8 * seconds = bitrate * duration * 125.
	        return int(bitrate * duration * 125)

	    # Default to a reasonable upper limit if we can't determine
	    return MAX_FILE_SIZE


	def download_video_audio(
	    url: str,
	    output_dir: str = "./downloads/audio",
	    audio_format: str = DEFAULT_AUDIO_FORMAT,
	    audio_quality: str = DEFAULT_AUDIO_QUALITY,
	    progress_callback: Optional[Callable[[str], None]] = None
	) -> Optional[str]:
	    """
	    Download audio from a video URL

	    The URL is validated, the size is estimated from metadata before any
	    download, and failed attempts are retried up to MAX_RETRIES times.

	    Args:
	        url: URL of the video
	        output_dir: Directory to save downloaded files
	        audio_format: Audio format (mp3, m4a, wav, etc.)
	        audio_quality: Audio quality in kbps
	        progress_callback: Function to call with progress updates

	    Returns:
	        Path to the downloaded audio file. None is returned only if
	        MAX_RETRIES is configured to zero or less, so no attempt is made.

	    Raises:
	        DownloadError: If the URL is unsupported, the estimated size
	            exceeds MAX_FILE_SIZE, or download fails after retries
	    """
	    if not validate_url(url):
	        error_msg = f"Invalid or unsupported URL: {url}"
	        logger.error(error_msg)
	        raise DownloadError(error_msg)

	    for attempt in range(1, MAX_RETRIES + 1):
	        try:
	            if progress_callback:
	                progress_callback(f"Starting download (attempt {attempt}/{MAX_RETRIES})...")

	            ydl_opts = get_download_options(output_dir, audio_format, audio_quality, progress_callback)
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                logger.info(f"Downloading audio from: {url}")

	                # Extract info first without downloading
	                info = ydl.extract_info(url, download=False)

	                # Reject oversized files before spending bandwidth on them
	                estimated_size = estimate_file_size(info)
	                if estimated_size > MAX_FILE_SIZE:
	                    error_msg = f"{FILE_TOO_LARGE_MESSAGE} (Estimated: {estimated_size / 1024 / 1024:.1f}MB)"
	                    logger.error(error_msg)
	                    raise DownloadError(error_msg)

	                # Now download
	                ydl.download([url])

	                # The post-processor changes the extension, so rebuild the
	                # expected name from the template result.
	                filename = ydl.prepare_filename(info)
	                base_filename = os.path.splitext(filename)[0]

	                # Requested format first, then any other supported one.
	                for ext in [audio_format, *SUPPORTED_FORMATS]:
	                    candidate = f"{base_filename}.{ext}"
	                    if os.path.exists(candidate):
	                        return candidate

	                # If we get here, something went wrong
	                raise FileNotFoundError(f"Could not locate downloaded file for {url}")

	        except DownloadError:
	            # Our own error (size limit) is not transient; retrying
	            # cannot help, so surface it immediately.
	            raise
	        except Exception as e:
	            from_ytdlp = isinstance(e, yt_dlp.utils.DownloadError)
	            label = "Download error" if from_ytdlp else "Unexpected error"
	            error_msg = f"{label} (Attempt {attempt}/{MAX_RETRIES}): {str(e)}"
	            logger.error(error_msg)
	            if progress_callback:
	                progress_callback(error_msg)

	            # Check exhaustion before sleeping so a rate-limited final
	            # attempt raises instead of silently returning None.
	            if attempt >= MAX_RETRIES:
	                raise DownloadError(
	                    f"Failed to download after {MAX_RETRIES} attempts: {str(e)}"
	                ) from e

	            # Rate limiting - wait longer
	            rate_limited = from_ytdlp and "HTTP Error 429" in str(e)
	            time.sleep(RETRY_DELAY * 5 if rate_limited else RETRY_DELAY)

	    return None


	def delete_download(path: str) -> bool:
	    """
	    Remove a downloaded file, or a directory with all its contents

	    Failures are logged rather than raised.

	    Args:
	        path: Path to file or directory to delete

	    Returns:
	        True if deletion was successful, False otherwise
	    """
	    try:
	        if not (path and os.path.exists(path)):
	            logger.warning(f"Path does not exist: {path}")
	            return False

	        if os.path.isfile(path):
	            os.remove(path)
	            logger.info(f"File deleted: {path}")
	            return True

	        if os.path.isdir(path):
	            # Recursive delete: removes everything below the directory.
	            shutil.rmtree(path)
	            logger.info(f"Directory deleted: {path}")
	            return True

	        logger.warning(f"Path is neither a file nor a directory: {path}")
	        return False
	    except PermissionError:
	        logger.error(f"Permission denied: Unable to delete {path}")
	    except FileNotFoundError:
	        # The path can vanish between the existence check and removal.
	        logger.error(f"File or directory not found: {path}")
	    except Exception as e:
	        logger.error(f"Error deleting {path}: {str(e)}")
	    return False


	def trim_audio_file(input_file: str, max_duration_seconds: int = 600) -> str:
	    """
	    Cut an audio file down to a maximum duration to reduce file size

	    The result is written next to the input as ``<name>_trimmed<ext>``
	    and the audio stream is copied rather than re-encoded.

	    Args:
	        input_file: Path to input audio file
	        max_duration_seconds: Maximum duration in seconds

	    Returns:
	        Path to trimmed file, or the original path if trimming fails
	    """
	    try:
	        # Imported here so a missing ffmpeg package is handled by the
	        # except clause below instead of failing at module import.
	        import ffmpeg

	        # Create output filename
	        directory, base_name = os.path.split(input_file)
	        stem, extension = os.path.splitext(base_name)
	        output_file = os.path.join(directory, f"{stem}_trimmed{extension}")

	        # Trim using ffmpeg
	        stream = ffmpeg.input(input_file)
	        stream = stream.output(output_file, t=str(max_duration_seconds), acodec='copy')
	        stream.run(quiet=True, overwrite_output=True)

	        logger.info(f"Trimmed {input_file} to {max_duration_seconds} seconds")
	        return output_file
	    except Exception as e:
	        logger.error(f"Error trimming audio: {str(e)}")
	        # Return original if trimming fails
	        return input_file


	def get_video_info(url: str) -> Dict[str, Any]:
	    """
	    Fetch metadata for a video without downloading it

	    Args:
	        url: URL of the video

	    Returns:
	        Dictionary of video information

	    Raises:
	        DownloadError: If the information cannot be retrieved
	    """
	    quiet_options = {"quiet": True}
	    try:
	        with yt_dlp.YoutubeDL(quiet_options) as ydl:
	            # download=False requests metadata only.
	            return ydl.extract_info(url, download=False)
	    except Exception as e:
	        logger.error(f"Error getting video info: {str(e)}")
	        raise DownloadError(f"Could not retrieve video information: {str(e)}")