"""Helpers for validating YouTube URLs and downloading videos via pytube.

pytube is an optional dependency: when it cannot be imported,
``YT_DOWNLOADER_AVAILABLE`` stays ``False`` and the download/info functions
return a failure tuple instead of raising.
"""

import os
import logging
import tempfile
from typing import Optional, Tuple
import re

logger = logging.getLogger(__name__)

YT_DOWNLOADER_AVAILABLE = False
try:
    from pytube import YouTube
    YT_DOWNLOADER_AVAILABLE = True
except ImportError:
    logger.warning("pytube not available. YouTube download functionality will be disabled.")

# An 11-character video ID that is NOT followed by another ID character.
# Without the lookahead an over-long ID would be accepted and silently
# truncated to its first 11 characters.
_VIDEO_ID_GROUP = r'([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'

# Single source of truth for the URL shapes recognised by both
# is_valid_youtube_url() and extract_video_id().
_YOUTUBE_URL_PATTERNS = tuple(
    re.compile(prefix + _VIDEO_ID_GROUP)
    for prefix in (
        r'(?:https?://)?(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)',
        r'(?:https?://)?(?:www\.)?youtube\.com/embed/',
        r'(?:https?://)?(?:www\.)?youtube\.com/v/',
    )
)


def is_valid_youtube_url(url: str) -> bool:
    """Return True if *url* starts with a recognised YouTube video URL.

    Recognised shapes (scheme and ``www.`` are optional):
    ``youtube.com/watch?v=<id>``, ``youtu.be/<id>``,
    ``youtube.com/embed/<id>`` and ``youtube.com/v/<id>``, where ``<id>``
    is exactly 11 characters from ``[a-zA-Z0-9_-]``.

    Matching is anchored at the start of the string only, so trailing
    query parameters (``&t=10s``, ``?rel=0``) are accepted. Empty or
    non-string input returns False; the function never raises.
    """
    if not isinstance(url, str) or not url:
        return False
    return any(pattern.match(url) for pattern in _YOUTUBE_URL_PATTERNS)


def extract_video_id(url: str) -> Optional[str]:
    """Return the 11-character video ID found in *url*, or None.

    Unlike is_valid_youtube_url(), the URL may appear anywhere inside
    *url* (the patterns are searched, not anchored). Non-string input
    returns None.
    """
    if not isinstance(url, str):
        return None
    for pattern in _YOUTUBE_URL_PATTERNS:
        found = pattern.search(url)
        if found:
            return found.group(1)
    return None


def _select_mp4_stream(yt):
    """Pick the highest-resolution mp4 stream of *yt*, or None if there is none."""
    stream = yt.streams.filter(
        progressive=True, file_extension='mp4'
    ).order_by('resolution').desc().first()
    if stream is not None:
        return stream

    # NOTE(review): in pytube's stream model non-progressive (adaptive)
    # streams are presumably video-only, so this fallback may yield a file
    # without audio -- confirm against the pytube docs before relying on it.
    stream = yt.streams.filter(
        file_extension='mp4'
    ).order_by('resolution').desc().first()
    if stream is not None:
        logger.warning("No progressive mp4 stream found; falling back to a non-progressive stream.")
    return stream


def download_youtube_video(url: str, output_path: Optional[str] = None) -> Tuple[bool, str, Optional[str]]:
    """Download the video at *url* as ``youtube_<video_id>.mp4``.

    Args:
        url: A YouTube URL accepted by is_valid_youtube_url().
        output_path: Target directory. Defaults to the system temporary
            directory (``tempfile.gettempdir()``) when None.

    Returns:
        ``(success, message, filepath)``. On success *filepath* is the
        path of the downloaded file; on any failure it is None and
        *message* describes the problem. Exceptions raised while
        talking to pytube or the filesystem are logged and converted
        into a failure tuple rather than propagated.
    """
    if not YT_DOWNLOADER_AVAILABLE:
        return False, "pytube library not installed. Install it with: pip install pytube", None

    try:
        if not is_valid_youtube_url(url):
            return False, "Invalid YouTube URL format", None

        video_id = extract_video_id(url)
        if not video_id:
            return False, "Could not extract video ID from URL", None

        yt = YouTube(url)

        if output_path is None:
            output_path = tempfile.gettempdir()

        video_file = _select_mp4_stream(yt)
        if video_file is None:
            return False, "No downloadable video stream found", None

        filename = f"youtube_{video_id}.mp4"
        filepath = os.path.join(output_path, filename)

        video_file.download(output_path=output_path, filename=filename)

        if not os.path.exists(filepath):
            return False, "Download failed: file not found after download", None

        file_size = os.path.getsize(filepath)
        if file_size == 0:
            # Do not leave a zero-byte artefact behind for later callers.
            os.remove(filepath)
            return False, "Download failed: file is empty", None

        return True, f"Successfully downloaded video ({file_size / (1024*1024):.2f} MB)", filepath

    except Exception as e:
        # Boundary handler: callers expect a result tuple, never an exception.
        error_msg = f"YouTube download error: {str(e)}"
        logger.exception(error_msg)
        return False, error_msg, None


def get_youtube_info(url: str) -> Tuple[bool, str, dict]:
    """Fetch basic metadata for the video at *url*.

    Returns:
        ``(success, message, info)``. On success *info* holds the keys
        ``title``, ``length``, ``views``, ``author`` and
        ``thumbnail_url`` read from the pytube ``YouTube`` object; on
        failure it is an empty dict and *message* describes the problem.
        Exceptions are logged and converted into a failure tuple.
    """
    if not YT_DOWNLOADER_AVAILABLE:
        return False, "pytube library not installed", {}

    try:
        if not is_valid_youtube_url(url):
            return False, "Invalid YouTube URL format", {}

        yt = YouTube(url)

        info = {
            "title": yt.title,
            "length": yt.length,
            "views": yt.views,
            "author": yt.author,
            "thumbnail_url": yt.thumbnail_url,
        }

        return True, "Successfully retrieved video info", info

    except Exception as e:
        # Boundary handler: callers expect a result tuple, never an exception.
        error_msg = f"Error getting YouTube info: {str(e)}"
        logger.exception(error_msg)
        return False, error_msg, {}