Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import tempfile | |
| from typing import Optional, Tuple | |
| import re | |
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# pytube is an optional dependency: record whether it could be imported so
# code that needs it can check the flag before touching ``YouTube``.
try:
    from pytube import YouTube
except ImportError:
    YT_DOWNLOADER_AVAILABLE = False
    logger.warning("pytube not available. YouTube download functionality will be disabled.")
else:
    YT_DOWNLOADER_AVAILABLE = True
# Recognised YouTube URL shapes. Used with ``match`` so the URL must start
# with one of them. Compiled once at import time instead of on every call.
#   * optional scheme and optional ``www.`` / ``m.`` / ``music.`` host prefix
#   * ``watch?...v=ID`` with ``v`` anywhere in the query string (the lazy
#     quantifiers prefer the first ``v=`` parameter)
#   * ``embed/ID``, ``v/ID``, ``shorts/ID`` and ``youtu.be/ID``
#   * the trailing negative lookahead rejects IDs longer than 11 characters
_YOUTUBE_URL_RE = re.compile(
    r'(?:https?://)?(?:(?:www|m|music)\.)?'
    r'(?:'
    r'youtube\.com/(?:watch\?(?:[^#\s]*?&)??v=|embed/|v/|shorts/)'
    r'|youtu\.be/'
    r')'
    r'[a-zA-Z0-9_-]{11}(?![a-zA-Z0-9_-])'
)


def is_valid_youtube_url(url: str) -> bool:
    """Return True if *url* looks like a link to a single YouTube video.

    The check is purely syntactic (no network access). The scheme is
    optional, and anything may follow the 11-character video ID as long as
    it is not another ID character (so ``&t=10s`` or ``?si=...`` are fine,
    but a 12+ character ID is rejected).

    Args:
        url: Candidate URL. Non-string or empty values are rejected.

    Returns:
        True when the URL starts with a recognised YouTube video URL form,
        False otherwise. Never raises.
    """
    # Type check first so that truth-testing an arbitrary object cannot raise.
    if not isinstance(url, str) or not url:
        return False
    return _YOUTUBE_URL_RE.match(url) is not None
# Patterns are tried in order with ``search``; group 1 is the video ID.
# Because ``search`` scans the whole string, an optional scheme / ``www.``
# prefix would add nothing, so the patterns start at the host name.
# ``watch?`` accepts ``v=`` anywhere in the query string; the lazy
# quantifiers make the first ``v=`` parameter win.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'(?:youtube\.com/watch\?(?:[^#\s]*?&)??v=|youtu\.be/)([a-zA-Z0-9_-]{11})',
        r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
    )
)


def extract_video_id(url: str) -> Optional[str]:
    """Extract the 11-character video ID from a YouTube URL.

    Handles ``watch?v=``, ``youtu.be/``, ``embed/``, ``v/`` and ``shorts/``
    links. For ``watch`` links the ``v`` parameter does not have to be the
    first query parameter.

    Args:
        url: Text containing a YouTube video URL.

    Returns:
        The video ID, or None when *url* is not a string or contains no
        recognisable YouTube video link. Never raises.
    """
    # Explicit guard instead of relying on a TypeError from the regex engine.
    if not isinstance(url, str):
        return None

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None
def _pick_mp4_stream(yt):
    """Return the highest-resolution MP4 stream of *yt*, or None if there is none."""
    # Progressive streams are tried first; any other MP4 stream is the fallback.
    # NOTE(review): non-progressive (adaptive) streams are presumably
    # video-only in pytube -- confirm callers can cope with a silent file.
    for criteria in (
        {"progressive": True, "file_extension": "mp4"},
        {"file_extension": "mp4"},
    ):
        stream = yt.streams.filter(**criteria).order_by('resolution').desc().first()
        if stream is not None:
            return stream
    return None


def download_youtube_video(url: str, output_path: Optional[str] = None) -> Tuple[bool, str, Optional[str]]:
    """Download a YouTube video as ``youtube_<video id>.mp4``.

    Args:
        url: YouTube video URL; validated before any network access.
        output_path: Directory to save into. Defaults to the system
            temporary directory.

    Returns:
        A ``(success, message, filepath)`` tuple. On success *filepath* is
        the path of the downloaded file; on any failure it is None and
        *message* describes the problem. Exceptions are caught, logged and
        reported through the tuple rather than raised.
    """
    if not YT_DOWNLOADER_AVAILABLE:
        return False, "pytube library not installed. Install it with: pip install pytube", None

    filepath = None
    existed_before = False
    try:
        if not is_valid_youtube_url(url):
            return False, "Invalid YouTube URL format", None

        video_id = extract_video_id(url)
        if not video_id:
            return False, "Could not extract video ID from URL", None

        yt = YouTube(url)
        if output_path is None:
            output_path = tempfile.gettempdir()

        video_file = _pick_mp4_stream(yt)
        if video_file is None:
            return False, "No downloadable video stream found", None

        filename = f"youtube_{video_id}.mp4"
        filepath = os.path.join(output_path, filename)
        # Remember whether the target already existed so that a failure
        # never deletes a file this call did not create.
        existed_before = os.path.exists(filepath)
        video_file.download(output_path=output_path, filename=filename)

        if not os.path.exists(filepath):
            return False, "Download failed: file not found after download", None

        file_size = os.path.getsize(filepath)
        if file_size == 0:
            os.remove(filepath)
            return False, "Download failed: file is empty", None

        return True, f"Successfully downloaded video ({file_size / (1024*1024):.2f} MB)", filepath
    except Exception as e:
        error_msg = f"YouTube download error: {str(e)}"
        # logger.exception keeps the traceback alongside the message.
        logger.exception(error_msg)
        # Do not leave a truncated file behind when the download died midway.
        if filepath is not None and not existed_before:
            try:
                os.remove(filepath)
            except OSError:
                pass  # nothing was written, or it is already gone
        return False, error_msg, None
def get_youtube_info(url: str) -> Tuple[bool, str, dict]:
    """Fetch basic metadata for a YouTube video without downloading it.

    Args:
        url: YouTube video URL; validated before pytube is invoked.

    Returns:
        A ``(success, message, info)`` tuple. On success *info* holds the
        ``title``, ``length``, ``views``, ``author`` and ``thumbnail_url``
        attributes read from pytube's ``YouTube`` object; on failure it is
        an empty dict and *message* explains why. Exceptions are logged and
        reported through the tuple rather than raised.
    """
    if not YT_DOWNLOADER_AVAILABLE:
        return False, "pytube library not installed", {}

    try:
        if is_valid_youtube_url(url):
            video = YouTube(url)
            # Attributes are read in this fixed order; each becomes a key.
            wanted = ("title", "length", "views", "author", "thumbnail_url")
            metadata = {field: getattr(video, field) for field in wanted}
            outcome = (True, "Successfully retrieved video info", metadata)
        else:
            outcome = (False, "Invalid YouTube URL format", {})
    except Exception as e:
        failure_text = f"Error getting YouTube info: {str(e)}"
        logger.error(failure_text)
        outcome = (False, failure_text, {})
    return outcome