Spaces:
Running
Running
File size: 4,201 Bytes
ad1bda5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import os
import logging
import tempfile
from typing import Optional, Tuple
import re
# Module-level logger shared by every helper in this file.
logger = logging.getLogger(__name__)

# pytube is an optional dependency. The flag records whether the import
# succeeded so the download helpers can fail gracefully instead of raising
# NameError on the missing ``YouTube`` class.
try:
    from pytube import YouTube
except ImportError:
    YT_DOWNLOADER_AVAILABLE = False
    logger.warning("pytube not available. YouTube download functionality will be disabled.")
else:
    YT_DOWNLOADER_AVAILABLE = True
def is_valid_youtube_url(url: str) -> bool:
    """Return True when *url* looks like a link to a single YouTube video.

    Recognised shapes (the scheme and a ``www.``/``m.``/``music.`` host
    prefix are optional):

    * ``youtube.com/watch?v=<id>`` -- ``v`` may appear anywhere in the query
    * ``youtu.be/<id>``
    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

    The check is anchored at the start of the (whitespace-stripped) text and
    requires the id to be exactly 11 characters from ``[a-zA-Z0-9_-]``.
    Whatever follows the id (extra query parameters, fragments, ...) is not
    inspected.

    Args:
        url: Candidate URL. Anything that is not a non-empty string is
            rejected.

    Returns:
        True if the text starts with one of the recognised shapes, False
        otherwise.
    """
    # Check the type first so truthiness is only ever evaluated on a real
    # string; this makes a catch-all ``except`` unnecessary.
    if not isinstance(url, str) or not url:
        return False

    host = r"(?:https?://)?(?:(?:www|m|music)\.)?"
    # Exactly 11 id characters: the negative look-ahead rejects longer runs
    # instead of accepting their first 11 characters as a valid id.
    video_id = r"[a-zA-Z0-9_-]{11}(?![a-zA-Z0-9_-])"
    locations = (
        # Lazy optional prefix: prefer ``v`` as the first query parameter,
        # but also accept it after other ``&``-separated parameters.
        r"youtube\.com/watch\?(?:[^#\s]*?&)??v=",
        r"youtu\.be/",
        r"youtube\.com/(?:embed|v|shorts|live)/",
    )

    # Pasted links frequently carry stray spaces or a trailing newline.
    candidate = url.strip()
    return any(
        re.match(host + location + video_id, candidate) is not None
        for location in locations
    )
def extract_video_id(url: str) -> Optional[str]:
    """Extract the 11-character YouTube video id from *url*.

    The text is searched (not anchored), so the link may be embedded in
    surrounding text. Link shapes are tried in this order, and the first
    shape that matches anywhere in the text wins:

    1. ``youtube.com/watch?v=<id>`` (``v`` may follow other query
       parameters) or ``youtu.be/<id>``
    2. ``youtube.com/embed/<id>``
    3. ``youtube.com/v/<id>``
    4. ``youtube.com/shorts/<id>`` or ``youtube.com/live/<id>``

    Args:
        url: Text containing a YouTube link.

    Returns:
        The video id, or None when *url* is not a string, contains no
        recognised link, or the id run is not exactly 11 characters long.
    """
    # Non-string input cannot contain a link; answer directly instead of
    # letting ``re.search`` raise TypeError.
    if not isinstance(url, str):
        return None

    # Exactly 11 id characters: the negative look-ahead prevents returning a
    # truncated prefix of a longer (and therefore malformed) id.
    video_id = r"([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])"
    locations = (
        # Lazy optional prefix keeps ``watch?v=<id>`` preferred while also
        # finding ``v`` after other ``&``-separated parameters.
        r"(?:youtube\.com/watch\?(?:[^#\s]*?&)??v=|youtu\.be/)",
        r"youtube\.com/embed/",
        r"youtube\.com/v/",
        r"youtube\.com/(?:shorts|live)/",
    )

    for location in locations:
        found = re.search(location + video_id, url)
        if found:
            return found.group(1)
    return None
def download_youtube_video(url: str, output_path: Optional[str] = None) -> Tuple[bool, str, Optional[str]]:
    """Download a YouTube video as an mp4 file using pytube.

    Args:
        url: Link to the video; it must pass ``is_valid_youtube_url``.
        output_path: Directory to save into. When None, the system temporary
            directory from ``tempfile.gettempdir()`` is used.

    Returns:
        A ``(success, message, filepath)`` tuple. On success ``filepath`` is
        the saved ``youtube_<video id>.mp4`` file; on any failure it is None
        and ``message`` describes the problem. Exceptions raised while
        validating or downloading are logged and reported through the tuple
        rather than propagated.
    """
    if not YT_DOWNLOADER_AVAILABLE:
        return False, "pytube library not installed. Install it with: pip install pytube", None

    try:
        if not is_valid_youtube_url(url):
            return False, "Invalid YouTube URL format", None

        vid = extract_video_id(url)
        if not vid:
            return False, "Could not extract video ID from URL", None

        yt = YouTube(url)
        target_dir = tempfile.gettempdir() if output_path is None else output_path

        # Try progressive mp4 streams first, then fall back to any mp4
        # stream; in both cases take the highest resolution.
        # NOTE(review): the fallback is not restricted to progressive
        # streams -- confirm that a stream without audio is acceptable.
        stream = None
        for criteria in (
            {"progressive": True, "file_extension": "mp4"},
            {"file_extension": "mp4"},
        ):
            stream = yt.streams.filter(**criteria).order_by("resolution").desc().first()
            if stream is not None:
                break
        if stream is None:
            return False, "No downloadable video stream found", None

        target_name = f"youtube_{vid}.mp4"
        target_file = os.path.join(target_dir, target_name)
        stream.download(output_path=target_dir, filename=target_name)

        # Verify the result on disk instead of trusting the download call.
        if not os.path.exists(target_file):
            return False, "Download failed: file not found after download", None

        size_bytes = os.path.getsize(target_file)
        if size_bytes == 0:
            # Do not leave an empty placeholder file behind.
            os.remove(target_file)
            return False, "Download failed: file is empty", None

        size_mb = size_bytes / (1024 * 1024)
        return True, f"Successfully downloaded video ({size_mb:.2f} MB)", target_file
    except Exception as exc:
        failure = f"YouTube download error: {str(exc)}"
        logger.error(failure)
        return False, failure, None
def get_youtube_info(url: str) -> Tuple[bool, str, dict]:
    """Fetch basic metadata for a YouTube video using pytube.

    Args:
        url: Link to the video; it must pass ``is_valid_youtube_url``.

    Returns:
        A ``(success, message, info)`` tuple. On success ``info`` holds the
        keys ``title``, ``length``, ``views``, ``author`` and
        ``thumbnail_url``, read from the attributes of the same names on the
        pytube ``YouTube`` object. On failure ``info`` is an empty dict and
        ``message`` describes the problem. Exceptions are logged and reported
        through the tuple rather than propagated.
    """
    if not YT_DOWNLOADER_AVAILABLE:
        return False, "pytube library not installed", {}

    try:
        if not is_valid_youtube_url(url):
            return False, "Invalid YouTube URL format", {}

        video = YouTube(url)
        # Each dict key is the name of the attribute it is read from.
        wanted = ("title", "length", "views", "author", "thumbnail_url")
        details = {field: getattr(video, field) for field in wanted}
    except Exception as exc:
        failure = f"Error getting YouTube info: {str(exc)}"
        logger.error(failure)
        return False, failure, {}

    return True, "Successfully retrieved video info", details
|