Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import yt_dlp | |
| import os | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| import re | |
| import uuid | |
| import json | |
| from datetime import datetime | |
# Module-level dict, created empty at import time.
# NOTE(review): nothing in the visible code reads or writes it - confirm it is
# still needed before relying on it for per-session state.
session_data = {}
class YouTubeDownloader:
    """Fetch YouTube metadata with yt-dlp and render a heuristic text report.

    Every ``analyze_*``/``detect_*`` method is a keyword heuristic over the
    metadata dict returned by ``yt_dlp.YoutubeDL.extract_info``; the video
    itself is never downloaded (``download=False``).
    """

    # Compiled once instead of on every validation call. Matched from the
    # start of the URL; ``m.`` and ``music.`` hosts are accepted as well.
    _YOUTUBE_URL_RE = re.compile(
        r'(https?://)?(www\.|m\.|music\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
    )

    # Category -> keywords. Dict order is the matching priority.
    _CONTENT_KEYWORDS = {
        'educational': ('tutorial', 'how to', 'learn', 'guide', 'explained', 'lesson', 'course', 'tips'),
        'promotional': ('ad', 'promo', 'launch', 'brand', 'sponsored', 'commercial', 'product'),
        'entertainment': ('funny', 'comedy', 'challenge', 'reaction', 'prank', 'meme', 'fun'),
        'review': ('review', 'unboxing', 'comparison', 'vs', 'test', 'rating'),
        'vlog': ('vlog', 'daily', 'routine', 'day in', 'life', 'personal'),
        'music': ('music', 'song', 'cover', 'remix', 'beats', 'audio'),
        'news': ('news', 'breaking', 'update', 'report', 'latest', 'current'),
    }

    _EMOTION_KEYWORDS = {
        'energetic': ('excited', 'amazing', 'incredible', 'wow', 'awesome', 'fantastic', 'energy'),
        'positive': ('happy', 'love', 'great', 'good', 'wonderful', 'perfect', 'best'),
        'calm': ('calm', 'peaceful', 'relaxing', 'soothing', 'gentle', 'quiet'),
        'serious': ('important', 'serious', 'warning', 'critical', 'urgent', 'breaking'),
        'inspirational': ('inspire', 'motivate', 'change', 'transform', 'achieve', 'success'),
    }

    _MUSIC_STYLE_KEYWORDS = {
        'upbeat': ('upbeat', 'energetic', 'fast', 'dance', 'pop', 'electronic', 'rock'),
        'calm': ('calm', 'soft', 'soothing', 'ambient', 'peaceful', 'meditation', 'acoustic'),
        'cinematic': ('cinematic', 'dramatic', 'epic', 'orchestral', 'soundtrack'),
        'lo-fi': ('lo-fi', 'chill', 'study', 'relaxing beats'),
        'classical': ('classical', 'piano', 'orchestra', 'symphony'),
    }
    _MUSIC_CUES = ('music', 'song', 'audio', 'beats')

    # Display name -> lowercase aliases searched for in the metadata.
    _KNOWN_PERSONALITIES = {
        # Indian film industry
        "Kartik Aaryan": ("kartik aaryan", "kartik", "aaryan"),
        "Deepika Padukone": ("deepika padukone", "deepika"),
        "Alia Bhatt": ("alia bhatt", "alia"),
        "Ranveer Singh": ("ranveer singh", "ranveer"),
        "Kiara Advani": ("kiara advani", "kiara"),
        "Janhvi Kapoor": ("janhvi kapoor", "janhvi"),
        "Ananya Panday": ("ananya panday", "ananya"),
        "Salman Khan": ("salman khan", "salman"),
        "Shahrukh Khan": ("shahrukh khan", "srk", "shah rukh"),
        "Amitabh Bachchan": ("amitabh bachchan", "amitabh", "big b"),
        "Katrina Kaif": ("katrina kaif", "katrina"),
        # Sports personalities
        "Virat Kohli": ("virat kohli", "virat"),
        "MS Dhoni": ("ms dhoni", "dhoni"),
        "Rohit Sharma": ("rohit sharma", "rohit"),
        # International celebrities
        "Taylor Swift": ("taylor swift", "taylor"),
        "Kylie Jenner": ("kylie jenner", "kylie"),
        "Elon Musk": ("elon musk", "elon"),
        # YouTubers / content creators
        "MrBeast": ("mrbeast", "mr beast"),
        "PewDiePie": ("pewdiepie", "felix"),
        "CarryMinati": ("carryminati", "carry", "ajey nagar"),
        "Ashish Chanchlani": ("ashish chanchlani", "ashish"),
        "Bhuvan Bam": ("bhuvan bam", "bb ki vines"),
        "Prajakta Koli": ("prajakta koli", "mostlysane"),
        # Tech personalities
        "Sundar Pichai": ("sundar pichai", "sundar"),
        # Beauty / fashion influencers
        "James Charles": ("james charles",),
        "Nikkie Tutorials": ("nikkie tutorials", "nikkietutorials"),
    }

    _INFLUENCER_INDICATORS = (
        "influencer", "creator", "brand ambassador", "celebrity", "star",
        "featured", "guest", "interview", "collaboration", "collab",
    )

    # Scene descriptions per content type (keys are lowercased type names).
    _SCENE_TEMPLATES = {
        'educational': (
            "Introduction and topic overview",
            "Main content explanation with examples",
            "Detailed demonstration or walkthrough",
            "Key points summary and tips",
            "Conclusion and call-to-action",
        ),
        'promotional': (
            "Brand/product introduction",
            "Key features showcase",
            "Benefits and advantages highlight",
            "Social proof or testimonials",
            "Call-to-action and closing",
        ),
        'entertainment': (
            "Opening hook and introduction",
            "Main entertainment content",
            "Peak moment or climax",
            "Reaction or commentary",
            "Closing and engagement request",
        ),
        'review': (
            "Product/service introduction",
            "First impressions and unboxing",
            "Detailed feature analysis",
            "Pros and cons discussion",
            "Final verdict and recommendation",
        ),
        'vlog': (
            "Daily routine introduction",
            "Activity or event coverage",
            "Personal commentary and thoughts",
            "Interaction with others",
            "Day wrap-up and reflection",
        ),
    }
    _DEFAULT_SCENES = (
        "Opening sequence",
        "Main content delivery",
        "Supporting information",
        "Engagement moment",
        "Conclusion",
    )

    def __init__(self):
        """Create a private temporary directory; ``cleanup`` removes it."""
        self.download_dir = tempfile.mkdtemp()

    def cleanup(self):
        """Remove the temporary directory, logging instead of raising on failure."""
        try:
            if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
                shutil.rmtree(self.download_dir)
                print(f"✅ Cleaned up temporary directory: {self.download_dir}")
        except Exception as e:  # best effort: this runs at interpreter exit
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _searchable_text(video_info, *keys):
        """Join the given metadata fields into one lowercase string.

        Missing keys and ``None`` values are skipped; list values (such as
        ``tags``) are flattened.
        """
        parts = []
        for key in keys:
            value = video_info.get(key)
            if isinstance(value, (list, tuple)):
                parts.extend(str(item) for item in value if item)
            elif value:
                parts.append(str(value))
        return ' '.join(parts).lower()

    @staticmethod
    def _mentions(text, keywords, allow_plural=False):
        """Return True if any keyword occurs in *text* as a whole word/phrase.

        Plain substring tests misfire badly here ('ad' in "had", 'alia' in
        "australia"), so a keyword must not touch another word character on
        either side. ``allow_plural`` additionally accepts a trailing "s".
        """
        suffix = 's?' if allow_plural else ''
        return any(
            re.search(rf'(?<!\w){re.escape(keyword)}{suffix}(?!\w)', text)
            for keyword in keywords
        )

    @staticmethod
    def _as_count(value):
        """Coerce a possibly-missing numeric field to a non-negative int."""
        try:
            return max(int(value or 0), 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    def _format_number(num):
        """Abbreviate a count as e.g. ``1.5K``/``2.3M``/``1.0B``; falsy -> "0"."""
        if not num:
            return "0"
        if num >= 1_000_000_000:
            return f"{num / 1_000_000_000:.1f}B"
        if num >= 1_000_000:
            return f"{num / 1_000_000:.1f}M"
        if num >= 1_000:
            return f"{num / 1_000:.1f}K"
        return str(num)

    # ------------------------------------------------------------------
    # Validation and heuristics
    # ------------------------------------------------------------------
    def is_valid_youtube_url(self, url):
        """Return True if *url* (ignoring surrounding whitespace) looks like a YouTube video URL."""
        if not isinstance(url, str):
            return False
        return self._YOUTUBE_URL_RE.match(url.strip()) is not None

    def analyze_content_type(self, video_info):
        """Return the first matching content category (title-cased) or "General"."""
        metadata = self._searchable_text(video_info, 'title', 'description', 'tags')
        for category, keywords in self._CONTENT_KEYWORDS.items():
            if self._mentions(metadata, keywords, allow_plural=True):
                return category.title()
        return "General"

    def analyze_emotion(self, video_info):
        """Return the first matching emotional tone (title-cased) or "Neutral"."""
        metadata = self._searchable_text(video_info, 'title', 'description')
        for emotion, keywords in self._EMOTION_KEYWORDS.items():
            if self._mentions(metadata, keywords, allow_plural=True):
                return emotion.title()
        return "Neutral"

    def analyze_music_style(self, video_info):
        """Return a music-style label guessed from title, description and tags.

        Returns the first matching style (title-cased), else "Music Content"
        when generic music words appear, else "Minimal/No Music".
        """
        metadata = self._searchable_text(video_info, 'title', 'description', 'tags')
        for style, keywords in self._MUSIC_STYLE_KEYWORDS.items():
            if self._mentions(metadata, keywords, allow_plural=True):
                return style.title()
        if self._mentions(metadata, self._MUSIC_CUES, allow_plural=True):
            return "Music Content"
        # The former "Background Music Present" fallback tested for 'music',
        # which the check above already covers, so it could never be returned.
        return "Minimal/No Music"

    def detect_influencers(self, video_info):
        """Report known personalities, or generic influencer cues, in the metadata.

        Returns a string starting with "TRUE" or "FALSE" followed by details.
        """
        searchable_text = self._searchable_text(
            video_info, 'title', 'description', 'uploader', 'channel', 'tags'
        )
        detected = [
            name
            for name, aliases in self._KNOWN_PERSONALITIES.items()
            # No plural tolerance for names: "alias" must not match "alia".
            if self._mentions(searchable_text, aliases)
        ]
        if detected:
            return f"TRUE - Detected: {', '.join(detected)}"
        if self._mentions(searchable_text, self._INFLUENCER_INDICATORS, allow_plural=True):
            return "TRUE - Likely influencer/celebrity present (check video for confirmation)"
        return "FALSE - No known personalities detected"

    def generate_scene_breakdown(self, video_info):
        """Return timestamped, template-based scene lines spanning the whole video.

        The descriptions are generic templates chosen by content type; they
        are not derived from the actual footage. At most twice as many
        segments as there are templates are produced; for long videos the
        segment length is stretched so the last segment still ends at the
        video's duration.
        """
        duration = self._as_count(video_info.get('duration'))
        if duration <= 0:
            return ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]

        # Preferred segment length grows with the video duration.
        if duration <= 30:
            segment_length = 2
        elif duration <= 60:
            segment_length = 5
        elif duration <= 300:  # 5 minutes
            segment_length = 10
        elif duration <= 900:  # 15 minutes
            segment_length = 15
        else:
            segment_length = 30

        video_type = self.analyze_content_type(video_info).lower()
        templates = self._SCENE_TEMPLATES.get(video_type, self._DEFAULT_SCENES)

        # ``-(-a // b)`` is integer ceiling division.
        max_segments = len(templates) * 2
        if -(-duration // segment_length) > max_segments:
            segment_length = -(-duration // max_segments)
        segment_count = -(-duration // segment_length)

        title = self._searchable_text(video_info, 'title')
        if self._mentions(title, ('music', 'song'), allow_plural=True):
            cue = " [Music/audio content]"
        elif self._mentions(title, ('tutorial', 'how to'), allow_plural=True):
            cue = " [Instructional content with visual demonstrations]"
        else:
            cue = ""

        scenes = []
        last = segment_count - 1
        for i in range(segment_count):
            start_time = i * segment_length
            end_time = duration if i == last else start_time + segment_length - 1
            start_formatted = f"{start_time // 60}:{start_time % 60:02d}"
            end_formatted = f"{end_time // 60}:{end_time % 60:02d}"

            # Spread the templates evenly: first segment gets the first
            # template, last segment gets the last one.
            template_index = i * (len(templates) - 1) // last if last else 0
            base_description = templates[template_index]

            if i == 0:
                detail = "Video begins with title card/intro"
            elif i == last:
                detail = "Video concludes with end screen/outro"
            else:
                detail = "Continued content delivery"

            scenes.append(
                f"**[{start_formatted}-{end_formatted}]**: {base_description} - {detail}{cue}"
            )
        return scenes

    # ------------------------------------------------------------------
    # Report rendering
    # ------------------------------------------------------------------
    def format_video_info(self, video_info):
        """Render the metadata dict as a multi-line text report.

        Missing or ``None`` fields (yt-dlp reports hidden counts as ``None``)
        are shown as "Unknown"/"0" instead of raising.
        """
        if not video_info:
            return "❌ No video information available."

        # Basic information
        duration = self._as_count(video_info.get('duration'))
        if duration:
            duration_str = f"{duration // 3600}:{(duration % 3600) // 60:02d}:{duration % 60:02d}"
        else:
            duration_str = "Unknown"

        upload_date = str(video_info.get('upload_date') or '')
        if len(upload_date) == 8:  # yt-dlp style YYYYMMDD
            formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
        else:
            formatted_date = upload_date or "Unknown"

        title = video_info.get('title') or 'Unknown'
        channel = video_info.get('channel') or 'Unknown'
        uploader = video_info.get('uploader') or 'Unknown'
        language = video_info.get('language') or 'Unknown'
        availability = str(video_info.get('availability') or 'public').title()
        thumbnail_url = video_info.get('thumbnail') or 'Not available'
        webpage_url = video_info.get('webpage_url') or 'Unknown'
        video_id = video_info.get('id') or 'Unknown'

        categories = [str(item) for item in (video_info.get('categories') or [])]
        tags = [str(item) for item in (video_info.get('tags') or [])]

        # Engagement metrics
        view_count = self._as_count(video_info.get('view_count'))
        like_count = self._as_count(video_info.get('like_count'))
        comment_count = self._as_count(video_info.get('comment_count'))
        subscribers = self._as_count(video_info.get('channel_followers'))
        engagement_rate = (like_count / view_count) * 100 if view_count > 0 else 0

        # Heuristic analysis
        scene_descriptions = self.generate_scene_breakdown(video_info)
        music_style = self.analyze_music_style(video_info)
        influencer_detection = self.detect_influencers(video_info)
        video_type = self.analyze_content_type(video_info)
        emotion = self.analyze_emotion(video_info)

        # Description, truncated to 800 characters. The suffix is computed
        # here because a backslash inside an f-string expression is a
        # SyntaxError before Python 3.12.
        full_description = str(video_info.get('description') or '')
        description_text = full_description[:800] if full_description else 'No description available'
        if len(full_description) > 800:
            truncation_note = "...\n[Description truncated - Full description available in original video]"
        else:
            truncation_note = ""

        if len(tags) > 8:
            additional_tags = (
                "🔖 **Additional Tags:** " + ', '.join(tags[8:16])
                + ('...' if len(tags) > 16 else '')
            )
        else:
            additional_tags = ""

        # Quick insights
        if view_count > 100000:
            content_quality = 'High'
        elif view_count > 10000:
            content_quality = 'Medium'
        else:
            content_quality = 'Growing'

        if engagement_rate > 5:
            audience_engagement = 'High'
        elif engagement_rate > 1:
            audience_engagement = 'Medium'
        else:
            audience_engagement = 'Low'

        if view_count > 1000000 and engagement_rate > 3:
            viral_potential = 'High'
        elif view_count > 100000:
            viral_potential = 'Medium'
        else:
            viral_potential = 'Standard'

        # "Recent" means uploaded within roughly the last year. A rolling
        # cutoff replaces the fixed '20240101', which goes stale over time.
        now = datetime.now()
        recent_cutoff = f"{now.year - 1:04d}{now:%m%d}"
        is_recent = len(upload_date) == 8 and upload_date >= recent_cutoff
        freshness = 'Recent' if is_recent else 'Older Content'

        banner = '=' * 60
        rule = '─' * 30
        wide_rule = '─' * 40

        lines = [
            "🎬 COMPREHENSIVE VIDEO ANALYSIS REPORT",
            banner,
            "📋 BASIC INFORMATION",
            rule,
            f"📹 **Title:** {title}",
            f"📺 **Channel:** {channel}",
            f"👤 **Uploader:** {uploader}",
            f"📅 **Upload Date:** {formatted_date}",
            f"⏱️ **Duration:** {duration_str}",
            f"🌐 **Language:** {language}",
            f"🔒 **Availability:** {availability}",
            "📊 PERFORMANCE METRICS",
            rule,
            f"👀 **Views:** {self._format_number(view_count)}",
            f"👍 **Likes:** {self._format_number(like_count)}",
            f"💬 **Comments:** {self._format_number(comment_count)}",
            f"👥 **Channel Subscribers:** {self._format_number(subscribers)}",
            f"📈 **Engagement Rate:** {engagement_rate:.2f}%",
            "🏷️ CONTENT CLASSIFICATION",
            rule,
            f"📂 **Categories:** {', '.join(categories) if categories else 'None specified'}",
            f"🔖 **Primary Tags:** {', '.join(tags[:8]) if tags else 'None specified'}",
            additional_tags,
            "📝 VIDEO DESCRIPTION",
            rule,
            description_text,
            truncation_note,
            "🎬 DETAILED SCENE-BY-SCENE BREAKDOWN",
            wide_rule,
            "\n".join(scene_descriptions),
            f"🎵 **Background Music Style:** {music_style}",
            f"👤 **Influencer Present:** {influencer_detection}",
            f"🎥 **Video Type:** {video_type}",
            f"😊 **Overall Emotion:** {emotion}",
            "📱 TECHNICAL DETAILS",
            rule,
            f"🔗 **Video URL:** {webpage_url}",
            f"🖼️ **Thumbnail:** {thumbnail_url}",
            f"📱 **Video ID:** {video_id}",
            "⚡ QUICK INSIGHTS",
            rule,
            f"• **Content Quality:** {content_quality}",
            f"• **Audience Engagement:** {audience_engagement}",
            f"• **Viral Potential:** {viral_potential}",
            f"• **Content Freshness:** {freshness}",
            banner,
            f"🕐 Analysis completed at: {now.strftime('%Y-%m-%d %H:%M:%S')}",
        ]
        return "\n".join(lines).strip()

    # ------------------------------------------------------------------
    # Metadata extraction
    # ------------------------------------------------------------------
    def get_video_info(self, url, progress=None, cookiefile=None):
        """Extract metadata for *url* without downloading the video.

        Args:
            url: YouTube video URL; surrounding whitespace is ignored.
            progress: Callable accepting ``(fraction, desc=...)``. When
                omitted, a ``gr.Progress()`` is created lazily at call time
                rather than once at import time as a shared default.
            cookiefile: Optional path to a cookies.txt file; used only if
                the file exists.

        Returns:
            ``(info, message)``: ``info`` is the yt-dlp metadata dict, or
            ``None`` on failure, in which case ``message`` says why.
        """
        if not isinstance(url, str) or not url.strip():
            return None, "❌ Please enter a YouTube URL"
        url = url.strip()
        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        try:
            if progress is None:
                progress = gr.Progress()
            progress(0.1, desc="Initializing YouTube extractor...")
            ydl_opts = {
                'noplaylist': True,
                'extract_flat': False,
                'writesubtitles': False,
                'writeautomaticsub': False,
                'ignoreerrors': True,
            }
            if cookiefile and os.path.exists(cookiefile):
                ydl_opts['cookiefile'] = cookiefile
                progress(0.3, desc="Loading cookies for authentication...")

            progress(0.5, desc="Extracting video metadata...")
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            # With 'ignoreerrors' enabled, a failed extraction yields None
            # instead of raising, so it must not be reported as success.
            if not info:
                return None, "❌ No video information could be extracted (the video may be unavailable or restricted)"

            progress(0.9, desc="Processing video information...")
            progress(1.0, desc="✅ Analysis complete!")
            return info, "✅ Video information extracted successfully"
        except yt_dlp.DownloadError as e:
            return None, f"❌ YouTube Download Error: {str(e)}"
        except Exception as e:
            return None, f"❌ Unexpected Error: {str(e)}"
# Single module-level downloader instance, created at import time. It is used
# by analyze_with_cookies() and its cleanup() is registered with atexit in the
# __main__ block. Constructing it creates a temporary directory.
downloader = YouTubeDownloader()
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio click handler: analyze *url* and return the report text.

    Args:
        url: YouTube URL typed by the user.
        cookies_file: Path of the uploaded cookies.txt, or a falsy value when
            nothing was uploaded. Ignored if the path does not exist.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio is asked to inject it into an event handler.

    Returns:
        The formatted report on success, otherwise a one-line error message.
        Exceptions are converted to a message so the UI always gets text.
    """
    try:
        progress(0.05, desc="Starting analysis...")

        cookiefile = None
        if cookies_file and os.path.exists(cookies_file):
            cookiefile = cookies_file
            progress(0.1, desc="Cookies file loaded successfully")

        info, msg = downloader.get_video_info(url, progress=progress, cookiefile=cookiefile)
        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating comprehensive report...")
        formatted_info = downloader.format_video_info(info)
        progress(1.0, desc="✅ Complete!")
        return formatted_info
    except Exception as e:  # top-level UI boundary: report instead of crashing
        return f"❌ System Error: {str(e)}"
def create_interface():
    """Build and return the Gradio Blocks UI (not yet launched).

    Layout: a header, a URL textbox next to an optional cookies.txt upload,
    an "Analyze" button wired to ``analyze_with_cookies``, a read-out textbox
    for the report, and two example URLs.
    """
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
    }
    .main-header {
        text-align: center;
        background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 20px;
    }
    .description-text {
        text-align: center;
        font-size: 1.1em;
        color: #666;
        margin-bottom: 30px;
    }
    """

    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="🎥 YouTube Video Analyzer Pro",
        css=custom_css,
    ) as interface:
        gr.HTML("""
        <div class="main-header">
        🎥 YouTube Video Analyzer Pro
        </div>
        <div class="description-text">
        Get comprehensive analysis of any YouTube video with detailed scene breakdowns,
        influencer detection, emotion analysis, and performance metrics.
        Upload cookies.txt to access age-restricted or private videos.
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(
                    label="🔗 YouTube URL",
                    placeholder="Paste your YouTube video URL here...",
                    lines=1,
                )
            with gr.Column(scale=1):
                # type="filepath" hands the handler a path string, which is
                # what analyze_with_cookies passes to os.path.exists().
                cookies_input = gr.File(
                    label="🍪 Upload cookies.txt (Optional)",
                    file_types=[".txt"],
                    type="filepath",
                )

        analyze_btn = gr.Button(
            "🔍 Analyze Video",
            variant="primary",
            size="lg",
        )

        output = gr.Textbox(
            label="📊 Comprehensive Analysis Report",
            lines=35,
            max_lines=50,
            show_copy_button=True,
        )

        analyze_btn.click(
            fn=analyze_with_cookies,
            inputs=[url_input, cookies_input],
            outputs=output,
            show_progress=True,
        )

        # Clickable sample URLs that fill the URL textbox.
        gr.Examples(
            examples=[
                ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
                ["https://youtu.be/jNQXAC9IVRw"],
            ],
            inputs=url_input,
            label="🎯 Try these examples:",
        )

    return interface
if __name__ == "__main__":
    import atexit

    demo = create_interface()

    # Remove the downloader's temporary directory when the interpreter exits.
    atexit.register(downloader.cleanup)

    # Listen on all interfaces, port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)