# tools/youtube_tool.py
"""
YouTube Transcript Extraction Tool
Extracts transcripts from YouTube videos for Video Brain mode.
"""

import re
from typing import Dict, Optional

# Try to import youtube_transcript_api, but handle if it fails
try:
    from youtube_transcript_api import YouTubeTranscriptApi
    from youtube_transcript_api._errors import (
        TranscriptsDisabled,
        NoTranscriptFound,
        VideoUnavailable
    )
    YOUTUBE_API_AVAILABLE = True
except ImportError:
    YOUTUBE_API_AVAILABLE = False
    print("⚠️ youtube-transcript-api not available")


class YouTubeTool:
    """Extract transcripts and basic metadata from YouTube videos."""

    # URL markers that are immediately followed by the 11-character video ID.
    _VIDEO_URL_RE = re.compile(
        r'(?:v=|/v/|youtu\.be/|/embed/|/shorts/|/live/)([a-zA-Z0-9_-]{11})'
    )
    # A bare video ID is only accepted when it is the *entire* input;
    # searching for any 11-character run would "find" an ID inside
    # unrelated URLs (e.g. "this-is-not" in ".../this-is-not-a-video").
    _BARE_ID_RE = re.compile(r'[a-zA-Z0-9_-]{11}')

    def extract_video_id(self, url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL or a bare video ID.

        Recognises ``watch?v=``, ``/v/``, ``youtu.be/``, ``/embed/``,
        ``/shorts/`` and ``/live/`` URLs, as well as an input that consists
        solely of an 11-character ID.

        Args:
            url: The URL (or bare ID) to inspect.

        Returns:
            The 11-character video ID, or ``None`` when the input is empty,
            not a string, or does not contain a recognisable ID.
        """
        if not isinstance(url, str):
            return None
        candidate = url.strip()
        if not candidate:
            return None

        match = YouTubeTool._VIDEO_URL_RE.search(candidate)
        if match:
            return match.group(1)
        if YouTubeTool._BARE_ID_RE.fullmatch(candidate):
            return candidate
        return None

    @staticmethod
    def _failure(video_id: Optional[str], error: str, **extra) -> Dict:
        """Build a failure result with the same keys as a success result."""
        result = {
            "success": False,
            "error": error,
            "transcript": "",
            "clean_transcript": "",
            "segments": [],
            "video_id": video_id,
        }
        result.update(extra)
        return result

    @staticmethod
    def _list_transcripts(video_id: str):
        """Return the transcript list for *video_id* across library versions."""
        # Older releases expose a static ``list_transcripts``; releases that
        # dropped it provide the instance method ``list`` instead.
        legacy = getattr(YouTubeTranscriptApi, "list_transcripts", None)
        if legacy is not None:
            return legacy(video_id)
        return YouTubeTranscriptApi().list(video_id)

    @staticmethod
    def _select_transcript(transcript_list):
        """Pick an English transcript, else the first one (translated if possible)."""
        try:
            return transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
        except Exception:
            # Best-effort: no English transcript (or lookup failed), so fall
            # back to whatever is available. Unlike a bare ``except`` this
            # still lets KeyboardInterrupt/SystemExit propagate.
            first = next(iter(transcript_list), None)

        if first is None:
            return None
        try:
            return first.translate('en')
        except Exception:
            # Not translatable: use the transcript in its original language.
            return first

    @staticmethod
    def _to_segments(fetched) -> list:
        """Normalise ``fetch()`` output to a list of {text, start, duration} dicts."""
        # Newer library versions return an object exposing ``to_raw_data()``;
        # older ones already return a plain list of dicts.
        to_raw_data = getattr(fetched, "to_raw_data", None)
        if callable(to_raw_data):
            return to_raw_data()
        return fetched

    def get_transcript(self, video_url: str) -> Dict:
        """
        Get transcript from a YouTube video.

        Args:
            video_url: YouTube URL or bare video ID.

        Returns:
            Dict with keys:
            - success: bool
            - transcript: str (one "[m:ss] text" line per segment)
            - clean_transcript: str (segment texts joined by spaces)
            - segments: list of {text, start, duration}
            - video_id: str (None if it could not be extracted)
            - error: str (None on success)
            - network_error: True (only present on DNS resolution failures)
        """
        video_id = self.extract_video_id(video_url)

        if not video_id:
            return self._failure(None, "Could not extract video ID from URL")

        if not YOUTUBE_API_AVAILABLE:
            return self._failure(video_id, "YouTube transcript API not available")

        # From here on the library import succeeded, so its exception classes
        # are guaranteed to be defined.
        try:
            transcript = self._select_transcript(self._list_transcripts(video_id))
            if transcript is None:
                return self._failure(
                    video_id, "No transcript available for this video"
                )

            segments = self._to_segments(transcript.fetch())

            # Timestamped version: one "[m:ss] text" line per segment.
            timestamped_lines = []
            for seg in segments:
                minutes, seconds = divmod(int(seg['start']), 60)
                timestamped_lines.append(
                    f"[{minutes}:{seconds:02d}] {seg['text']}"
                )

            return {
                "success": True,
                "transcript": "\n".join(timestamped_lines),
                # Plain version without timestamps.
                "clean_transcript": " ".join(seg['text'] for seg in segments),
                "segments": segments,
                "video_id": video_id,
                "error": None
            }

        except TranscriptsDisabled:
            return self._failure(video_id, "Transcripts are disabled for this video")
        except NoTranscriptFound:
            return self._failure(video_id, "No transcript found for this video")
        except VideoUnavailable:
            return self._failure(video_id, "Video is unavailable")
        except Exception as e:
            error_msg = str(e)
            # DNS failures are flagged separately so callers can tell
            # connectivity problems apart from video-specific ones.
            if "NameResolutionError" in error_msg or "Failed to resolve" in error_msg:
                return self._failure(
                    video_id,
                    "Network error: Cannot connect to YouTube (DNS resolution failed)",
                    network_error=True,
                )
            # Truncate so a huge library message does not flood the result.
            return self._failure(
                video_id, f"Error fetching transcript: {error_msg[:200]}"
            )

    def get_video_info(self, video_url: str) -> Dict:
        """Return the video ID, original URL and embed URL for *video_url*.

        No network request is made; ``video_id`` and ``embed_url`` are
        ``None`` when no video ID can be extracted from the URL.
        """
        video_id = self.extract_video_id(video_url)
        return {
            "video_id": video_id,
            "url": video_url,
            "embed_url": f"https://www.youtube.com/embed/{video_id}" if video_id else None
        }