"""LlamaIndex tools that fetch YouTube transcripts via youtube_transcript_api."""

from urllib.parse import parse_qs, urlparse

from llama_index.core.tools import FunctionTool
# The three exception classes are kept from the original import list; this
# module does not reference them directly (errors are handled generically at
# the tool boundary below).
from youtube_transcript_api import (  # noqa: F401
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable,
    YouTubeTranscriptApi,
)

# Shared client. `fetch` is called on this instance by both tools.
yt_ap = YouTubeTranscriptApi()

# Hosts whose URLs carry the video ID either in the `v` query parameter or in
# a `/<prefix>/<id>` path.
_YOUTUBE_HOSTS = frozenset(
    {
        "youtube.com",
        "www.youtube.com",
        "m.youtube.com",
        "music.youtube.com",
        "youtube-nocookie.com",
        "www.youtube-nocookie.com",
    }
)
# Short-link host: the ID is the whole path, which the generic fallback handles.
_SHORT_LINK_HOST = "youtu.be"
# First path segment of URL shapes where the second segment is the video ID.
_ID_PATH_PREFIXES = frozenset({"embed", "shorts", "live", "v"})
# Transcript languages requested from the API, in order of preference. Extend
# this to request more; `yt_ap.list(video_id)` reports what a video offers.
_TRANSCRIPT_LANGUAGES = ("en",)


def extract_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL, short link, or raw ID.

    Recognised shapes:
      * ``https://www.youtube.com/watch?v=<id>`` (also ``m.``/``music.`` hosts)
      * ``https://www.youtube.com/{embed,shorts,live,v}/<id>``
      * ``https://youtu.be/<id>``
      * the same URLs written without a scheme
      * anything else: the URL path with surrounding slashes removed, which
        covers a bare video ID

    Args:
        url: The YouTube video URL or raw video ID.

    Returns:
        The extracted video ID.

    Raises:
        ValueError: If no non-empty ID can be extracted (``urlparse`` may also
            raise ``ValueError`` for a malformed URL).
    """
    cleaned = url.strip()

    # urlparse only populates the host when a scheme (or leading "//") is
    # present, so give scheme-less YouTube URLs one before parsing.
    first_segment = cleaned.split("/", 1)[0].lower()
    if "://" not in cleaned and (
        first_segment in _YOUTUBE_HOSTS or first_segment == _SHORT_LINK_HOST
    ):
        cleaned = f"https://{cleaned}"

    parsed = urlparse(cleaned)

    if parsed.hostname in _YOUTUBE_HOSTS:
        # parse_qs drops blank values, so a present key has a non-empty value.
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            return query_ids[0]
        segments = [segment for segment in parsed.path.split("/") if segment]
        if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
            return segments[1]

    # Fallback for youtu.be links and raw IDs: the path itself is the ID.
    video_id = parsed.path.strip("/")
    if not video_id:
        raise ValueError(f"Could not extract a video ID from {url!r}")
    return video_id


def _fetch_transcript(video_url: str):
    """Resolve *video_url* to a video ID and fetch its transcript snippets.

    Returns the object produced by ``yt_ap.fetch``; callers iterate it and
    read ``text``, ``start`` and ``duration`` from each snippet.
    """
    video_id = extract_video_id(video_url)
    return yt_ap.fetch(
        video_id=video_id,
        languages=list(_TRANSCRIPT_LANGUAGES),
    )


def fetch_youtube_transcript(video_url: str) -> str:
    """Fetch the transcript text for a given YouTube video.

    Args:
        video_url (str): The YouTube video URL.

    Returns:
        str: Combined transcript text, or an error message starting with
        "Error fetching video details:" if the transcript cannot be fetched.
    """
    # Tool boundary: the caller expects a string either way, so every failure
    # (bad URL, disabled/missing transcript, network error) is reported as
    # text instead of being raised.
    try:
        transcript = _fetch_transcript(video_url)
        return " ".join(snippet.text for snippet in transcript)
    except Exception as exc:  # noqa: BLE001
        return f"Error fetching video details: {exc}"


def fetch_youtube_transcript_snippets(video_url: str) -> str:
    """Fetch timed YouTube transcript snippets for the given URL.

    Each snippet is reported with its text, its duration and its start time.

    Args:
        video_url (str): The YouTube video URL.

    Returns:
        str: The snippets formatted as
        "Text: <text> Duration: <duration> StartTime: <start>" and joined
        together, or an error message starting with
        "Error fetching video details:" if the transcript cannot be fetched.
    """
    # Same tool-boundary policy as fetch_youtube_transcript: report, don't raise.
    try:
        transcript = _fetch_transcript(video_url)
        # The per-snippet format (including its trailing space) is kept
        # exactly as before so existing consumers see identical output.
        return " ".join(
            f"Text: {snippet.text} Duration: {snippet.duration} "
            f"StartTime: {snippet.start} "
            for snippet in transcript
        )
    except Exception as exc:  # noqa: BLE001
        return f"Error fetching video details: {exc}"


# Tool wrappers built from the two functions above.
youtube_transcript_tool = FunctionTool.from_defaults(fetch_youtube_transcript)
youtube_transcript_snippet_tool = FunctionTool.from_defaults(
    fetch_youtube_transcript_snippets
)