File size: 2,667 Bytes
aea337a
 
 
 
a44d34c
aea337a
 
4793736
 
 
 
 
 
 
 
 
aea337a
 
 
 
 
 
 
 
f8a91e9
aea337a
 
f8a91e9
 
 
 
 
aea337a
 
 
 
 
 
 
 
 
 
 
f8a91e9
 
aea337a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from urllib.parse import parse_qs, urlparse
from llama_index.core.tools import FunctionTool
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable

# Shared API client: both transcript fetchers below call `yt_ap.fetch(...)`
# on this single module-level instance.
yt_ap = YouTubeTranscriptApi()


def extract_video_id(url: str) -> str:
    """
    Extracts the video ID from a YouTube URL.

    Recognised inputs:
      * watch URLs on youtube.com and its www./m./music. hosts (``?v=<id>``)
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths
      * ``youtu.be/<id>`` short links
      * scheme-less links such as ``youtube.com/watch?v=<id>``
      * a raw video ID

    Args:
        url (str): The full YouTube video URL (or a bare video ID).
    Returns:
        str: The extracted video ID. The ID is not validated: for input that
            matches none of the shapes above, the URL path with surrounding
            slashes removed is returned (possibly an empty string).
    Raises:
        ValueError: Only if ``urlparse`` itself rejects the input (for example
            unbalanced brackets in the host part).
    """
    youtube_hosts = {
        "www.youtube.com",
        "youtube.com",
        "m.youtube.com",
        "music.youtube.com",
        "www.youtube-nocookie.com",
        "youtube-nocookie.com",
    }
    id_path_prefixes = {"embed", "shorts", "live", "v"}

    candidate = url.strip()
    parsed = urlparse(candidate)

    # A link pasted without a scheme ("youtube.com/watch?v=...") is parsed as
    # a bare path. If the first path segment looks like a host name, re-parse
    # with a scheme so hostname/query are populated. Raw IDs contain no dots,
    # so they are left alone.
    if (
        not parsed.scheme
        and not parsed.netloc
        and "." in parsed.path.split("/", 1)[0]
    ):
        parsed = urlparse("https://" + candidate)

    if parsed.hostname in youtube_hosts:
        # parse_qs drops blank values, so "?v=" falls through to the path checks.
        query = parse_qs(parsed.query)
        if "v" in query:
            return query["v"][0]
        segments = [part for part in parsed.path.split("/") if part]
        if len(segments) >= 2 and segments[0] in id_path_prefixes:
            return segments[1]

    # Fallback for youtu.be links and raw IDs: the path is the ID. Strip both
    # ends so a trailing slash does not end up inside the ID.
    return parsed.path.strip("/")


def fetch_youtube_transcript(video_url: str) -> str:
    """
    Fetches the transcript text for a given YouTube video.
    Args:
        video_url (str): The YouTube video URL.
    Returns:
        str: The English transcript snippets joined by single spaces, or an
            "Error fetching video details: ..." message if anything fails.
    """
    try:
        # Resolve the ID inside the try so that a URL which cannot be parsed
        # is reported through the same error string as a failed fetch,
        # instead of raising out of the tool.
        video_id = extract_video_id(video_url)

        # Called on the shared module-level API instance.
        transcript_data = yt_ap.fetch(
            video_id=video_id,
            # More languages can be listed here; yt_ap.list(video_id) shows
            # which transcripts are available for a video.
            languages=["en"],
        )

        # Each fetched snippet also carries timing data; only the text is
        # needed for the plain transcript.
        return " ".join(str(snippet.text) for snippet in transcript_data)
    except Exception as e:
        # Deliberately broad: this function reports failures as text rather
        # than raising.
        return f"Error fetching video details: {str(e)}"
    
def fetch_youtube_transcript_snippets(video_url: str) -> str:
    """
    Fetches the transcript of a YouTube video together with snippet timing.
    Args:
        video_url (str): The YouTube video URL.
    Returns:
        str: One "Text: ... Duration: ... StartTime: ... <End>" entry per
            transcript snippet, joined by single spaces, or an
            "Error fetching video details: ..." message if anything fails.
            No end time is emitted; only the start and duration values
            reported for each snippet.
    """
    try:
        # Resolve the ID inside the try so that a URL which cannot be parsed
        # is reported through the same error string as a failed fetch,
        # instead of raising out of the tool.
        video_id = extract_video_id(video_url)

        # Called on the shared module-level API instance.
        transcript_data = yt_ap.fetch(
            video_id=video_id,
            # More languages can be listed here; yt_ap.list(video_id) shows
            # which transcripts are available for a video.
            languages=["en"],
        )
        return " ".join(
            f"Text: {snippet.text} Duration: {snippet.duration} StartTime: {snippet.start} <End>"
            for snippet in transcript_data
        )
    except Exception as e:
        # Deliberately broad: this function reports failures as text rather
        # than raising.
        return f"Error fetching video details: {str(e)}"

# Wrap the two fetchers as LlamaIndex function tools.
# Plain-text transcript:
youtube_transcript_tool = FunctionTool.from_defaults(
    fn=fetch_youtube_transcript,
)
# Transcript with per-snippet start time and duration:
youtube_transcript_snippet_tool = FunctionTool.from_defaults(
    fn=fetch_youtube_transcript_snippets,
)