File size: 1,455 Bytes
bcae73d
cf16df3
bcae73d
cf16df3
 
 
 
bcae73d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf16df3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import re
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from huggingface_hub import InferenceClient

# Inference client bound to the "HuggingFaceH4/zephyr-7b-beta" model repo,
# constructed once at import time.
# NOTE(review): nothing in the visible code references `client`; confirm it is
# still needed before keeping this module-level construction.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def get_video_id(youtube_url: str) -> str:
    """
    Extract the video ID from a given YouTube URL.

    Recognised URL shapes:
        https://www.youtube.com/watch?v=VIDEO_ID
        https://youtu.be/VIDEO_ID
        https://www.youtube.com/shorts/VIDEO_ID
        https://www.youtube.com/embed/VIDEO_ID
        https://www.youtube.com/live/VIDEO_ID

    The ID ends at the first query/fragment delimiter (``&``, ``?``, ``#``),
    path separator (``/``) or whitespace character, so pasted URLs with a
    trailing slash, trailing spaces or extra parameters still yield a clean ID.

    Args:
        youtube_url: The URL (or URL-like text) to search.

    Returns:
        The video ID found after the first recognised marker.

    Raises:
        ValueError: If no recognised marker followed by an ID is present.
    """
    # `/` and whitespace are excluded from the capture because they can never
    # be part of an ID; leaving them in would hand a corrupted ID such as
    # "abc/" or "abc " to the transcript lookup.
    pattern = r"(?:v=|/shorts/|/embed/|/live/|\.be/)([^&?#/\s]+)"
    match = re.search(pattern, youtube_url)
    if match is None:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)

def fetch_transcript(youtube_url: str) -> str:
    """
    Return the transcript of the video behind ``youtube_url`` as one string.

    The video ID is extracted with ``get_video_id``, the transcript entries
    are requested through ``YouTubeTranscriptApi.get_transcript``, and the
    ``"text"`` field of every entry is joined with single spaces.

    Any exception raised along the way is not propagated; instead its message
    is returned prefixed with ``"Error fetching transcript: "`` so the caller
    always receives displayable text.
    """
    try:
        vid = get_video_id(youtube_url)
        segments = YouTubeTranscriptApi.get_transcript(vid)
        # Each entry is indexed by its "text" key; the pieces are joined
        # inside the try so a malformed entry is reported like any other error.
        pieces = (segment["text"] for segment in segments)
        return " ".join(pieces)
    except Exception as err:
        return f"Error fetching transcript: {err!s}"

# Single-input, single-output Gradio UI: the text typed into the "YouTube URL"
# box is passed to fetch_transcript and the returned string is shown as text.
demo = gr.Interface(
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube link to retrieve its transcript.",
    fn=fetch_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs="text",
)

# Start the web app only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()