Spaces:
Running
Running
| import re | |
| import gradio as gr | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from huggingface_hub import InferenceClient | |
# Hugging Face inference client bound to the zephyr-7b-beta model id.
# NOTE(review): nothing in the visible code references `client`; confirm it is
# still needed before keeping this import-time construction.
| client = InferenceClient("HuggingFaceH4/zephyr-7b-beta") | |
def get_video_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognised URL shapes::

        https://www.youtube.com/watch?v=VIDEO_ID
        https://youtu.be/VIDEO_ID
        https://www.youtube.com/shorts/VIDEO_ID
        https://www.youtube.com/embed/VIDEO_ID
        https://www.youtube.com/live/VIDEO_ID

    Args:
        youtube_url: The URL as typed or pasted by the user. Leading and
            trailing whitespace is ignored.

    Returns:
        The ID segment that follows the recognised marker, stopping at the
        next ``&``, ``?``, ``#``, ``/`` or whitespace character.

    Raises:
        ValueError: If none of the recognised markers is found in the URL.
    """
    # `[?&]v=` anchors on a real query parameter named "v", so parameters that
    # merely end in "v" (e.g. "env=") are not mistaken for it. `youtu\.be/`
    # keeps unrelated hosts ending in ".be" from matching. The capture class
    # excludes "/" and whitespace so a trailing slash or pasted blank cannot
    # leak into the returned ID.
    pattern = r"(?:[?&]v=|youtu\.be/|/(?:shorts|embed|live)/)([^&?#/\s]+)"
    match = re.search(pattern, youtube_url.strip())
    if match is None:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)
def fetch_transcript(youtube_url: str) -> str:
    """Return the transcript of a YouTube video as one space-joined string.

    The video ID is taken from ``youtube_url`` via ``get_video_id`` and the
    transcript entries are requested through ``YouTubeTranscriptApi``.

    Args:
        youtube_url: Link to the video whose transcript is wanted.

    Returns:
        The ``"text"`` field of every transcript entry joined with single
        spaces. If anything raises along the way, a message beginning with
        ``"Error fetching transcript: "`` followed by the exception text is
        returned instead of propagating the exception.
    """
    # NOTE(review): newer youtube-transcript-api releases appear to replace
    # the static `get_transcript` helper with an instance-level `fetch` —
    # confirm against the version this app pins.
    try:
        segments = YouTubeTranscriptApi.get_transcript(get_video_id(youtube_url))
        return " ".join(segment["text"] for segment in segments)
    except Exception as err:
        # Failures are reported as text because the result is shown directly
        # in the UI output box.
        return f"Error fetching transcript: {err!s}"
# Gradio UI: a single text box for the URL in, plain text out, with
# fetch_transcript doing the work.
demo = gr.Interface(
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube link to retrieve its transcript.",
    fn=fetch_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs="text",
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()