"""Gradio app that fetches the transcript of a YouTube video from its URL."""

import re

import gradio as gr
from huggingface_hub import InferenceClient
from youtube_transcript_api import YouTubeTranscriptApi

# Not used by the transcript fetcher below; kept at module level so any code
# that imports ``client`` from this module keeps working.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Recognised URL shapes:
#   https://www.youtube.com/watch?v=VIDEO_ID
#   https://youtu.be/VIDEO_ID
#   https://www.youtube.com/shorts/VIDEO_ID
#   https://www.youtube.com/embed/VIDEO_ID
#   https://www.youtube.com/live/VIDEO_ID
# The ID ends at the first query/fragment delimiter, slash, or whitespace so
# that trailing slashes, tracking parameters, and pasted blanks are excluded.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:v=|/shorts/|/embed/|/live/|\.be/)([^&\n?#/\s]+)"
)


def get_video_id(youtube_url: str) -> str:
    """
    Extract the video ID from a given YouTube URL.

    Args:
        youtube_url: A watch, youtu.be, shorts, embed, or live URL.
            Surrounding whitespace is ignored.

    Returns:
        The video ID portion of the URL.

    Raises:
        ValueError: If the input is empty or no video ID can be found in it.
    """
    # ``or ""`` turns a None input into the same ValueError as any other
    # unparseable value instead of a TypeError from the regex engine.
    match = _VIDEO_ID_PATTERN.search((youtube_url or "").strip())
    if not match:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)


def _load_transcript_texts(video_id: str) -> list:
    """Return the transcript's text fragments, in order, for *video_id*."""
    # Older youtube-transcript-api releases expose a static ``get_transcript``
    # returning a list of dicts; newer releases drop it in favour of an
    # instance-level ``fetch`` that yields snippet objects with a ``text``
    # attribute. Feature-detect so the app works with either.
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return [entry["text"] for entry in entries]
    return [snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)]


def fetch_transcript(youtube_url: str) -> str:
    """
    Given a YouTube URL, fetch the transcript and return it as a single string.

    Args:
        youtube_url: URL of the video whose transcript is wanted.

    Returns:
        The transcript fragments joined by single spaces. On any failure
        (unparseable URL, transcript lookup error, ...) a message starting
        with ``"Error fetching transcript: "`` is returned instead of raising.
    """
    # This function is the Gradio callback, so the broad catch is deliberate:
    # every failure is reported to the user as text rather than raised.
    try:
        video_id = get_video_id(youtube_url)
        return " ".join(_load_transcript_texts(video_id))
    except Exception as e:
        return f"Error fetching transcript: {e}"


demo = gr.Interface(
    fn=fetch_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs="text",
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube link to retrieve its transcript.",
)

if __name__ == "__main__":
    demo.launch()