| | import gradio as gr |
| | from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound |
| | from youtube_transcript_api.formatters import TextFormatter, JSONFormatter, WebVTTFormatter, SRTFormatter |
| | import json |
| |
|
def get_transcript(video_id, languages, format_type, translate_to, preserve_formatting):
    """Fetch a YouTube transcript and return it rendered in the chosen format.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.
        languages: Comma-separated language codes in priority order
            (e.g. ``"en,de"``), or an iterable of codes. Blank entries are
            dropped; if nothing remains, ``["en"]`` is used.
        format_type: One of ``"Text"``, ``"JSON"``, ``"WebVTT"`` or ``"SRT"``.
        translate_to: Language code to translate the transcript into, or a
            falsy value to return the transcript untranslated.
        preserve_formatting: Forwarded to the transcript fetch call, for both
            the plain and the translated transcript.

    Returns:
        The formatted transcript as a string, or a human-readable message
        starting with ``"Error:"`` / ``"Unexpected error:"`` on failure. The
        function never raises, so the result can always be shown in the UI.
    """
    # Reject a blank ID up front instead of letting the library produce an
    # opaque failure after a pointless network request.
    video_id = str(video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    # Users naturally type "en, de"; without stripping, " de" never matches.
    raw_codes = languages.split(",") if isinstance(languages, str) else (languages or [])
    language_codes = [str(code).strip() for code in raw_codes]
    language_codes = [code for code in language_codes if code] or ["en"]

    try:
        formatter_classes = {
            "Text": TextFormatter,
            "JSON": JSONFormatter,
            "WebVTT": WebVTTFormatter,
            "SRT": SRTFormatter,
        }
        # Validate before touching the network so a bad format fails fast
        # with a clear message rather than a bare KeyError text.
        if format_type not in formatter_classes:
            return f"Error: Unsupported output format: {format_type}"

        if translate_to:
            # Only the translated transcript is downloaded; fetching the
            # base transcript first would be discarded work.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            base_transcript = transcript_list.find_transcript(language_codes)
            transcript = base_transcript.translate(translate_to).fetch(
                preserve_formatting=preserve_formatting
            )
        else:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=language_codes,
                preserve_formatting=preserve_formatting,
            )

        return formatter_classes[format_type]().format_transcript(transcript)

    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript found for the specified languages"
    except Exception as e:
        # UI boundary: report anything else as text instead of crashing.
        return f"Unexpected error: {str(e)}"
| |
|
def list_available_transcripts(video_id):
    """Describe every transcript available for a video as a JSON string.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.

    Returns:
        A JSON array (indented by 2, non-ASCII characters kept readable) with
        one object per transcript holding its language name, language code,
        whether it is auto-generated, whether it is translatable, and its
        translation languages. On failure, a message starting with
        ``"Error:"`` is returned instead; the function never raises.
    """
    # Reject a blank ID up front instead of sending it to the library.
    video_id = str(video_id or "").strip()
    if not video_id:
        return "Error: Please provide a YouTube video ID"

    try:
        transcripts_info = [
            {
                "Language": transcript.language,
                "Code": transcript.language_code,
                "Is Generated": transcript.is_generated,
                "Is Translatable": transcript.is_translatable,
                # NOTE(review): older library releases expose plain dicts
                # here while newer ones appear to expose objects with
                # `language` / `language_code` attributes; normalise to
                # dicts so json.dumps works with either. Confirm against
                # the installed version.
                "Translation Languages": [
                    entry
                    if isinstance(entry, dict)
                    else {
                        "language": entry.language,
                        "language_code": entry.language_code,
                    }
                    for entry in transcript.translation_languages
                ],
            }
            for transcript in YouTubeTranscriptApi.list_transcripts(video_id)
        ]
        # ensure_ascii=False keeps non-Latin language names legible in the
        # output textbox instead of \uXXXX escapes.
        return json.dumps(transcripts_info, indent=2, ensure_ascii=False)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except Exception as e:
        # UI boundary: report anything else as text instead of crashing.
        return f"Error: {str(e)}"
| |
|
| | |
# Gradio UI: two tabs, one per function defined above.
# NOTE(review): the nesting of the event wiring and of the trailing notes was
# reconstructed from a paste that lost its indentation — confirm against the
# original file.
with gr.Blocks(title="YouTube Transcript Fetcher") as demo:
    gr.Markdown("# YouTube Transcript Fetcher")
    gr.Markdown("Retrieve transcripts from YouTube videos with various formatting options")

    with gr.Tab("Get Transcript"):
        with gr.Row():
            # Left column: inputs for get_transcript.
            with gr.Column():
                video_id_input = gr.Textbox(label="YouTube Video ID", placeholder="e.g., dQw4w9WgXcQ")
                languages_input = gr.Textbox(
                    label="Languages (comma-separated)",
                    placeholder="e.g., en,de,es",
                    value="en"
                )
                # Choices match the format names get_transcript looks up.
                format_dropdown = gr.Dropdown(
                    choices=["Text", "JSON", "WebVTT", "SRT"],
                    label="Output Format",
                    value="Text"
                )
                # The empty string is falsy, so get_transcript skips translation.
                translate_dropdown = gr.Dropdown(
                    choices=["", "en", "de", "es", "fr", "it"],
                    label="Translate To (optional)",
                    value=""
                )
                preserve_formatting = gr.Checkbox(
                    label="Preserve Formatting",
                    value=False
                )
                submit_btn = gr.Button("Get Transcript")

            # Right column: result text (or error message) from get_transcript.
            with gr.Column():
                output = gr.Textbox(label="Transcript", lines=20)

        # The inputs list is in the same order as get_transcript's parameters.
        submit_btn.click(
            fn=get_transcript,
            inputs=[video_id_input, languages_input, format_dropdown, translate_dropdown, preserve_formatting],
            outputs=output
        )

    with gr.Tab("List Available Transcripts"):
        with gr.Row():
            with gr.Column():
                list_video_id = gr.Textbox(label="YouTube Video ID", placeholder="e.g., dQw4w9WgXcQ")
                list_btn = gr.Button("List Transcripts")

            # Shows the JSON string (or error message) from list_available_transcripts.
            with gr.Column():
                list_output = gr.Textbox(label="Available Transcripts (JSON)", lines=20)

        list_btn.click(
            fn=list_available_transcripts,
            inputs=list_video_id,
            outputs=list_output
        )

    # Usage notes displayed to the user.
    gr.Markdown("""
    ### Notes
    - Enter a valid YouTube video ID (found in the URL)
    - Specify languages as comma-separated codes (e.g., "en,de")
    - Choose output format from available options
    - Optional: Select a language to translate the transcript to
    - Preserve formatting keeps HTML tags if present
    """)

# Start the Gradio app; this runs whenever the module is executed or imported.
demo.launch()