Spaces:
Build error
Build error
import re
from typing import Dict, List, Optional, Tuple

import gradio as gr
from youtube_transcript_api import (
    YouTubeTranscriptApi,
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable
)
class YoutubeTranscript:
    """Stateless helpers for resolving a YouTube URL and fetching its transcript.

    Both helpers are static: they are called on the class itself, e.g.
    ``YoutubeTranscript.get_transcript(url, "ja")``.
    """

    # A marker that precedes the video ID, followed by the ID itself.  The ID
    # stops at the first query / fragment / path separator or whitespace, so a
    # share link such as ``https://youtu.be/<id>?si=...`` does not drag its
    # query string into the ID.  ``(?<!\w)`` keeps ``v=`` from matching inside
    # another parameter name (``nav=``).
    _VIDEO_ID_PATTERN = re.compile(
        r"(?:(?<!\w)v=|youtu\.be/|/shorts/|/embed/|/live/)([^?&#/\s]+)"
    )

    @staticmethod
    def get_video_id(url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL.

        Args:
            url: A watch (``...?v=<id>``), short (``youtu.be/<id>``), shorts,
                embed or live URL.

        Returns:
            The video ID, or ``None`` when ``url`` is not a string or no ID
            can be found in it.
        """
        if not isinstance(url, str):
            return None
        match = YoutubeTranscript._VIDEO_ID_PATTERN.search(url)
        return match.group(1) if match else None

    @staticmethod
    def get_transcript(url: str, language: str = 'ja') -> Optional[List[Dict]]:
        """Fetch the transcript of a YouTube video.

        Args:
            url: URL of the YouTube video.
            language: Language code of the transcript to request
                (e.g. ``ja``, ``en``, ``en-US``).

        Returns:
            The value returned by ``YouTubeTranscriptApi.get_transcript`` for
            the requested language, or ``None`` when the URL is invalid,
            transcripts are disabled, the video is unavailable, or an
            unexpected error occurs (a message is printed in each case).

        Raises:
            NoTranscriptFound: No transcript exists in the requested language.
        """
        video_id = YoutubeTranscript.get_video_id(url)
        if video_id is None:
            print(f"無効なURLです: {url}")
            return None

        try:
            return YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=[language]  # request only the selected language
            )
        except NoTranscriptFound:
            # Re-raise the library's own exception.  It cannot be rebuilt from
            # a bare message (its constructor needs the video id, requested
            # languages and transcript data), and matching on the error text
            # would also catch unrelated retrieval failures.
            print(f"指定した言語 ({language}) の字幕が見つかりません: {url}")
            raise
        except TranscriptsDisabled:
            print(f"この動画では字幕が無効になっています: {url}")
            return None
        except VideoUnavailable:
            print(f"動画が利用できません: {url}")
            return None
        except Exception as e:
            # Best-effort boundary: report anything else and signal failure.
            print(f"予期せぬエラーが発生しました: {str(e)}")
            return None
def get_transcript_for_gradio(url: str, language: str) -> Tuple[str, str]:
    """Gradio click handler: fetch a transcript and report its length.

    Args:
        url: YouTube video URL entered by the user.
        language: Language code selected in the UI.

    Returns:
        A ``(text, count)`` pair for the two output components.  On success
        ``text`` is every transcript entry's ``'text'`` concatenated without a
        separator and ``count`` is its character count as a string.  When no
        transcript is returned both values are the failure message.  When an
        exception is raised ``text`` is the error message and ``count`` is the
        length of that displayed message.
    """
    try:
        transcript = YoutubeTranscript.get_transcript(url, language)
        if not transcript:
            failure = "字幕の取得に失敗しました。"
            return failure, failure
        text = "".join(str(entry['text']) for entry in transcript)
        return text, str(len(text))
    except NoTranscriptFound:
        # Build the localized message here instead of relying on the
        # exception's own text, which is the library's English description.
        message = f"指定した言語 ({language}) の字幕が見つかりません: {url}"
        return message, str(len(message))
    except Exception as e:
        message = f"予期せぬエラーが発生しました: {str(e)}"
        # Count the message that is actually displayed, as the branch above does.
        return message, str(len(message))
# Build the Gradio UI: a URL box and language selector as inputs, a button,
# and two outputs (transcript text and its character count).
# NOTE(review): the Row/Column nesting below is reconstructed from a flattened
# source; confirm which components were meant to sit inside each container.
with gr.Blocks(title="YouTube字幕取得アプリ") as demo:
    # Page heading and a one-line usage hint.
    gr.Markdown("# YouTube字幕取得アプリ")
    gr.Markdown("YouTube動画のURLを入力すると、字幕を取得して表示します。")
    with gr.Row():
        # Single-line text box for the video URL.
        url_input = gr.Textbox(
            lines=1,
            placeholder="YouTube動画のURLを入力してください",
            show_copy_button=True,
            label="YouTube URL"
        )
        # Transcript language choice; "ja" is preselected.
        language_input = gr.Radio(
            ["ja", "en", "en-US"],
            label="言語",
            value="ja"
        )
    submit_button = gr.Button("字幕を取得")
    with gr.Column():
        transcript_output = gr.Code(label="字幕", max_lines=10)
        char_count_output = gr.Textbox(label="文字数")
    # The handler returns two values, mapped in order onto the two outputs.
    submit_button.click(
        fn=get_transcript_for_gradio,
        inputs=[url_input, language_input],
        outputs=[transcript_output, char_count_output]
    )
# Start the app as soon as the module is executed.
demo.launch()