Spaces:
Build error
Build error
| from youtube_transcript_api import ( | |
| YouTubeTranscriptApi, | |
| NoTranscriptFound, | |
| TranscriptsDisabled, | |
| VideoUnavailable | |
| ) | |
| from typing import List, Dict, Optional | |
| import re | |
| import gradio as gr | |
class YoutubeTranscript:
    """Stateless helpers for fetching YouTube transcripts.

    Both helpers are static methods: they are called on the class itself and
    keep no per-instance state.
    """

    @staticmethod
    def get_video_id(url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL.

        Args:
            url: A URL containing ``v=<id>`` (watch page) or ``be/<id>``
                (``youtu.be`` short link).

        Returns:
            The video ID, or None when ``url`` is empty or contains no ID.
        """
        if not url:
            return None
        # Stop at '?' as well as '&' and '#': share links such as
        # ``youtu.be/<id>?si=...`` would otherwise leak the query string
        # into the returned ID.
        match = re.search(r"(?<=v=)[^&#?]+|(?<=be/)[^&#?]+", url)
        return match.group(0) if match else None

    @staticmethod
    def get_transcript(url: str, language: str = 'ja') -> Optional[List[Dict]]:
        """Fetch the transcript of a YouTube video.

        Args:
            url: URL of the YouTube video.
            language: Transcript language code (ja, en, en-US).

        Returns:
            The transcript entries returned by
            ``YouTubeTranscriptApi.get_transcript``, or None when the URL is
            invalid, transcripts are disabled, the video is unavailable, or
            any other error occurs.

        Raises:
            NoTranscriptFound: No transcript exists in the requested language.
        """
        video_id = YoutubeTranscript.get_video_id(url)
        if video_id is None:
            print(f"無効なURLです: {url}")
            return None

        try:
            return YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=[language],  # request only the selected language
            )
        except TranscriptsDisabled:
            print(f"この動画では字幕が無効になっています: {url}")
            return None
        except VideoUnavailable:
            print(f"動画が利用できません: {url}")
            return None
        except NoTranscriptFound:
            # Re-raise the library's own exception object. Constructing a new
            # NoTranscriptFound from a single message string fails with
            # TypeError because its constructor requires
            # (video_id, requested_language_codes, transcript_data).
            print(f"指定した言語 ({language}) の字幕が見つかりません: {url}")
            raise
        except Exception as e:
            # Top-level boundary for the UI: report and signal failure.
            print(f"予期せぬエラーが発生しました: {e}")
            return None
| # The function 'get_transcript_for_gradio' should be outside the class | |
def get_transcript_for_gradio(url: str, language: str) -> tuple:
    """Fetch a transcript and format it for the two Gradio output fields.

    Args:
        url: URL of the YouTube video.
        language: Transcript language code to request.

    Returns:
        A ``(text, count)`` pair of strings. On success ``text`` is the text
        of every transcript entry concatenated with no separator and
        ``count`` is its character count. When no transcript data comes back,
        both items are the same failure message. When an exception is caught,
        ``text`` is the error message and ``count`` is that message's length.
    """
    try:
        transcript = YoutubeTranscript.get_transcript(url, language)
        if not transcript:
            return "字幕の取得に失敗しました。", "字幕の取得に失敗しました。"
        # Entries are joined with no separator and without their timestamps.
        formatted_transcript = "".join(str(entry['text']) for entry in transcript)
        return formatted_transcript, str(len(formatted_transcript))
    except NoTranscriptFound as e:
        # Exceptions have no len(); measure the message text instead.
        message = str(e)
        return message, str(len(message))
    except Exception as e:
        message = f"予期せぬエラーが発生しました: {str(e)}"
        return message, str(len(message))
# Input widgets: the video URL and the transcript language selector.
url_input = gr.Textbox(
    lines=1,
    placeholder="YouTube動画のURLを入力してください",
    show_copy_button=True,
)
language_input = gr.Radio(["ja", "en", "en-US"], label="言語", value="ja")

# Output widgets: the transcript text and its character count.
transcript_output = gr.Code(label="字幕", max_lines=10)
char_count_output = gr.Textbox(label="文字数")

# Wire the widgets to get_transcript_for_gradio and start the app.
iface = gr.Interface(
    fn=get_transcript_for_gradio,
    inputs=[url_input, language_input],
    outputs=[transcript_output, char_count_output],
    title="YouTube字幕取得アプリ",
    description="YouTube動画のURLを入力すると、字幕を取得して表示します。",
)
iface.launch()