| import gradio as gr |
| from youtube_transcript_api import YouTubeTranscriptApi |
| from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled |
| import re |
|
|
# Matches the common YouTube URL shapes and captures the 11-character video ID:
#   youtu.be/<id>, youtube.com/watch?...v=<id>, /embed/<id>, /shorts/<id>,
#   /live/<id>, /v/<id> (also on youtube-nocookie.com).
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtu\.be/"
    r"|youtube(?:-nocookie)?\.com/"
    r"(?:embed/|shorts/|live/|v/|watch\?(?:[^#\s]*?&)?v=))"
    r"([A-Za-z0-9_-]{11})"
)


def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Args:
        url: The URL text to inspect. Non-string values (e.g. ``None``) are
            treated as "no URL".

    Returns:
        The 11-character video ID, or ``None`` when *url* does not contain a
        recognisable YouTube link with a complete ID.
    """
    if not isinstance(url, str):
        return None
    # search() rather than match(): tolerate a missing scheme, "www."/"m."
    # prefixes and surrounding whitespace.
    found = _VIDEO_ID_PATTERN.search(url)
    return found.group(1) if found else None
|
|
| def get_transcript(url): |
| """YouTube μμμ μ€ν¬λ¦½νΈλ₯Ό μΆμΆνλ ν¨μ""" |
| try: |
| video_id = extract_video_id(url) |
| if not video_id: |
| return "μ¬λ°λ₯Έ YouTube URLμ μ
λ ₯ν΄μ£ΌμΈμ." |
| |
| try: |
| |
| transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko']) |
| except NoTranscriptFound: |
| try: |
| |
| transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) |
| except NoTranscriptFound: |
| try: |
| |
| transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) |
| transcript = transcript_list.find_generated_transcript(['ko', 'en']) |
| transcript_list = transcript.fetch() |
| except: |
| return "μ΄ μμμλ μλ§μ΄ μκ±°λ μλ§μ κ°μ Έμ¬ μ μμ΅λλ€." |
| |
| |
| full_transcript = "" |
| for transcript in transcript_list: |
| text = transcript['text'] |
| timestamp = transcript['start'] |
| minutes = int(timestamp // 60) |
| seconds = int(timestamp % 60) |
| full_transcript += f"[{minutes:02d}:{seconds:02d}] {text}\n" |
| |
| return full_transcript |
| |
| except TranscriptsDisabled: |
| return "μ΄ μμμ μλ§μ΄ λΉνμ±νλμ΄ μμ΅λλ€." |
| except Exception as e: |
| return f"μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}" |
|
|
| |
| iface = gr.Interface( |
| fn=get_transcript, |
| inputs=gr.Textbox(label="YouTube URLμ μ
λ ₯νμΈμ"), |
| outputs=gr.Textbox(label="μΆμΆλ μ€ν¬λ¦½νΈ", lines=10), |
| title="YouTube μ€ν¬λ¦½νΈ μΆμΆκΈ°", |
| description="YouTube μμμ URLμ μ
λ ₯νλ©΄ μλμΌλ‘ μ€ν¬λ¦½νΈλ₯Ό μΆμΆν©λλ€. (μΌλ° μλ§ λλ μλ μμ±λ μλ§)" |
| ) |
|
|
| |
| if __name__ == "__main__": |
| iface.launch() |