import os
import re
import shutil
import tempfile
import time

import gradio as gr
import whisper
import yt_dlp


# Process-wide cache: the most recently loaded Whisper model and the size name
# it was loaded with, so repeated requests for the same size skip the load.
model = None
_loaded_model_size = None


def load_model(model_size="base"):
    """Return a Whisper model of the requested size, loading it on demand.

    The model is cached in the module-level ``model`` global.  It is loaded
    again whenever ``model_size`` differs from the size currently cached, so
    asking for a different size between calls actually takes effect instead
    of silently returning the first model that was ever loaded.

    Args:
        model_size: Size name passed to ``whisper.load_model``.

    Returns:
        The loaded Whisper model object.
    """
    global model, _loaded_model_size
    if model is None or _loaded_model_size != model_size:
        print(f"Loading Whisper {model_size} model...")
        model = whisper.load_model(model_size)
        _loaded_model_size = model_size
        print("Model loaded!")
    return model


def extract_audio_from_youtube(url, progress=gr.Progress()):
    """Download the audio of a YouTube video into a fresh temporary directory.

    yt-dlp is asked for the best audio format plus an ``FFmpegExtractAudio``
    post-processing step (preferred codec ``mp3``, preferred quality ``192``).
    The temporary directory is removed again if the download fails or no
    audio file is found.

    Args:
        url: Video URL handed to yt-dlp.
        progress: Gradio progress tracker used to report status.

    Returns:
        A tuple ``(audio_file, title, duration)``: the path of the extracted
        audio file, the title reported by yt-dlp (``'Unknown'`` when missing)
        and the duration reported by yt-dlp (``0`` when missing).

    Raises:
        gr.Error: If the download fails or no audio file was produced.
    """
    # NOTE(review): the non-ASCII text in the user-facing strings below looks
    # like mis-decoded UTF-8; it is reproduced unchanged -- confirm the
    # encoding this file was saved with.
    progress(0.1, desc="YouTube ์ค๋์ค ๋ค์ด๋ก๋ ์ค...")

    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "audio")

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': output_path,
        # The output name is fixed, so only a single video can be stored;
        # keep a watch URL that also carries a playlist id from pulling in
        # the whole playlist.
        'noplaylist': True,
        'quiet': True,
        'no_warnings': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            # `.get(key, default)` still yields None when the key exists with
            # a null value, so fall back explicitly.
            title = info.get('title') or 'Unknown'
            duration = info.get('duration') or 0
    except Exception as e:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise gr.Error(f"YouTube ๋ค์ด๋ก๋ ์คํจ: {e}") from e

    # The post-processor normally leaves "audio.mp3"; the other extensions
    # cover the case where a different container is left behind.
    candidates = (
        output_path + ext
        for ext in ('.mp3', '.m4a', '.wav', '.webm', '.opus')
    )
    audio_file = next((path for path in candidates if os.path.exists(path)), None)

    if audio_file is None:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise gr.Error("์ค๋์ค ํ์ผ ์ถ์ถ ์คํจ")

    return audio_file, title, duration


def format_timestamp(seconds):
    """Format a duration in seconds as ``MM:SS``, or ``HH:MM:SS`` from one hour up.

    Fractional seconds are truncated.  ``None`` and negative values are
    rendered as ``00:00`` instead of raising or wrapping around.

    Args:
        seconds: Duration in seconds (int or float), or ``None``.

    Returns:
        The zero-padded timestamp string.
    """
    total = max(int(seconds or 0), 0)
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"
    return f"{minutes:02d}:{secs:02d}"


# Output-format labels as delivered by the UI dropdown.
# NOTE(review): in the source the first label is split across two lines in the
# middle of the literal; the break is presumably a U+0085 character left over
# from mis-decoded UTF-8, so it is written as "\x85" here -- confirm against
# the dropdown choices and the file's real encoding.
_FORMAT_TEXT_ONLY = "ํ\x85์คํธ๋ง"
_FORMAT_TIMESTAMPED = "ํ์์คํฌํ ํฌํจ"


def _srt_timestamp(seconds):
    """Format *seconds* as an SRT timestamp, ``HH:MM:SS,mmm``."""
    # Work in whole milliseconds so float error cannot truncate a value such
    # as 1.001 s down to ",000".
    total_ms = max(round(seconds * 1000), 0)
    hours, rest = divmod(total_ms, 3600000)
    minutes, rest = divmod(rest, 60000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _build_transcript(result, output_format):
    """Render a Whisper result as plain text, timestamped lines, or SRT."""
    segments = result.get("segments", [])

    if output_format == _FORMAT_TEXT_ONLY:
        return result["text"].strip()

    if output_format == _FORMAT_TIMESTAMPED:
        return "\n".join(
            f"[{format_timestamp(seg['start'])}] {seg['text'].strip()}"
            for seg in segments
        )

    # Any other selection produces SRT subtitles.
    srt_lines = []
    for index, seg in enumerate(segments, 1):
        srt_lines.append(f"{index}")
        srt_lines.append(
            f"{_srt_timestamp(seg['start'])} --> {_srt_timestamp(seg['end'])}"
        )
        srt_lines.append(seg["text"].strip())
        srt_lines.append("")
    return "\n".join(srt_lines)


def _safe_filename(title):
    """Return at most 50 characters of *title* that are safe in a file name."""
    # Path separators and other reserved characters in a video title would
    # otherwise make open() fail or point at a different directory.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(title or "")[:50])
    return cleaned.strip(" .") or "untitled"


def transcribe_youtube(url, model_size, language, output_format, progress=gr.Progress()):
    """Transcribe the audio of a YouTube video with Whisper.

    Downloads the audio, loads the requested Whisper model, transcribes the
    audio, renders the result in the chosen output format and writes it to a
    text file in a new temporary directory.  The downloaded audio is deleted
    afterwards, also when transcription fails.

    Args:
        url: YouTube URL entered by the user.
        model_size: Whisper model size name passed to ``load_model``.
        language: Language code passed to Whisper, or ``"auto"`` to let
            Whisper detect the language.
        output_format: Label chosen in the output-format dropdown.
        progress: Gradio progress tracker used to report status.

    Returns:
        A tuple ``(info_text, transcript, txt_path)``: a summary of the video
        and the run, the rendered transcript, and the path of the text file
        holding the transcript.

    Raises:
        gr.Error: If ``url`` is empty, or if the audio download fails.
    """
    # NOTE(review): the non-ASCII text in the user-facing strings below looks
    # like mis-decoded UTF-8; it is reproduced unchanged.  The "\x85" in the
    # first message stands for a line break found inside the literal in the
    # source -- confirm the encoding this file was saved with.
    if not url or not url.strip():
        raise gr.Error("YouTube URL์ ์\x85๋ ฅํด์ฃผ์ธ์.")

    start_time = time.time()

    audio_file, title, duration = extract_audio_from_youtube(url, progress)

    try:
        progress(0.3, desc=f"Whisper {model_size} ๋ชจ๋ธ ๋ก๋ฉ ์ค...")
        whisper_model = load_model(model_size)

        progress(0.5, desc="์์ฑ ์ธ์ ์ค... (์์ ๊ธธ์ด์ ๋ฐ๋ผ ์๊ฐ์ด ์์๋ฉ๋๋ค)")

        transcribe_opts = {
            "verbose": False,
            "fp16": False,
        }
        if language != "auto":
            transcribe_opts["language"] = language

        result = whisper_model.transcribe(audio_file, **transcribe_opts)
    finally:
        # Best-effort cleanup of the download.  The directory is removed only
        # if it is empty once the audio file is gone.
        try:
            os.remove(audio_file)
            os.rmdir(os.path.dirname(audio_file))
        except OSError:
            pass

    progress(0.9, desc="๊ฒฐ๊ณผ ์ ๋ฆฌ ์ค...")

    detected_lang = result.get("language", "unknown")
    segments = result.get("segments", [])
    transcript = _build_transcript(result, output_format)

    elapsed = time.time() - start_time

    info_text = "\n".join([
        f"๐น ์ ๋ชฉ: {title}",
        f"โฑ๏ธ ์์ ๊ธธ์ด: {format_timestamp(duration)}",
        f"๐ ๊ฐ์ง๋ ์ธ์ด: {detected_lang}",
        f"๐ ์ธ๊ทธ๋จผํธ ์: {len(segments)}",
        f"โก ์ฒ๋ฆฌ ์๊ฐ: {elapsed:.1f}์ด",
    ])

    txt_path = os.path.join(
        tempfile.mkdtemp(), f"{_safe_filename(title)}_transcript.txt"
    )
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(transcript)

    progress(1.0, desc="์๋ฃ!")

    return info_text, transcript, txt_path


# Custom stylesheet for the page: centres the `#title` and `#subtitle`
# elements and sets font size / line height for textareas inside elements
# carrying the `output-text` class.
css = """
#title { text-align: center; margin-bottom: 0.5em; }
#subtitle { text-align: center; color: #666; margin-bottom: 1.5em; }
.output-text textarea { font-size: 14px !important; line-height: 1.6 !important; }
"""


# Page layout.  Components are created inside nested Blocks/Row/Column
# contexts, top to bottom: heading, URL + model row, language + format row,
# run button, info box, transcript box, download file, usage notes.
# NOTE(review): the non-ASCII text in the labels below looks like mis-decoded
# UTF-8 and is reproduced unchanged.  Where a literal is split across two
# lines in the source, the break is written as "\x85" (presumably a U+0085
# character left over from the mis-decoding) -- confirm the file's encoding.
with gr.Blocks(
    title="YouTube Speech-to-Text"
) as demo:

    gr.HTML("<h1 id='title'>๐ฌ YouTube Speech-to-Text</h1>")
    gr.HTML("<p id='subtitle'>YouTube ์์์ ์์ฑ์ ํ\x85์คํธ๋ก ๋ณํํฉ๋๋ค</p>")

    with gr.Row():
        with gr.Column(scale=3):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="https://www.youtube.com/watch?v=... ๋๋ https://youtu.be/...",
                lines=1,
            )
        with gr.Column(scale=1):
            model_size = gr.Dropdown(
                choices=["tiny", "base", "small", "medium", "large"],
                value="base",
                label="Whisper ๋ชจ๋ธ",
                info="ํฌ๊ธฐ๊ฐ ํด์๋ก ์ ํํ์ง๋ง ๋๋ฆฝ๋๋ค"
            )

    with gr.Row():
        # Each choice is a (label, value) pair; "auto" is the value the
        # handler treats as "do not pass a language to Whisper".
        language = gr.Dropdown(
            choices=[
                ("์๋ ๊ฐ์ง", "auto"),
                ("ํ๊ตญ์ด", "ko"),
                ("์์ด", "en"),
                ("์ผ๋ณธ์ด", "ja"),
                ("์ค๊ตญ์ด", "zh"),
            ],
            value="auto",
            label="์ธ์ด ์ค์ ",
        )
        # These labels are compared verbatim inside transcribe_youtube, so
        # they must stay in sync with the strings used there.
        output_format = gr.Dropdown(
            choices=["ํ\x85์คํธ๋ง", "ํ์์คํฌํ ํฌํจ", "SRT ์๋ง"],
            value="ํ์์คํฌํ ํฌํจ",
            label="์ถ๋ ฅ ํ์",
        )

    run_btn = gr.Button("๐ ๋ณํ ์์", variant="primary", size="lg")

    with gr.Row():
        info_output = gr.Textbox(label="๐ ์์ ์ ๋ณด", lines=5, interactive=False)

    transcript_output = gr.Textbox(
        label="๐ ๋ณํ ๊ฒฐ๊ณผ",
        lines=15,
        interactive=True,
        elem_classes=["output-text"],
    )

    file_output = gr.File(label="๐พ ํ\x85์คํธ ํ์ผ ๋ค์ด๋ก๋")

    # The handler's three return values map onto the three outputs in order.
    run_btn.click(
        fn=transcribe_youtube,
        inputs=[url_input, model_size, language, output_format],
        outputs=[info_output, transcript_output, file_output],
    )

    gr.Markdown("""
    ---
    **์ฌ์ฉ ํ:**
    - `tiny`/`base`: ๋น ๋ฅด์ง๋ง ์ ํ๋ ๋ฎ์ (CPU ํ๊ฒฝ ๊ถ์ฅ)
    - `small`/`medium`: ๊ท ํ ์กํ ์ ํ
    - `large`: ์ต๊ณ ์ ํ๋ (GPU ํ์, ์๊ฐ ์์)
    - ํ๊ตญ์ด ์์์ ์ธ์ด๋ฅผ `ํ๊ตญ์ด`๋ก ์ง์ ํ๋ฉด ๋ ์ ํํฉ๋๋ค
    """)


if __name__ == "__main__":
    # Start the Gradio app with the Soft theme and the custom stylesheet.
    # NOTE(review): `theme` and `css` are passed to launch() here; older
    # Gradio releases expect them on gr.Blocks() instead -- confirm the
    # Gradio version this script is pinned to.
    demo.launch(theme=gr.themes.Soft(), css=css)