"""Gradio app: download a YouTube video with yt-dlp and transcribe it with Whisper."""

import functools
import os
import subprocess
import tempfile

import ffmpeg
import gradio as gr
import whisper
import yt_dlp

# Directory that receives the merged mp4 files and their transcripts.
output_dir = "mp4"
os.makedirs(output_dir, exist_ok=True)

# YouTube's m4a audio-only format id, used both for merging and transcription.
AUDIO_FORMAT_ID = "140"

# Maps the label shown in the UI to the yt-dlp format selector it stands for.
# NOTE: this is process-wide state, so concurrent users overwrite each other's
# map; the last "fetch formats" click wins.
global_format_map = {}


def fetch_merged_formats(url):
    """Populate the format radio with the downloadable formats of *url*.

    Video-only formats are offered only when audio format 140 exists (they are
    paired with it as ``"<id>+140"``); formats that already carry audio are
    offered as-is. The label -> selector mapping is stored in
    ``global_format_map`` for the download step.

    Args:
        url: Video URL typed by the user.

    Returns:
        A ``gr.update`` carrying the list of labels and the first one selected
        (or no selection when nothing is available).
    """
    global global_format_map
    global_format_map = {}

    # Nothing to query: clear the choices instead of handing yt-dlp an empty URL.
    if not url or not url.strip():
        return gr.update(choices=[], value=None)

    labels = []
    with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
        info = ydl.extract_info(url, download=False)

    formats = info.get("formats") or []
    has_audio_140 = any(f.get("format_id") == AUDIO_FORMAT_ID for f in formats)

    for f in formats:
        if f.get("vcodec") == "none":
            continue  # audio-only entry

        resolution = f.get("resolution") or f"{f.get('width')}x{f.get('height')}"
        acodec = f.get("acodec", "none")
        fid = f["format_id"]

        if acodec != "none":
            label = f"{resolution} - video + audio"
            format_id = fid
        elif has_audio_140:
            label = f"{resolution} - mp4 + m4a"
            format_id = f"{fid}+{AUDIO_FORMAT_ID}"
        else:
            continue  # video-only and no audio track to merge with

        # Keep the first format seen for each label.
        if label not in global_format_map:
            global_format_map[label] = format_id
            labels.append(label)

    return gr.update(choices=labels, value=labels[0] if labels else None)


def get_real_resolution(filepath):
    """Return the ``"WxH"`` of the first video stream in *filepath*.

    Any probing failure is printed and reported as ``"Unknown"``.
    """
    try:
        probe = ffmpeg.probe(filepath)
        stream = next(
            (s for s in probe["streams"] if s["codec_type"] == "video"), None
        )
        if stream:
            return f"{stream['width']}x{stream['height']}"
    except Exception as e:
        print(f"ffmpeg error: {e}")
    return "Unknown"


@functools.lru_cache(maxsize=1)
def _load_whisper_model(name="base"):
    """Load the Whisper model once and reuse it across requests."""
    return whisper.load_model(name)


def _newest_mp4(directory):
    """Return the most recently modified ``.mp4`` in *directory*, or ``None``."""
    candidates = [
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith(".mp4")
    ]
    return max(candidates, key=os.path.getmtime, default=None)


def download_merged_and_transcribe(url, selected_label):
    """Download the selected format as mp4 and write a Whisper transcript.

    Args:
        url: Video URL typed by the user.
        selected_label: Label chosen in the format radio; it is looked up in
            ``global_format_map``.

    Returns:
        A ``(files, summary)`` pair for the ``gr.Files`` and summary textbox
        outputs. ``files`` is ``[mp4_path, txt_path]`` on success and ``None``
        on failure, in which case ``summary`` carries the error message.
    """
    format_id = global_format_map.get(selected_label)
    if not format_id:
        # The files output only accepts real paths, so the message goes to the
        # summary textbox instead.
        return None, "❌ 포맷을 찾을 수 없습니다."

    # Step 1: download and merge into an mp4.
    mp4_output = os.path.join(output_dir, "%(title).100s.%(ext)s")
    try:
        subprocess.run(
            [
                "yt-dlp",
                "-f", format_id,
                "--merge-output-format", "mp4",
                # Keep the download time as mtime so the newest-file lookup
                # below is not fooled by the server's Last-Modified header.
                "--no-mtime",
                "-o", mp4_output,
                # End of options: a URL starting with "-" must not be parsed
                # as a yt-dlp flag.
                "--", url,
            ],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return None, f"❌ 다운로드 실패 (yt-dlp exit code {e.returncode})"

    # NOTE: if yt-dlp skipped the download because the file already existed,
    # its mtime is old and a different mp4 may be picked here.
    mp4_file = _newest_mp4(output_dir)
    if mp4_file is None:
        return None, "❌ 다운로드된 mp4 파일을 찾을 수 없습니다."
    res = get_real_resolution(mp4_file)

    # Step 2: fetch the m4a audio track on its own. A per-request temporary
    # directory guarantees a stale audio file from an earlier video is never
    # reused and that concurrent requests do not clobber each other.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, "audio.m4a")
        try:
            subprocess.run(
                ["yt-dlp", "-f", AUDIO_FORMAT_ID, "-o", audio_path, "--", url],
                check=True,
            )
            audio_source = audio_path
        except subprocess.CalledProcessError:
            # Format 140 is unavailable for this source; Whisper decodes
            # through ffmpeg, so it can read the audio of the mp4 directly.
            audio_source = mp4_file

        # Step 3: speech-to-text with Whisper.
        result = _load_whisper_model("base").transcribe(audio_source)

    txt_path = os.path.splitext(mp4_file)[0] + ".txt"
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(result["text"])

    return [mp4_file, txt_path], f"✅ 실제 해상도: {res}"


# ▶ Gradio app
with gr.Blocks() as app:
    gr.Markdown("# 🎬 유튜브 다운로드 + Whisper 자막")

    url_input = gr.Textbox(label="유튜브 URL")
    get_formats_btn = gr.Button("🔍 포맷 불러오기")
    format_radio = gr.Radio(
        label="🎞️ 해상도 및 포맷 선택", choices=[], interactive=True
    )
    download_btn = gr.Button("⬇️ 다운로드 및 자막 생성")
    file_output = gr.Files(label="📁 다운로드된 파일")
    resolution_display = gr.Textbox(label="ℹ️ 결과 요약", interactive=False)

    get_formats_btn.click(
        fn=fetch_merged_formats, inputs=url_input, outputs=format_radio
    )
    download_btn.click(
        fn=download_merged_and_transcribe,
        inputs=[url_input, format_radio],
        outputs=[file_output, resolution_display],
    )

app.launch(share=True)