# Source: interface / app.py
# Uploaded by nangunan (repository "interface", revision 22db699)
import gradio as gr
import whisper
import subprocess
import os
import uuid
import torch
import re
import nltk
from nltk.tokenize import sent_tokenize
from transformers import pipeline, AutoTokenizer
# Download the NLTK "punkt" resource at import time (presumably required by
# sent_tokenize, which clean_transcript calls).
nltk.download('punkt')
# Whisper model ("medium" size) used to transcribe the downloaded audio.
asr_model = whisper.load_model("medium")
# Summarization model (mT5-based, multilingual summarization support).
model_name = "csebuetnlp/mT5_multilingual_XLSum"
# The tokenizer is loaded explicitly with use_fast=False and handed to the pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# device=0 selects the first CUDA GPU when one is available; -1 runs on the CPU.
summarizer = pipeline("summarization", model=model_name, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
# Locations of the yt-dlp executable and of ffmpeg (passed to --ffmpeg-location).
# NOTE(review): both are hard-coded Windows paths; the app will not find the
# tools on another machine layout unless these are edited.
yt_dlp_path = "C:/Windows/System32/yt-dlp.exe"
ffmpeg_path = "C:/ProgramData/chocolatey/bin"
# Audio download function
def download_audio_with_ytdlp(youtube_url):
    """Download the audio track of a video as an MP3 file using yt-dlp.

    The file is written to the current working directory under a random
    ``yt_audio_<8 hex chars>.mp3`` name. The caller is responsible for
    deleting it once it is no longer needed.

    Args:
        youtube_url: URL handed to yt-dlp as the video to download.

    Returns:
        The name of the MP3 file that was created.

    Raises:
        RuntimeError: If yt-dlp exits with a non-zero status (the message
            carries its stderr), or if it exits successfully but the expected
            output file does not exist.
    """
    audio_filename = f"yt_audio_{uuid.uuid4().hex[:8]}.mp3"
    command = [
        yt_dlp_path,
        "-f", "bestaudio",
        "--extract-audio",
        "--audio-format", "mp3",
        "--ffmpeg-location", ffmpeg_path,
        "-o", audio_filename,
        # "--" ends option parsing, so a user-supplied value that starts with
        # "-" is always treated as the URL and never as a yt-dlp option.
        "--",
        youtube_url,
    ]
    # Only the subprocess call can raise CalledProcessError, so keep the try
    # body minimal and chain the original error for easier debugging.
    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"yt-dlp μ‹€ν–‰ 였λ₯˜:\n{e.stderr}") from e
    # yt-dlp can exit with status 0 without producing the expected file.
    if not os.path.exists(audio_filename):
        raise RuntimeError(f"μ˜€λ””μ˜€ 파일 생성 μ‹€νŒ¨: {audio_filename}")
    return audio_filename
# πŸ“„ μžλ§‰ μ •μ œ
def clean_transcript(raw_text):
    """Normalize whitespace in a transcript and split it into sentences.

    Every run of whitespace is replaced by a single space and the ends are
    trimmed before the text is handed to NLTK's ``sent_tokenize``.

    Args:
        raw_text: The raw transcript string.

    Returns:
        The result of ``sent_tokenize`` on the normalized text.
    """
    single_spaced = re.sub(r'\s+', ' ', raw_text)
    normalized = single_spaced.strip()
    sentences = sent_tokenize(normalized)
    return sentences
# 🧠 μžλ§‰μ„ λΆ„ν• ν•˜μ—¬ μš”μ•½
def summarize_long_text(text, chunk_char_limit=1000):
    """Summarize a long text chunk by chunk and join the partial summaries.

    The text is split into sentences with ``clean_transcript``; consecutive
    sentences are packed into chunks whose accumulated length stays below
    ``chunk_char_limit`` characters. A single sentence that is longer than
    the limit becomes a chunk of its own. Each chunk is summarized separately.

    Args:
        text: The text to summarize.
        chunk_char_limit: Upper bound (in characters) used when packing
            sentences into one chunk.

    Returns:
        The per-chunk summaries joined by blank lines. A chunk whose
        summarization fails contributes a fixed failure marker instead of
        aborting the whole run. Returns an empty string when the text
        contains no sentences.
    """
    chunks = []
    pending = []     # sentences of the chunk currently being built
    pending_len = 0  # length of those sentences, each followed by one space

    def flush():
        # Never emit an empty chunk: that happened when the very first
        # sentence alone exceeded the limit and fed "" to the summarizer.
        chunk = " ".join(pending).strip()
        if chunk:
            chunks.append(chunk)

    for sentence in clean_transcript(text):
        if pending_len + len(sentence) < chunk_char_limit:
            pending.append(sentence)
            pending_len += len(sentence) + 1
        else:
            flush()
            pending = [sentence]
            pending_len = len(sentence) + 1
    flush()

    summaries = []
    for chunk in chunks:
        try:
            result = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort: keep going with the remaining chunks, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would.
            summaries.append("⚠️ μš”μ•½ μ‹€νŒ¨")
    return "\n\n".join(summaries)
# πŸ” 전체 처리 νŒŒμ΄ν”„λΌμΈ
def process_youtube(youtube_url):
    """Run the full pipeline: download audio, transcribe it, summarize it.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When the stripped
        transcript is shorter than 100 characters, the summary is a fixed
        "too short" notice instead of a model summary. If any step raises,
        the first element is an error message built from the exception and
        the second is an empty string, so the UI never sees an exception.
    """
    try:
        audio_file = download_audio_with_ytdlp(youtube_url)
        try:
            result = asr_model.transcribe(audio_file)
        finally:
            # Remove the temporary audio file even when transcription fails;
            # otherwise every failed request leaves an MP3 behind.
            if os.path.exists(audio_file):
                os.remove(audio_file)
        transcript = result.get("text", "")
        if len(transcript.strip()) < 100:
            summary = "⚠️ μžλ§‰ λ‚΄μš©μ΄ λ„ˆλ¬΄ μ§§μ•„ μš”μ•½ν•  수 μ—†μŠ΅λ‹ˆλ‹€."
        else:
            summary = summarize_long_text(transcript)
        return transcript, summary
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"[μ—λŸ¬] {str(e)}", ""
# Gradio UI definition: process_youtube receives the value of the URL textbox,
# and its two return values fill the transcript and summary textboxes in order.
demo = gr.Interface(
    fn=process_youtube,
    inputs=gr.Textbox(label="유튜브 μ˜μƒ URL"),
    outputs=[
        # First return value of process_youtube: the Whisper transcript.
        gr.Textbox(label="πŸŽ™ μžλ§‰ (Whisper κ²°κ³Ό)", lines=10),
        # Second return value of process_youtube: the summary text.
        gr.Textbox(label="🧠 μš”μ•½ (μš”μ•½ κ²°κ³Ό)", lines=5)
    ],
    title="🎬 유튜브 μžλ§‰ & μš”μ•½ μ„œλΉ„μŠ€ (ν•œ/영 ν˜Όν•© μ΅œμ ν™”)",
    description="yt-dlp둜 μ˜€λ””μ˜€ λ‹€μš΄λ‘œλ“œ β†’ Whisper μžλ§‰ 생성 β†’ mT5 μš”μ•½ λͺ¨λΈλ‘œ μš”μ•½ 처리"
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()