# Spaces: nangunan / interface
# Status: Runtime error
# File size: 3,539 Bytes
# Revision: 22db699

import os
import re
import shutil
import subprocess
import uuid

import gradio as gr
import nltk
import torch
import whisper
from nltk.tokenize import sent_tokenize
from transformers import pipeline, AutoTokenizer

# Download the NLTK sentence-tokenizer data that sent_tokenize needs.
# Older NLTK releases load the 'punkt' resource, while recent releases load
# 'punkt_tab' instead, so both are fetched to work regardless of the installed
# NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')

# 🔊 Whisper model used to generate the transcript
asr_model = whisper.load_model("medium")

# 🧠 Summarization model (mT5-based, multilingual summarization)
model_name = "csebuetnlp/mT5_multilingual_XLSum"
# The slow (non-fast) tokenizer implementation is requested explicitly.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Use device 0 when CUDA is available, otherwise device -1.
summarizer = pipeline("summarization", model=model_name, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Locations of the yt-dlp executable and of ffmpeg (passed to yt-dlp via
# --ffmpeg-location).
# Resolution order: explicit environment override (YT_DLP_PATH / FFMPEG_PATH),
# then whatever is found on PATH, then the original Windows install locations
# as a last resort, so existing setups keep working unchanged.
yt_dlp_path = (
    os.environ.get("YT_DLP_PATH")
    or shutil.which("yt-dlp")
    or "C:/Windows/System32/yt-dlp.exe"
)
ffmpeg_path = (
    os.environ.get("FFMPEG_PATH")
    or shutil.which("ffmpeg")
    or "C:/ProgramData/chocolatey/bin"
)

# 🎵 Audio download helper
def download_audio_with_ytdlp(youtube_url):
    """Download the audio of a video as an MP3 file by running yt-dlp.

    Args:
        youtube_url: URL of the video. Surrounding whitespace is ignored.

    Returns:
        The name of the created file, ``yt_audio_<8 hex chars>.mp3``, given
        as a relative path (the same value passed to yt-dlp's ``-o``).

    Raises:
        RuntimeError: If the URL is empty or not a string, if yt-dlp exits
            with a non-zero status (its stderr is included in the message),
            or if the expected output file does not exist afterwards.
        OSError: Propagated from ``subprocess.run`` when the yt-dlp
            executable itself cannot be started.
    """
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        raise RuntimeError("유튜브 URL이 비어 있습니다.")
    url = youtube_url.strip()

    audio_filename = f"yt_audio_{uuid.uuid4().hex[:8]}.mp3"
    command = [
        yt_dlp_path,
        "-f", "bestaudio",
        "--extract-audio",
        "--audio-format", "mp3",
        "--ffmpeg-location", ffmpeg_path,
        "-o", audio_filename,
        # End of options: the URL comes from user input, so a value starting
        # with "-" must never be interpreted as a yt-dlp command-line option.
        "--",
        url,
    ]
    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"yt-dlp 실행 오류:\n{e.stderr}") from e

    # yt-dlp can exit successfully without producing the expected file.
    if not os.path.exists(audio_filename):
        raise RuntimeError(f"오디오 파일 생성 실패: {audio_filename}")
    return audio_filename

# 📄 Transcript clean-up
def clean_transcript(raw_text):
    """Normalize whitespace in a transcript and split it into sentences.

    Every run of whitespace is collapsed to a single space and the ends are
    trimmed; the result is then passed to NLTK's ``sent_tokenize``, whose
    return value is handed back unchanged.
    """
    collapsed = re.sub(r'\s+', ' ', raw_text)
    normalized = collapsed.strip()
    sentences = sent_tokenize(normalized)
    return sentences

# 🧠 Split the transcript into chunks and summarize each one
def _chunk_sentences(sentences, chunk_char_limit):
    """Group sentences into space-joined chunks kept under chunk_char_limit where possible."""
    chunks = []
    current = []
    # Length of the current chunk counting one separator per sentence.
    current_len = 0
    for sentence in sentences:
        # Flush only a non-empty chunk: a first sentence that alone reaches
        # the limit must not produce an empty chunk in front of it.
        if current and current_len + len(sentence) >= chunk_char_limit:
            chunks.append(" ".join(current).strip())
            current, current_len = [], 0
        current.append(sentence)
        current_len += len(sentence) + 1
    if current:
        chunks.append(" ".join(current).strip())
    # Drop chunks that ended up blank so they are never sent to the model.
    return [chunk for chunk in chunks if chunk]


def summarize_long_text(text, chunk_char_limit=1000):
    """Summarize a long transcript chunk by chunk.

    The text is split into sentences with ``clean_transcript``, the sentences
    are packed into chunks of fewer than ``chunk_char_limit`` characters (a
    single sentence longer than the limit becomes its own chunk), and each
    chunk is summarized separately with the module-level ``summarizer``.

    Args:
        text: Transcript to summarize.
        chunk_char_limit: Character budget per chunk.

    Returns:
        The per-chunk summaries joined by blank lines. A chunk whose
        summarization raises is represented by a failure placeholder.
        Returns an empty string for empty or whitespace-only input.
    """
    if not text or not text.strip():
        return ""

    sentences = clean_transcript(text)
    chunks = _chunk_sentences(sentences, chunk_char_limit)

    summaries = []
    for chunk in chunks:
        try:
            result = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort: one failing chunk must not discard the others.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            summaries.append("⚠️ 요약 실패")

    return "\n\n".join(summaries)

# 🔁 End-to-end processing pipeline
def process_youtube(youtube_url):
    """Download a video's audio, transcribe it, and summarize the transcript.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When the stripped
        transcript is shorter than 100 characters the summary is a notice
        that the text is too short to summarize. If any step raises, the
        tuple is ``("[에러] <message>", "")`` instead of propagating the
        exception.
    """
    try:
        audio_file = download_audio_with_ytdlp(youtube_url)
        try:
            result = asr_model.transcribe(audio_file)
        finally:
            # Remove the temporary audio file even when transcription fails,
            # so failed runs do not leave files behind.
            if os.path.exists(audio_file):
                os.remove(audio_file)

        transcript = result.get("text", "")

        if len(transcript.strip()) < 100:
            summary = "⚠️ 자막 내용이 너무 짧아 요약할 수 없습니다."
        else:
            summary = summarize_long_text(transcript)

        return transcript, summary
    except Exception as e:
        # Top-level boundary for the UI: report the error in the transcript box.
        return f"[에러] {e}", ""

# 🌐 Gradio UI definition
# One text input for the video URL, two text outputs for the results
# returned by process_youtube (transcript first, then summary).
_url_input = gr.Textbox(label="유튜브 영상 URL")
_transcript_output = gr.Textbox(label="🎙 자막 (Whisper 결과)", lines=10)
_summary_output = gr.Textbox(label="🧠 요약 (요약 결과)", lines=5)

demo = gr.Interface(
    fn=process_youtube,
    inputs=_url_input,
    outputs=[_transcript_output, _summary_output],
    title="🎬 유튜브 자막 & 요약 서비스 (한/영 혼합 최적화)",
    description="yt-dlp로 오디오 다운로드 → Whisper 자막 생성 → mT5 요약 모델로 요약 처리",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()