File size: 3,539 Bytes
22db699 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
import whisper
import subprocess
import os
import uuid
import torch
import re
import nltk
from nltk.tokenize import sent_tokenize
from transformers import pipeline, AutoTokenizer
# Download the NLTK "punkt" resource (runs when the module is imported).
nltk.download("punkt")

# Whisper model used to generate the transcript.
asr_model = whisper.load_model("medium")

# Summarization model (mT5-based, multilingual summarization support).
model_name = "csebuetnlp/mT5_multilingual_XLSum"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
summarizer = pipeline(
    "summarization",
    model=model_name,
    tokenizer=tokenizer,
    # Device index 0 when CUDA is available, otherwise -1.
    device=0 if torch.cuda.is_available() else -1,
)

# Paths to the yt-dlp executable and the ffmpeg location handed to yt-dlp.
yt_dlp_path = "C:/Windows/System32/yt-dlp.exe"
ffmpeg_path = "C:/ProgramData/chocolatey/bin"
# Audio download function
def download_audio_with_ytdlp(youtube_url):
    """Download the audio of a video as an MP3 file by invoking yt-dlp.

    Args:
        youtube_url: URL passed to yt-dlp as the item to download. It is
            user-supplied, so it is always treated as a positional argument.

    Returns:
        The (relative) name of the MP3 file yt-dlp was asked to write.

    Raises:
        RuntimeError: If yt-dlp exits with a non-zero status, or if it
            exits successfully but the expected file does not exist.
    """
    # A random suffix keeps separate requests from sharing a file name.
    audio_filename = f"yt_audio_{uuid.uuid4().hex[:8]}.mp3"
    command = [
        yt_dlp_path,
        "-f", "bestaudio",
        "--extract-audio",
        "--audio-format", "mp3",
        "--ffmpeg-location", ffmpeg_path,
        "-o", audio_filename,
        # "--" ends option parsing, so input that starts with "-" cannot be
        # interpreted as a yt-dlp option (argument injection).
        "--",
        youtube_url,
    ]
    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Chain the original error so the exit status stays in the traceback.
        raise RuntimeError(f"yt-dlp μ€ν μ€λ₯:\n{e.stderr}") from e
    if not os.path.exists(audio_filename):
        raise RuntimeError(f"μ€λμ€ νμΌ μμ± μ€ν¨: {audio_filename}")
    return audio_filename
# Transcript cleanup
def clean_transcript(raw_text):
    """Normalize whitespace in a transcript and split it into sentences.

    Every run of whitespace is collapsed to a single space and the ends are
    trimmed; the result is then passed to NLTK's ``sent_tokenize``.

    Args:
        raw_text: Transcript text to clean.

    Returns:
        Whatever ``sent_tokenize`` returns for the normalized text.
    """
    collapsed = re.sub(r'\s+', ' ', raw_text)
    normalized = collapsed.strip()
    sentences = sent_tokenize(normalized)
    return sentences
# Split the transcript into chunks and summarize each one
def summarize_long_text(text, chunk_char_limit=1000):
    """Summarize a long transcript chunk by chunk.

    The text is split into sentences with ``clean_transcript``. Consecutive
    sentences are packed into chunks of roughly ``chunk_char_limit``
    characters, and each chunk is passed to the module-level ``summarizer``.

    Args:
        text: Transcript text to summarize.
        chunk_char_limit: Soft limit on chunk length in characters. A single
            sentence at or over the limit still becomes a chunk of its own.

    Returns:
        The per-chunk summaries joined by blank lines. A chunk whose
        summarization raised is represented by a failure placeholder string.
        An empty string is returned when there are no chunks.
    """
    sentences = clean_transcript(text)

    chunks = []
    pending = []      # sentences collected for the chunk being built
    pending_len = 0   # their total length, counting one separator each
    for sentence in sentences:
        # Flush only when something is pending: otherwise an over-long first
        # sentence would emit an empty chunk that gets sent to the model.
        if pending and pending_len + len(sentence) >= chunk_char_limit:
            chunks.append(" ".join(pending).strip())
            pending, pending_len = [], 0
        pending.append(sentence)
        pending_len += len(sentence) + 1
    if pending:
        chunks.append(" ".join(pending).strip())

    summaries = []
    for chunk in chunks:
        if not chunk:
            continue  # nothing to summarize
        try:
            result = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort: one failing chunk must not discard the others.
            # KeyboardInterrupt/SystemExit are intentionally not caught.
            summaries.append("β οΈ μμ½ μ€ν¨")
    return "\n\n".join(summaries)
# Full processing pipeline
def process_youtube(youtube_url):
    """Run the full pipeline: download audio, transcribe it, summarize it.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When the stripped
        transcript is shorter than 100 characters, the summary is a notice
        string instead. If any step raises, the first element is an error
        message built from the exception and the second is an empty string.
    """
    # Top-level boundary for the UI: every failure becomes an error string.
    try:
        audio_file = download_audio_with_ytdlp(youtube_url)
        try:
            result = asr_model.transcribe(audio_file)
        finally:
            # Delete the temporary audio even when transcription fails, so
            # failed requests do not leave files behind.
            try:
                os.remove(audio_file)
            except FileNotFoundError:
                pass  # already gone; nothing to clean up
        transcript = result.get("text", "")
        if len(transcript.strip()) < 100:
            summary = "β οΈ μλ§ λ΄μ©μ΄ λ무 μ§§μ μμ½ν μ μμ΅λλ€."
        else:
            summary = summarize_long_text(transcript)
        return transcript, summary
    except Exception as e:
        return f"[μλ¬] {str(e)}", ""
# Gradio UI setup
# Single-input interface: one URL text box in, two text boxes out
# (transcript and summary), wired to process_youtube.
demo = gr.Interface(
    title="π¬ μ νλΈ μλ§ & μμ½ μλΉμ€ (ν/μ νΌν© μ΅μ ν)",
    description="yt-dlpλ‘ μ€λμ€ λ€μ΄λ‘λ β Whisper μλ§ μμ± β mT5 μμ½ λͺ¨λΈλ‘ μμ½ μ²λ¦¬",
    fn=process_youtube,
    inputs=gr.Textbox(label="μ νλΈ μμ URL"),
    outputs=[
        gr.Textbox(label="π μλ§ (Whisper κ²°κ³Ό)", lines=10),
        gr.Textbox(label="π§ μμ½ (μμ½ κ²°κ³Ό)", lines=5),
    ],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|