|
|
import gradio as gr |
|
|
import whisper |
|
|
import subprocess |
|
|
import os |
|
|
import uuid |
|
|
import torch |
|
|
import re |
|
|
import nltk |
|
|
from nltk.tokenize import sent_tokenize |
|
|
from transformers import pipeline, AutoTokenizer |
|
|
|
|
|
|
|
|
# Fetch the Punkt sentence-tokenizer data that sent_tokenize() relies on.
# Recent NLTK releases load the "punkt_tab" resource instead of the older
# pickled "punkt" models, so both are requested; downloading a resource that
# is already present is a no-op.
nltk.download('punkt')
nltk.download('punkt_tab')
|
|
|
|
|
|
|
|
# Speech-to-text model: the Whisper "medium" checkpoint, loaded once when the
# module is imported and shared by every request.
asr_model = whisper.load_model("medium")

# Summarization model: the tokenizer is created explicitly with the slow
# (non-fast) implementation and handed to the pipeline.
model_name = "csebuetnlp/mT5_multilingual_XLSum"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
summarizer = pipeline(
    "summarization",
    model=model_name,
    tokenizer=tokenizer,
    # Device index 0 when CUDA is available, otherwise -1.
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
|
|
|
|
|
# Locations of the external tools used for downloading and converting audio.
# The defaults are the original Windows install locations; each can be
# overridden through an environment variable so the script also runs on
# machines where the tools live elsewhere. An unset or empty variable falls
# back to the default.
yt_dlp_path = os.environ.get("YT_DLP_PATH") or "C:/Windows/System32/yt-dlp.exe"
ffmpeg_path = os.environ.get("FFMPEG_LOCATION") or "C:/ProgramData/chocolatey/bin"
|
|
|
|
|
|
|
|
def download_audio_with_ytdlp(youtube_url):
    """Download a video's audio track as an MP3 file by running yt-dlp.

    Args:
        youtube_url: URL handed to yt-dlp as its single positional argument.

    Returns:
        The randomly generated relative filename (``yt_audio_<8 hex>.mp3``)
        that yt-dlp was asked to write.

    Raises:
        RuntimeError: If yt-dlp exits with a non-zero status (the message
            carries its stderr), or if it exits successfully but the
            expected output file does not exist.
    """
    audio_filename = f"yt_audio_{uuid.uuid4().hex[:8]}.mp3"
    command = [
        yt_dlp_path,
        "-f", "bestaudio",
        "--extract-audio",
        "--audio-format", "mp3",
        "--ffmpeg-location", ffmpeg_path,
        "-o", audio_filename,
        # "--" ends option parsing: the URL comes from user input, and
        # without the separator a value starting with "-" would be read by
        # yt-dlp as a command-line option instead of a URL.
        "--",
        youtube_url,
    ]

    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Chain the original error so the exit status stays in the traceback.
        raise RuntimeError(f"yt-dlp μ€ν μ€λ₯:\n{e.stderr}") from e

    # A zero exit status does not guarantee the file was written.
    if not os.path.exists(audio_filename):
        raise RuntimeError(f"μ€λμ€ νμΌ μμ± μ€ν¨: {audio_filename}")

    return audio_filename
|
|
|
|
|
|
|
|
def clean_transcript(raw_text):
    """Normalize whitespace in a transcript and split it into sentences.

    Every run of whitespace is collapsed to a single space and the ends are
    trimmed before the text is passed to NLTK's ``sent_tokenize``.

    Args:
        raw_text: Transcript text to clean.

    Returns:
        The list of sentences produced by ``sent_tokenize``.
    """
    collapsed = re.sub(r'\s+', ' ', raw_text)
    normalized = collapsed.strip()
    return sent_tokenize(normalized)
|
|
|
|
|
|
|
|
def _split_into_chunks(sentences, chunk_char_limit):
    """Greedily pack consecutive sentences into chunks below the character limit."""
    chunks = []
    current, current_len = [], 0

    for sentence in sentences:
        # current_len counts one separating space per sentence already held.
        # A chunk is only closed when it holds something, so a first sentence
        # that alone reaches the limit no longer produces an empty chunk.
        if current and current_len + len(sentence) >= chunk_char_limit:
            chunks.append(" ".join(current).strip())
            current, current_len = [], 0
        current.append(sentence)
        current_len += len(sentence) + 1

    if current:
        chunks.append(" ".join(current).strip())

    # Drop chunks that are empty after trimming; they carry nothing to summarize.
    return [chunk for chunk in chunks if chunk]


def summarize_long_text(text, chunk_char_limit=1000):
    """Summarize a long text chunk by chunk and join the partial summaries.

    The text is split into sentences with ``clean_transcript``, the sentences
    are grouped into chunks below ``chunk_char_limit`` characters, and each
    chunk is passed to the module-level ``summarizer``. A sentence that alone
    reaches the limit becomes its own chunk.

    Args:
        text: Text to summarize.
        chunk_char_limit: Character budget for a chunk (default 1000).

    Returns:
        The per-chunk summaries joined by blank lines. A chunk whose
        summarization raises contributes a failure placeholder instead of
        aborting the whole run. Returns an empty string when there are no
        sentences.
    """
    sentences = clean_transcript(text)

    summaries = []
    for chunk in _split_into_chunks(sentences, chunk_char_limit):
        try:
            result = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort per chunk. Catching Exception rather than using a
            # bare except lets KeyboardInterrupt/SystemExit propagate.
            summaries.append("β οΈ μμ½ μ€ν¨")

    return "\n\n".join(summaries)
|
|
|
|
|
|
|
|
def process_youtube(youtube_url):
    """Download, transcribe and summarize a video; the Gradio callback.

    Args:
        youtube_url: URL entered by the user.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When the stripped
        transcript is shorter than 100 characters the summary is a notice
        string instead of a model summary. On any exception the first
        element is an error message and the second is an empty string, so
        the UI always receives two values.
    """
    try:
        audio_file = download_audio_with_ytdlp(youtube_url)
        try:
            result = asr_model.transcribe(audio_file)
        finally:
            # Remove the temporary audio even when transcription fails, so
            # failed requests do not leave files behind.
            if os.path.exists(audio_file):
                os.remove(audio_file)

        transcript = result.get("text", "")

        if len(transcript.strip()) < 100:
            summary = "β οΈ μλ§ λ΄μ©μ΄ λ무 μ§§μ μμ½ν μ μμ΅λλ€."
        else:
            summary = summarize_long_text(transcript)

        return transcript, summary
    except Exception as e:
        # Top-level UI boundary: report the failure in the transcript box.
        return f"[μλ¬] {str(e)}", ""
|
|
|
|
|
|
|
|
# Gradio UI: one URL textbox in, two textboxes out (transcript and summary),
# wired to process_youtube.
demo = gr.Interface(
    fn=process_youtube,
    inputs=gr.Textbox(label="μ νλΈ μμ URL"),
    outputs=[
        gr.Textbox(label="π μλ§ (Whisper κ²°κ³Ό)", lines=10),
        gr.Textbox(label="π§ μμ½ (μμ½ κ²°κ³Ό)", lines=5),
    ],
    title="π¬ μ νλΈ μλ§ & μμ½ μλΉμ€ (ν/μ νΌν© μ΅μ ν)",
    description="yt-dlpλ‘ μ€λμ€ λ€μ΄λ‘λ β Whisper μλ§ μμ± β mT5 μμ½ λͺ¨λΈλ‘ μμ½ μ²λ¦¬",
)


# Launch the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|
|