File size: 3,539 Bytes
22db699
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import whisper
import subprocess
import os
import uuid
import torch
import re
import nltk
from nltk.tokenize import sent_tokenize
from transformers import pipeline, AutoTokenizer

# Download the NLTK sentence-tokenizer data used by sent_tokenize.
# NLTK >= 3.8.2 loads the pickle-free "punkt_tab" resource instead of "punkt",
# so both are fetched to keep sentence splitting working across versions.
nltk.download('punkt')
nltk.download('punkt_tab')

# Whisper speech-recognition model ("medium" checkpoint) used for transcripts.
asr_model = whisper.load_model("medium")

# Summarization model (mT5-based, multilingual summarization).
model_name = "csebuetnlp/mT5_multilingual_XLSum"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
summarizer = pipeline(
    "summarization",
    model=model_name,
    tokenizer=tokenizer,
    # Device index 0 when CUDA is available, otherwise -1.
    device=0 if torch.cuda.is_available() else -1,
)

# Locations of the yt-dlp executable and of ffmpeg (handed to yt-dlp through
# --ffmpeg-location). The YT_DLP_PATH / FFMPEG_PATH environment variables
# override the defaults, so the app is not tied to one Windows installation.
yt_dlp_path = os.environ.get("YT_DLP_PATH", "C:/Windows/System32/yt-dlp.exe")
ffmpeg_path = os.environ.get("FFMPEG_PATH", "C:/ProgramData/chocolatey/bin")

# 🎡 μ˜€λ””μ˜€ λ‹€μš΄λ‘œλ“œ ν•¨μˆ˜
def download_audio_with_ytdlp(youtube_url):
    """Download the audio track of a YouTube video as an MP3 file via yt-dlp.

    The file is written to the current working directory under a random
    ``yt_audio_<8 hex chars>.mp3`` name. This function does not delete it;
    cleanup is left to the caller.

    Args:
        youtube_url: URL of the video. It is passed to yt-dlp as one
            positional argument (no shell is involved).

    Returns:
        The name of the MP3 file that was created.

    Raises:
        RuntimeError: If yt-dlp exits with a non-zero status, or if it exits
            successfully but the expected output file does not exist.
    """
    audio_filename = f"yt_audio_{uuid.uuid4().hex[:8]}.mp3"
    command = [
        yt_dlp_path,
        "-f", "bestaudio",
        "--extract-audio",
        "--audio-format", "mp3",
        "--ffmpeg-location", ffmpeg_path,
        # Only one output file is expected, so never expand a playlist.
        "--no-playlist",
        "-o", audio_filename,
        # End of options: the URL comes from the user, and without this a
        # value starting with "-" would be parsed as a yt-dlp option.
        "--",
        youtube_url,
    ]
    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"yt-dlp 실행 오류:\n{e.stderr}") from e

    if not os.path.exists(audio_filename):
        raise RuntimeError(f"오디오 파일 생성 실패: {audio_filename}")
    return audio_filename

# 📄 Transcript cleanup
def clean_transcript(raw_text):
    """Normalize whitespace in a transcript and split it into sentences.

    Every run of whitespace is collapsed into a single space and the ends are
    trimmed; the result is then passed to ``sent_tokenize``.

    Args:
        raw_text: The transcript text to clean.

    Returns:
        Whatever ``sent_tokenize`` returns for the normalized text.
    """
    collapsed = re.sub(r'\s+', ' ', raw_text)
    normalized = collapsed.strip()
    sentences = sent_tokenize(normalized)
    return sentences

# 🧠 μžλ§‰μ„ λΆ„ν• ν•˜μ—¬ μš”μ•½
def _split_into_chunks(sentences, chunk_char_limit):
    """Greedily pack sentences into space-joined chunks under the size limit."""
    chunks = []
    parts = []
    used = 0  # characters in the current chunk, one separator per sentence
    for sentence in sentences:
        if used + len(sentence) >= chunk_char_limit:
            # The sentence does not fit: close the current chunk first.
            if parts:
                chunks.append(" ".join(parts).strip())
            parts = []
            used = 0
        parts.append(sentence)
        used += len(sentence) + 1
    if parts:
        chunks.append(" ".join(parts).strip())
    # Drop empty chunks so the summarizer is never called on an empty string
    # (this used to happen when the first sentence alone exceeded the limit).
    return [chunk for chunk in chunks if chunk]


def summarize_long_text(text, chunk_char_limit=1000):
    """Summarize a long transcript chunk by chunk.

    The text is split into sentences with ``clean_transcript``; sentences are
    grouped into chunks whose accumulated length stays below
    ``chunk_char_limit``. A single sentence longer than the limit becomes a
    chunk of its own. Each chunk is summarized separately.

    Args:
        text: The transcript to summarize.
        chunk_char_limit: Character budget used when grouping sentences.

    Returns:
        The chunk summaries joined by blank lines. A chunk whose
        summarization raises is represented by a failure placeholder, so one
        bad chunk does not discard the other summaries.
    """
    sentences = clean_transcript(text)
    chunks = _split_into_chunks(sentences, chunk_char_limit)

    summaries = []
    for chunk in chunks:
        try:
            result = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception:
            # Best effort on purpose; unlike the previous bare ``except``,
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            summaries.append("⚠️ 요약 실패")

    return "\n\n".join(summaries)

# 🔁 End-to-end processing pipeline
def process_youtube(youtube_url):
    """Download, transcribe and summarize a YouTube video.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When the stripped
        transcript is shorter than 100 characters the summary is a notice
        that the text is too short to summarize. If any step raises, the
        first element is the error message prefixed with ``[에러]`` and the
        second element is an empty string; no exception propagates.
    """
    try:
        audio_file = download_audio_with_ytdlp(youtube_url)
        try:
            result = asr_model.transcribe(audio_file)
        finally:
            # Delete the downloaded audio even when transcription fails, so
            # failed requests do not leave files behind.
            if os.path.exists(audio_file):
                os.remove(audio_file)
        transcript = result.get("text", "")

        if len(transcript.strip()) < 100:
            summary = "⚠️ 자막 내용이 너무 짧아 요약할 수 없습니다."
        else:
            summary = summarize_long_text(transcript)

        return transcript, summary
    except Exception as e:
        # UI boundary: report the failure in the transcript box instead of
        # raising.
        return f"[에러] {str(e)}", ""

# 🌐 Gradio UI ꡬ성
# Gradio interface: one URL text box in; transcript and summary text boxes out.
# The user-facing Korean strings were mis-encoded (UTF-8 bytes decoded with a
# single-byte codepage) and are restored here so the UI is readable.
demo = gr.Interface(
    fn=process_youtube,
    inputs=gr.Textbox(label="유튜브 영상 URL"),
    outputs=[
        gr.Textbox(label="🎙 자막 (Whisper 결과)", lines=10),
        gr.Textbox(label="🧠 요약 (요약 결과)", lines=5)
    ],
    title="🎬 유튜브 자막 & 요약 서비스 (한/영 혼합 최적화)",
    description="yt-dlp로 오디오 다운로드 → Whisper 자막 생성 → mT5 요약 모델로 요약 처리"
)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()