Spaces: Sleeping
| import gradio as gr | |
| import os | |
| import torch | |
| import gc | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
| import tempfile | |
| import yt_dlp | |
# Place both models on the first CUDA device when one is visible, otherwise on
# the CPU (-1). process_video() empties the CUDA cache between stages, which
# only has an effect when the models actually run on CUDA.
_DEVICE = 0 if torch.cuda.is_available() else -1

# Abstractive summarizer applied to the transcript text.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=_DEVICE,
)

# Speech-to-text model applied to the downloaded audio file.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=_DEVICE,
)
def download_audio(url: str, temp_dir: str) -> str:
    """Download the audio track behind ``url`` and convert it to MP3.

    Args:
        url: Address of the video to fetch; handed to yt-dlp unchanged.
        temp_dir: Directory that receives the downloaded file.

    Returns:
        Path of the ``audio.mp3`` file inside ``temp_dir``.

    Raises:
        FileNotFoundError: If yt-dlp returned without leaving the expected
            MP3 file behind.
    """
    output_template = os.path.join(temp_dir, "audio.%(ext)s")
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_template,
        'quiet': True,
        # For a watch URL that also carries a playlist id, fetch only the
        # referenced video: every entry would otherwise be written through
        # the same fixed output template.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    # Build the result path directly instead of string-replacing the template,
    # so a temp_dir that happens to contain "%(ext)s" is not rewritten too.
    audio_path = os.path.join(temp_dir, "audio.mp3")
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(
            f"Audio extraction did not produce the expected file: {audio_path}"
        )
    return audio_path
def process_video(url: str) -> str:
    """Download a video's audio, transcribe it, and summarize the transcript.

    Args:
        url: Address of the video to process.

    Returns:
        The generated summary, or the fixed message
        ``"Transcription too short or unclear"`` when the transcript has
        fewer than 50 non-blank-padded characters.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = download_audio(url, tmpdir)
        # Whisper works on 30-second windows; chunking makes the pipeline walk
        # the whole recording instead of handling a single window.
        transcription_result = asr_pipeline(audio_path, chunk_length_s=30)
    # The transcript is in memory now, so the temporary audio file can go.
    text = transcription_result['text']

    if len(text.strip()) < 50:
        return "Transcription too short or unclear"

    # Release transcription leftovers before the summarizer allocates.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # truncation=True clips the transcript to the summarizer's input limit so
    # long videos do not overflow the model.
    # NOTE(review): only the leading part of a very long transcript is
    # summarized; chunked summarization would be needed to cover all of it.
    summary_result = summarizer(
        text,
        max_length=150,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return summary_result[0]['summary_text']
def main(url):
    """Gradio callback: summarize the video found at ``url``.

    Args:
        url: Text submitted through the interface's input box.

    Returns:
        The summary produced by ``process_video``, or a prompt asking for a
        URL when the submitted text is empty or only whitespace.
    """
    # An empty textbox would otherwise be forwarded to the downloader and
    # surface as an unrelated download error.
    if not url or not url.strip():
        return "Please enter a YouTube URL."
    # Pasted URLs often carry stray spaces or a trailing newline.
    return process_video(url.strip())
# Web UI: one text box in (the video URL), one text box out (the summary).
iface = gr.Interface(
    fn=main,
    inputs="text",
    outputs="text",
    title="YouTube Audio Summarizer",
)

# Start serving the interface as soon as the module is executed.
iface.launch()