# cygon24 - "Update app.py" (commit e329420, verified)
import importlib.util
import os
import subprocess
import sys

# Bootstrap: install third-party dependencies only when one is actually missing,
# instead of shelling out to pip on every start-up.
# Maps importable module name -> pip distribution name.
_DEPENDENCIES = {
    "torch": "torch",
    "torchvision": "torchvision",
    "torchaudio": "torchaudio",
    "transformers": "transformers",
    "gradio": "gradio",
    "youtube_transcript_api": "youtube-transcript-api",
}
_missing = [
    dist
    for module, dist in _DEPENDENCIES.items()
    if importlib.util.find_spec(module) is None
]
if _missing:
    # Use the running interpreter's pip and an argument list (no shell string),
    # so the packages land in the environment this script imports from.
    _pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", *_missing],
        check=False,
    )
    if _pip_result.returncode != 0:
        # Best effort, as before: report the failure and let the imports below
        # raise the real error if a required package is still unavailable.
        print(
            f"Warning: pip install failed for: {' '.join(_missing)}",
            file=sys.stderr,
        )
    # Make freshly installed packages visible to the import system.
    importlib.invalidate_caches()

import torch
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import re
from transformers import pipeline

# Load the summarization model by its Hub identifier.
# To load from a local snapshot instead, use the two lines below:
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, device=-1)
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)
# Places where an 11-character video ID appears in a YouTube URL:
#   ?v=<id> / &v=<id>, youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
# The trailing look-ahead rejects longer tokens (e.g. channel IDs) whose first
# 11 characters merely look like a video ID.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:(?:^|[?&])v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
    r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


def extract_video_id(url):
    """Extract the YouTube video ID from a URL.

    Args:
        url: A YouTube URL such as ``https://www.youtube.com/watch?v=<id>``,
            ``https://youtu.be/<id>``, or an ``/embed/``, ``/shorts/``,
            ``/live/`` or ``/v/`` link.

    Returns:
        The 11-character video ID, or ``None`` when ``url`` is empty or does
        not contain an ID in one of the recognised positions.
    """
    if not url:
        return None
    # The ID must follow a known marker. A bare "/" is not enough, otherwise
    # path segments or host names of 11+ characters are mistaken for IDs.
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
# Uncomment this variant and fill in real proxy addresses to avoid IP blocking by YouTube.
# def get_transcript(youtube_url):
# video_id = extract_video_id(youtube_url)
# if not video_id:
# return "Invalid YouTube URL!"
# try:
# # Add proxy configuration
# proxies = {
# 'http': 'http://your-proxy-address:port',
# 'https': 'http://your-proxy-address:port'
# }
# transcript = YouTubeTranscriptApi.get_transcript(
# video_id,
# proxies=proxies
# )
# return " ".join([entry['text'] for entry in transcript])
# except Exception as e:
# return f"Error: {str(e)}"
# This variant (no proxy) works fine when run locally.
def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Args:
        youtube_url: URL of the video whose transcript should be fetched.

    Returns:
        The transcript text on success. Failures are reported in-band:
        ``"Invalid YouTube URL!"`` when no video ID can be extracted, or
        ``"Error fetching transcript: <reason>"`` when the fetch fails.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL!"
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static helper: returns a list of dicts with a 'text' key.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [entry['text'] for entry in entries]
        else:
            # Newer releases of youtube-transcript-api no longer provide the
            # static helper; use the instance API, whose result iterates over
            # snippet objects exposing `.text`.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in fetched]
    except Exception as e:
        # UI boundary: turn any failure into a message the caller can display.
        return f"Error fetching transcript: {str(e)}"
    return " ".join(texts)
def chunk_text(text, max_tokens=1024):
    """Break *text* into consecutive pieces of at most *max_tokens* words.

    The text is split on whitespace, so the limit counts words rather than
    model tokens. Each piece is re-joined with single spaces.

    Returns:
        A list of strings; empty when *text* contains no words.
    """
    pieces = text.split()
    return [
        " ".join(pieces[start:start + max_tokens])
        for start in range(0, len(pieces), max_tokens)
    ]
def summarize_youtube_video(youtube_url):
    """Fetch a video's transcript and return a summary of it.

    Args:
        youtube_url: URL of the YouTube video to summarize.

    Returns:
        The per-chunk summaries joined with spaces, or the error message from
        ``get_transcript`` when the transcript could not be obtained, or a
        short notice when the transcript contains no text.
    """
    transcript = get_transcript(youtube_url)
    # get_transcript signals failure in-band. Match its exact message shapes
    # rather than searching for the words anywhere, so that a real transcript
    # that merely mentions "Error" or "Invalid" is still summarized.
    if transcript == "Invalid YouTube URL!" or transcript.startswith(
        ("Error fetching transcript:", "Error:")
    ):
        return transcript  # Return error message if transcript not available

    # chunk_text counts whitespace-separated words, not model tokens, and a
    # word often maps to more than one token. Keep the chunk well under the
    # 1024-token input limit so chunks are rarely truncated.
    chunks = chunk_text(transcript, max_tokens=700)
    if not chunks:
        return "Transcript is empty; nothing to summarize."

    summaries = []
    for chunk in chunks:
        # truncation=True is a safety net: an over-long chunk is clipped to the
        # model's maximum input instead of failing inside the model.
        summary_output = text_summary(
            chunk,
            max_length=200,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        summaries.append(summary_output[0]['summary_text'])
    return " ".join(summaries)  # Combine all summaries
# 🌟 Gradio UI Integration
# Single-line input for the video URL.
_url_box = gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube Video URL...")
# Multi-line output that shows the generated summary (or an error message).
_summary_box = gr.Textbox(label="Summarized Text", lines=7)

# Wire the summarizer function to the two text boxes.
demo = gr.Interface(
    fn=summarize_youtube_video,
    inputs=_url_box,
    outputs=_summary_box,
    title="@cygon: YouTube Video Summarizer",
    description="Enter a YouTube video URL, and the app will fetch & summarize the transcript for you.",
)

if __name__ == "__main__":
    # share=True asks Gradio for a public link in addition to the local server.
    demo.launch(share=True)