"""Gradio app that fetches a YouTube video's transcript and summarizes it."""

import os

# Install dependencies manually if not found.
# NOTE: this command runs on every start of the script, before the
# third-party imports below, so it must stay above them.
os.system("pip install torch torchvision torchaudio transformers gradio youtube-transcript-api")

import re

import gradio as gr
import torch
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

# Load the model locally
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, device=-1)
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)

# Preferred pattern: the ID must follow an explicit marker (query parameter,
# short-link host, or a known path prefix) and must be exactly 11 characters.
_MARKED_VIDEO_ID_RE = re.compile(
    r"(?:v=|youtu\.be/|/(?:embed|shorts|live|v)/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)
# Original, looser pattern, kept as a fallback so every URL that was accepted
# before is still accepted. On its own it can capture part of a hostname,
# because any "/" followed by 11 ID-like characters matches.
_LEGACY_VIDEO_ID_RE = re.compile(r"(?:v=|\/|youtu\.be\/)([0-9A-Za-z_-]{11})")


def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Args:
        url: The URL (or URL fragment) to search.

    Returns:
        The video ID string, or None when no ID is found or ``url`` is empty.
    """
    if not url:
        return None
    match = _MARKED_VIDEO_ID_RE.search(url) or _LEGACY_VIDEO_ID_RE.search(url)
    return match.group(1) if match else None


# Uncomment this and add proxies to avoid IP blocking by YouTube.
# def get_transcript(youtube_url):
#     video_id = extract_video_id(youtube_url)
#     if not video_id:
#         return "Invalid YouTube URL!"
#     try:
#         # Add proxy configuration
#         proxies = {
#             'http': 'http://your-proxy-address:port',
#             'https': 'http://your-proxy-address:port'
#         }
#         transcript = YouTubeTranscriptApi.get_transcript(
#             video_id,
#             proxies=proxies
#         )
#         return " ".join([entry['text'] for entry in transcript])
#     except Exception as e:
#         return f"Error: {str(e)}"


def _fetch_transcript_text(video_id):
    """Download the transcript for ``video_id`` and join it into one string.

    Uses the static ``get_transcript`` method when the installed
    youtube-transcript-api provides it; otherwise falls back to the instance
    ``fetch`` method that newer releases expose (see the library's README).
    Any exception raised by the library propagates to the caller.
    """
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in entries)
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


def _load_transcript(youtube_url):
    """Return ``(text, error)`` for a URL; exactly one of the two is None.

    Keeping the error separate from the text lets callers distinguish a
    failure from a transcript that merely contains words such as "Error".
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return None, "Invalid YouTube URL!"
    try:
        return _fetch_transcript_text(video_id), None
    except Exception as e:  # UI boundary: report any failure as a message
        return None, f"Error fetching transcript: {str(e)}"


# This works fine locally.
def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video.

    Args:
        youtube_url: URL of the video.

    Returns:
        The transcript as a single space-joined string. On failure, returns
        "Invalid YouTube URL!" or a message starting with
        "Error fetching transcript:" instead of raising.
    """
    text, error = _load_transcript(youtube_url)
    return text if error is None else error


def chunk_text(text, max_tokens=1024):
    """Split text into chunks of at most ``max_tokens`` words.

    Despite the parameter name, the limit counts whitespace-separated words,
    not model tokens.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings (empty when ``text`` has no words).

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")
    words = text.split()
    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]


def summarize_youtube_video(youtube_url):
    """Fetch a video's transcript and return its summary.

    Args:
        youtube_url: URL of the video.

    Returns:
        The per-chunk summaries joined with spaces, or the error message
        when the transcript could not be obtained.
    """
    transcript, error = _load_transcript(youtube_url)
    if error is not None:
        return error  # Return error message if transcript not available

    # Split the transcript into smaller chunks; 900 words keeps a safety
    # margin, and truncation=True asks the pipeline to cut any chunk that
    # still ends up longer than the model accepts once tokenized.
    chunks = chunk_text(transcript, max_tokens=900)
    summaries = []
    for chunk in chunks:
        summary_output = text_summary(
            chunk,
            max_length=200,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        summaries.append(summary_output[0]["summary_text"])

    return " ".join(summaries)  # Combine all summaries


# 🌟 Gradio UI Integration
demo = gr.Interface(
    fn=summarize_youtube_video,
    inputs=gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube Video URL..."),
    outputs=gr.Textbox(label="Summarized Text", lines=7),
    title="@cygon: YouTube Video Summarizer",
    description="Enter a YouTube video URL, and the app will fetch & summarize the transcript for you.",
)

if __name__ == "__main__":
    demo.launch(share=True)  # Enables public sharing