Spaces:
Sleeping
Sleeping
| import os | |
# Install any dependency that is not importable yet. Only the missing
# packages are passed to pip, so a fully provisioned environment starts
# without running pip at all.
import importlib.util
import subprocess
import sys

# Maps the importable module name to the pip distribution name.
_REQUIRED_PACKAGES = {
    "torch": "torch",
    "torchvision": "torchvision",
    "torchaudio": "torchaudio",
    "transformers": "transformers",
    "gradio": "gradio",
    "youtube_transcript_api": "youtube-transcript-api",
}
_missing_packages = [
    pip_name
    for module_name, pip_name in _REQUIRED_PACKAGES.items()
    if importlib.util.find_spec(module_name) is None
]
if _missing_packages:
    # Use the running interpreter's pip so the packages land in the same
    # environment. check=False keeps this best-effort: a failed install
    # surfaces as an ImportError on the import lines that follow.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *_missing_packages],
        check=False,
    )
| import torch | |
| import gradio as gr | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import re | |
| from transformers import pipeline | |
# Build the summarization pipeline once at import time so every request
# reuses the same loaded model.
#
# Alternative kept for reference: load from a local snapshot directory.
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, device=-1)
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    The ID is recognised when it directly follows ``v=``, a ``/`` or
    ``youtu.be/`` and is not itself followed by another ID character, so
    longer host or path segments (for example ``youtube-nocookie.com``)
    are not mistaken for an ID.

    Args:
        url: The URL text to inspect. ``None`` and other non-string
            values are treated as "no ID found".

    Returns:
        The video ID as a string, or ``None`` when no ID is present.
    """
    if not isinstance(url, str):
        return None
    # The negative lookahead rejects runs longer than 11 ID characters;
    # without it the first 11 characters of any long segment would match.
    pattern = r"(?:v=|\/|youtu\.be\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None
| # To avoid YouTube blocking the server's IP address, uncomment the variant below and fill in real proxy addresses. | |
| # def get_transcript(youtube_url): | |
| # video_id = extract_video_id(youtube_url) | |
| # if not video_id: | |
| # return "Invalid YouTube URL!" | |
| # try: | |
| # # Add proxy configuration | |
| # proxies = { | |
| # 'http': 'http://your-proxy-address:port', | |
| # 'https': 'http://your-proxy-address:port' | |
| # } | |
| # transcript = YouTubeTranscriptApi.get_transcript( | |
| # video_id, | |
| # proxies=proxies | |
| # ) | |
| # return " ".join([entry['text'] for entry in transcript]) | |
| # except Exception as e: | |
| # return f"Error: {str(e)}" | |
| # This variant (no proxy) works fine when run locally. | |
def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video as one string.

    Args:
        youtube_url: URL of the video whose transcript is wanted.

    Returns:
        The transcript text with all entries joined by single spaces.
        Failures are reported in-band rather than raised:
        ``"Invalid YouTube URL!"`` when no video ID can be extracted, or
        ``"Error fetching transcript: <reason>"`` when the fetch fails.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL!"
    try:
        # Newer youtube-transcript-api releases expose an instance-level
        # fetch() and drop the static get_transcript(); support both so an
        # unpinned install keeps working.
        if hasattr(YouTubeTranscriptApi, "fetch"):
            entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        else:
            entries = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in entries)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and every failure is
        # shown to the user as a message instead of a traceback.
        return f"Error fetching transcript: {str(e)}"
def chunk_text(text, max_tokens=1024):
    """Split text into consecutive chunks of at most ``max_tokens`` words.

    Despite the parameter name, the limit counts whitespace-separated
    words, not model tokens; a chunk may therefore tokenize to more
    tokens than ``max_tokens``.

    Args:
        text: The text to split. Runs of whitespace are collapsed to
            single spaces in the output.
        max_tokens: Maximum number of words per chunk; must be >= 1.

    Returns:
        A list of chunk strings in original order; empty when ``text``
        contains no words.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    # A non-positive limit would otherwise drop all text (negative step)
    # or fail with an unrelated range() error (zero step).
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
def summarize_youtube_video(youtube_url):
    """Fetch a video's transcript and return a summary of it.

    The transcript is split into word-limited chunks, each chunk is
    summarized separately, and the partial summaries are joined with
    single spaces.

    Args:
        youtube_url: URL of the YouTube video to summarize.

    Returns:
        The combined summary text, or the failure message produced by
        ``get_transcript`` when the transcript could not be obtained.
    """
    transcript = get_transcript(youtube_url)
    # get_transcript reports failures in-band as a message string. Match
    # the message prefixes instead of searching the whole text, so a real
    # transcript that merely contains "Error" or "Invalid" is still
    # summarized rather than returned raw.
    failure_prefixes = (
        "Error fetching transcript:",
        "Error:",
        "Invalid YouTube URL!",
    )
    if transcript.startswith(failure_prefixes):
        return transcript

    # Chunk by words with some headroom below the 1024 default limit.
    chunks = chunk_text(transcript, max_tokens=900)
    # The chunk limit counts words, so a chunk can still tokenize to more
    # than the model accepts; truncation=True asks the pipeline to clip
    # such input instead of failing on it.
    summaries = [
        text_summary(
            chunk,
            max_length=200,
            min_length=50,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(summaries)
# Gradio UI: one text box for the video URL, one for the resulting summary.
_url_box = gr.Textbox(
    label="YouTube Video URL",
    placeholder="Enter YouTube Video URL...",
)
_summary_box = gr.Textbox(label="Summarized Text", lines=7)

demo = gr.Interface(
    fn=summarize_youtube_video,
    inputs=_url_box,
    outputs=_summary_box,
    title="@cygon: YouTube Video Summarizer",
    description="Enter a YouTube video URL, and the app will fetch & summarize the transcript for you.",
)

if __name__ == "__main__":
    # share=True enables public sharing of the running app.
    demo.launch(share=True)