Spaces:

Sayiqa7
/

Youtube_summarization

Runtime error

App Files Files Community

Sayiqa7 committed on Dec 31, 2024

Commit

7d1dcdc

verified ·

1 Parent(s): 7a199b6

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -399

app.py CHANGED Viewed

@@ -1,359 +1,3 @@
-# import subprocess
-# subprocess.check_call(["pip", "install", "transformers==4.34.0"])
-# subprocess.check_call(["pip", "install", "torch>=1.7.1"])
-# subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
-# subprocess.check_call(["pip", "install", "pytube"])
-# subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
-# subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
-# subprocess.check_call(["pip", "install", "google-generativeai"])
-# subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
-# subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
-# subprocess.check_call(["pip", "install", "genai"])
-# subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
-# subprocess.check_call(["pip", "install", "google-api-python-client>=2.0.0"])
-# import transformers
-# import torch
-# import os
-# import youtube_transcript_api
-# import pytube
-# import gradio
-# import PyPDF2
-# import pathlib
-# import pandas
-# import numpy
-# import textblob
-# import gradio as gr
-# from youtube_transcript_api import YouTubeTranscriptApi
-# import google.generativeai as genai
-# from googleapiclient.discovery import build
-# import requests
-# from textblob import TextBlob
-# import re
-# #from google.cloud import generativeai
-# from googleapiclient.discovery import build
-# from huggingface_hub import login
-# from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
-# def install_missing_packages():
-#     required_packages = {
-#          "torch":">=1.11.0",
-#         "transformers":">=4.34.0",
-#         "youtube_transcript_api" :">=0.6.3" ,
-#         "pytube":None,
-#         "huggingface_hub": ">=0.19.0",
-#         "PyPDF2": ">=3.0.1",
-#         "textblob":">=0.17.1",
-#         "python-dotenv":">=1.0.0",
-#         "genai":None,
-#         "google-generativeai": None,
-#         "google-cloud-aiplatform":"==1.34.0",
-#         "google-api-python-client": ">=2.0.0"
-#     }
-#     for package, version in required_packages.items():
-#         try:
-#             __import__(package)
-#         except ImportError:
-#             package_name = f"{package}{version}" if version else package
-#             subprocess.check_call(["pip", "install", package_name])
-# install_missing_packages()
-# # Configuration
-# hf_token = os.getenv("HF_TOKEN")
-# if hf_token:
-#     login(hf_token)
-# else:
-#     raise ValueError("HF_TOKEN environment variable not set.")
-# YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"  # Replace with your YouTube API Key
-# USER_CREDENTIALS = {"admin": "password"}  # Example user credentials
-# import os
-# from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
-# # Use environment variables
-# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-# YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
-# if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
-#     raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
-# genai.configure(api_key=GOOGLE_API_KEY)
-# # Database
-# students_data = [
-#     (1, "Alice", "A", "Computer Science"),
-#     (2, "Aliaa", "B", "Mathematics"),
-#     (3, "Charlie", "A", "Machine Learning"),
-#     (4, "Daan", "A", "Physics"),
-#     (5, "Jhon", "C", "Math"),
-#     (6, "Emma", "A+", "Computer Science")
-# ]
-# teachers_data = [
-#     (1, "Dr. Smith", "Math", "MS Mathematics"),
-#     (2, "Ms. Johnson", "Science", "MSc Physics"),
-#     (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
-#     (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
-# ]
-# courses_data = [
-#     (1, "Algebra", "Dr. Smith", "Advanced"),
-#     (2, "Biology", "Ms. Mia", "Intermediate"),
-#     (3, "Machine Learning", "Ms. Jack", "Intermediate"),
-#     (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
-#     (5, "Mathematics", "Ms. Smith", "Intermediate")
-# ]
-# def extract_video_id(url):
-#     match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
-#     return match.group(1) if match else None
-# def get_video_metadata(video_id):
-#     try:
-#         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
-#         request = youtube.videos().list(part="snippet", id=video_id)
-#         response = request.execute()
-#         if "items" in response and len(response["items"]) > 0:
-#             snippet = response["items"][0]["snippet"]
-#             return {
-#                 "title": snippet.get("title", "No title available"),
-#                 "description": snippet.get("description", "No description available"),
-#             }
-#         return {}
-#     except Exception as e:
-#         return {"title": "Error fetching metadata", "description": str(e)}
-# def clean_text_for_analysis(text):
-#     return " ".join(text.split())
-# def get_recommendations(keywords, max_results=5):
-#     if not keywords:
-#         return "Please provide search keywords"
-#     try:
-#         response = requests.get(
-#             "https://www.googleapis.com/youtube/v3/search",
-#             params={
-#                 "part": "snippet",
-#                 "q": f"educational {keywords}",
-#                 "type": "video",
-#                 "maxResults": max_results,
-#                 "relevanceLanguage": "en",
-#                 "key": YOUTUBE_API_KEY
-#             }
-#         ).json()
-#         results = []
-#         for item in response.get("items", []):
-#             title = item["snippet"]["title"]
-#             channel = item["snippet"]["channelTitle"]
-#             video_id = item["id"]["videoId"]
-#             results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
-#         return "\n".join(results) if results else "No recommendations found"
-#     except Exception as e:
-#         return f"Error: {str(e)}"
-# def process_youtube_video(url):
-#     try:
-#         thumbnail = None
-#         summary = "No transcript available"
-#         sentiment_label = "N/A"
-#         video_id = extract_video_id(url)
-#         if not video_id:
-#             return None, "Invalid YouTube URL", "N/A"
-#         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
-#         try:
-#             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-#             transcript = None
-#             try:
-#                 transcript = transcript_list.find_transcript(['en'])
-#             except:
-#                 transcript = transcript_list.find_generated_transcript(['en'])
-#             text = " ".join([t['text'] for t in transcript.fetch()])
-#             if not text.strip():
-#                 raise ValueError("Transcript is empty")
-#             cleaned_text = clean_text_for_analysis(text)
-#             sentiment = TextBlob(cleaned_text).sentiment
-#             sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
-#             summary = f"Summary: {cleaned_text[:400]}..."
-#         except (TranscriptsDisabled, NoTranscriptFound):
-#             metadata = get_video_metadata(video_id)
-#             summary = metadata.get("description", "No subtitles available")
-#             sentiment_label = "N/A"
-#         return thumbnail, summary, sentiment_label
-#     except Exception as e:
-#         return None, f"Error: {str(e)}", "N/A"
-# # Gradio Interface
-# with gr.Blocks(theme=gr.themes.Soft()) as app:
-#     # Login Page
-#     with gr.Group() as login_page:
-#         gr.Markdown("# 🎓 Educational Learning Management System")
-#         username = gr.Textbox(label="Username")
-#         password = gr.Textbox(label="Password", type="password")
-#         login_btn = gr.Button("Login", variant="primary")
-#         login_msg = gr.Markdown()
-#     # Main Interface
-#     with gr.Group(visible=False) as main_page:
-#         with gr.Row():
-#             with gr.Column(scale=1):
-#                 gr.Markdown("### 📋 Navigation")
-#                 nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
-#                 nav_students = gr.Button("👥 Students")
-#                 nav_teachers = gr.Button("👨‍🏫 Teachers")
-#                 nav_courses = gr.Button("📚 Courses")
-#                 nav_youtube = gr.Button("🎥 YouTube Tool")
-#                 logout_btn = gr.Button("🚪 Logout", variant="stop")
-#             with gr.Column(scale=3):
-#                 # Dashboard Content
-#                 dashboard_page = gr.Group()
-#                 with dashboard_page:
-#                     gr.Markdown("## 📊 Dashboard")
-#                     gr.Markdown(f"""
-#                     ### System Overview
-#                     - 👥 Total Students: {len(students_data)}
-#                     - 👨‍🏫 Total Teachers: {len(teachers_data)}
-#                     - 📚 Total Courses: {len(courses_data)}
-#                     ### Quick Actions
-#                     - View student performance
-#                     - Access course materials
-#                     - Generate learning insights
-#                     """)
-#                 # Students Content
-#                 students_page = gr.Group(visible=False)
-#                 with students_page:
-#                     gr.Markdown("## 👥 Students")
-#                     gr.DataFrame(
-#                         value=students_data,
-#                         headers=["ID", "Name", "Grade", "Program"]
-#                     )
-#                 # Teachers Content
-#                 teachers_page = gr.Group(visible=False)
-#                 with teachers_page:
-#                     gr.Markdown("## 👨‍🏫 Teachers")
-#                     gr.DataFrame(
-#                         value=teachers_data,
-#                         headers=["ID", "Name", "Subject", "Qualification"]
-#                     )
-#                 # Courses Content
-#                 courses_page = gr.Group(visible=False)
-#                 with courses_page:
-#                     gr.Markdown("## 📚 Courses")
-#                     gr.DataFrame(
-#                         value=courses_data,
-#                         headers=["ID", "Name", "Instructor", "Level"]
-#                     )
-#                 # YouTube Tool Content
-#                 youtube_page = gr.Group(visible=False)
-#                 with youtube_page:
-#                     gr.Markdown("## Agent for YouTube Content Exploration")
-#                     with gr.Row():
-#                         with gr.Column(scale=2):
-#                             video_url = gr.Textbox(
-#                                 label="YouTube URL",
-#                                 placeholder="https://youtube.com/watch?v=..."
-#                             )
-#                             keywords = gr.Textbox(
-#                                 label="Keywords for Recommendations",
-#                                 placeholder="e.g., python programming, machine learning"
-#                             )
-#                             analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
-#                             recommend_btn = gr.Button("🔎 Get Recommendations", variant="primary")
-#                         with gr.Column(scale=1):
-#                             video_thumbnail = gr.Image(label="Video Preview")
-#                     with gr.Row():
-#                         with gr.Column():
-#                             summary = gr.Textbox(label="📝 Summary", lines=8)
-#                             sentiment = gr.Textbox(label="😊 Content Sentiment")
-#                         with gr.Column():
-#                             recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
-#     def login_check(user, pwd):
-#         if USER_CREDENTIALS.get(user) == pwd:
-#             return {
-#                 login_page: gr.update(visible=False),
-#                 main_page: gr.update(visible=True),
-#                 login_msg: ""
-#             }
-#         return {
-#             login_page: gr.update(visible=True),
-#             main_page: gr.update(visible=False),
-#             login_msg: "❌ Invalid credentials"
-#         }
-#     def show_page(page_name):
-#         updates = {
-#             dashboard_page: gr.update(visible=False),
-#             students_page: gr.update(visible=False),
-#             teachers_page: gr.update(visible=False),
-#             courses_page: gr.update(visible=False),
-#             youtube_page: gr.update(visible=False)
-#         }
-#         updates[page_name] = gr.update(visible=True)
-#         return updates
-#     # Event Handlers
-#     login_btn.click(
-#         login_check,
-#         inputs=[username, password],
-#         outputs=[login_page, main_page, login_msg]
-#     )
-#     nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
-#     nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
-#     nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
-#     nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
-#     nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
-#     analyze_btn.click(
-#         process_youtube_video,
-#         inputs=[video_url],
-#         outputs=[video_thumbnail, summary, sentiment]
-#     )
-#     recommend_btn.click(
-#         get_recommendations,
-#         inputs=[keywords],
-#         outputs=[recommendations]
-#     )
-#     logout_btn.click(
-#         lambda: {
-#             login_page: gr.update(visible=True),
-#             main_page: gr.update(visible=False)
-#         },
-#         outputs=[login_page, main_page]
-#     )
-# if __name__ == "__main__":
-#     app.launch()
-##############################
 import subprocess
 subprocess.check_call(["pip", "install", "transformers==4.34.0"])
 subprocess.check_call(["pip", "install", "torch>=1.7.1"])
@@ -463,14 +107,11 @@ courses_data = [
     (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
     (5, "Mathematics", "Ms. Smith", "Intermediate")
 ]
-from transformers import pipeline
 def extract_video_id(url):
     match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
     return match.group(1) if match else None
-def clean_text(text):
-    return " ".join(text.split())
 def get_video_metadata(video_id):
     try:
         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
@@ -484,23 +125,28 @@ def get_video_metadata(video_id):
                 "description": snippet.get("description", "No description available"),
             }
         return {}
     except Exception as e:
         return {"title": "Error fetching metadata", "description": str(e)}
-def segment_transcript(transcript_text):
-    """Segment transcript into sections like intro, body, and conclusion."""
-    lines = transcript_text.split(". ")
-    intro = ". ".join(lines[:3])  # First 3 lines for intro
-    body = ". ".join(lines[3:-2])  # Middle lines for body
-    conclusion = ". ".join(lines[-2:])  # Last 2 lines for conclusion
-    return {"intro": intro, "body": body, "conclusion": conclusion}
-def summarize_text(text, summarizer):
-    """Summarize text using the provided summarization model."""
-    max_chunk_size = 512
-    chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
-    summaries = summarizer(chunks, max_length=150, min_length=40, do_sample=False)
-    return " ".join(summary["summary_text"] for summary in summaries)
 def process_youtube_video(url):
     try:
@@ -510,40 +156,28 @@ def process_youtube_video(url):
         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
-        # Load summarization model
-        summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
         try:
-            # Fetch transcript
             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-            transcript = transcript_list.find_transcript(['en']).fetch()
-            transcript_text = " ".join([t['text'] for t in transcript])
-            cleaned_text = clean_text(transcript_text)
-            # Segment transcript into sections
-            segments = segment_transcript(cleaned_text)
-            # Summarize each section
-            intro_summary = summarize_text(segments["intro"], summarizer)
-            body_summary = summarize_text(segments["body"], summarizer)
-            conclusion_summary = summarize_text(segments["conclusion"], summarizer)
-            # Sentiment analysis
-            sentiment = TextBlob(cleaned_text).sentiment
-            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
-            detailed_summary = (
-                f"### Introduction\n{intro_summary}\n\n"
-                f"### Main Body\n{body_summary}\n\n"
-                f"### Conclusion\n{conclusion_summary}"
-            )
         except (TranscriptsDisabled, NoTranscriptFound):
             metadata = get_video_metadata(video_id)
-            detailed_summary = metadata.get("description", "No subtitles available")
             sentiment_label = "N/A"
-        return thumbnail, detailed_summary, sentiment_label
     except Exception as e:
         return None, f"Error: {str(e)}", "N/A"
@@ -700,3 +334,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
 if __name__ == "__main__":
     app.launch()

 import subprocess
 subprocess.check_call(["pip", "install", "transformers==4.34.0"])
 subprocess.check_call(["pip", "install", "torch>=1.7.1"])
     (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
     (5, "Mathematics", "Ms. Smith", "Intermediate")
 ]
 def extract_video_id(url):
     match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
     return match.group(1) if match else None
 def get_video_metadata(video_id):
     try:
         youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
                 "description": snippet.get("description", "No description available"),
             }
         return {}
     except Exception as e:
         return {"title": "Error fetching metadata", "description": str(e)}
+def clean_text_for_analysis(text):
+    return " ".join(text.split())
+def generate_summary(content):
+    return "\n".join([
+        "**Key Insights**",
+        "",
+        content[:400] + "..." if len(content) > 400 else content
+    ])
+def analyze_sentiment(text):
+    sentiment = TextBlob(text).sentiment
+    sentiment_label = (
+        "Positive" if sentiment.polarity > 0 else
+        "Negative" if sentiment.polarity < 0 else
+        "Neutral"
+    )
+    return f"{sentiment_label} ({sentiment.polarity:.2f})"
 def process_youtube_video(url):
     try:
         thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
         try:
             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+            transcript = None
+            try:
+                transcript = transcript_list.find_transcript(['en'])
+            except:
+                transcript = transcript_list.find_generated_transcript(['en'])
+            text = " ".join([t['text'] for t in transcript.fetch()])
+            if not text.strip():
+                raise ValueError("Transcript is empty")
+            cleaned_text = clean_text_for_analysis(text)
+            summary = generate_summary(cleaned_text)
+            sentiment_label = analyze_sentiment(cleaned_text)
         except (TranscriptsDisabled, NoTranscriptFound):
             metadata = get_video_metadata(video_id)
+            summary = generate_summary(metadata.get("description", "No subtitles available"))
             sentiment_label = "N/A"
+        return thumbnail, summary, sentiment_label
     except Exception as e:
         return None, f"Error: {str(e)}", "N/A"
 if __name__ == "__main__":
     app.launch()
+##############################