Sayiqa committed on
Commit
d6fb65a
·
verified ·
1 Parent(s): 9d201df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +523 -349
app.py CHANGED
@@ -1,51 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
- subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
- subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
- subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
- subprocess.check_call(["pip", "install", "pytube"])
6
- subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
- subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
- subprocess.check_call(["pip", "install", "google-generativeai"])
9
- subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
- subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
- subprocess.check_call(["pip", "install", "genai"])
12
- subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
- import transformers
14
- import torch
15
- import os
16
- import youtube_transcript_api
17
- import pytube
18
- import gradio
19
- import PyPDF2
20
- import pathlib
21
- import pandas
22
- import numpy
23
- import textblob
24
- import gradio as gr
25
- from youtube_transcript_api import YouTubeTranscriptApi
26
- import google.generativeai as genai
27
- import requests
28
- from textblob import TextBlob
29
  import re
30
- #from google.cloud import generativeai
31
- from huggingface_hub import login
32
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
 
 
 
33
  def install_missing_packages():
34
  required_packages = {
35
- "torch":">=1.11.0",
36
- "transformers":">=4.34.0",
37
- "youtube_transcript_api" :">=0.6.3" ,
38
- "pytube":None,
39
  "huggingface_hub": ">=0.19.0",
40
  "PyPDF2": ">=3.0.1",
41
- "textblob":">=0.17.1",
42
- "python-dotenv":">=1.0.0",
43
- "genai":None,
44
  "google-generativeai": None,
45
- "google-cloud-aiplatform":"==1.34.0"
46
  }
47
 
48
-
49
  for package, version in required_packages.items():
50
  try:
51
  __import__(package)
@@ -54,162 +473,20 @@ def install_missing_packages():
54
  subprocess.check_call(["pip", "install", package_name])
55
 
56
  install_missing_packages()
57
- # Configuration
58
-
59
- hf_token = os.getenv("HF_TOKEN")
60
- if hf_token:
61
- login(hf_token)
62
- else:
63
- raise ValueError("HF_TOKEN environment variable not set.")
64
-
65
 
66
  # Configuration
67
- USER_CREDENTIALS = {
68
- "admin": "password123",
69
- "teacher": "teach2024",
70
- "student": "learn2024"
71
- }
72
-
73
- import os
74
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
75
-
76
- # Use environment variables
77
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
78
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
79
 
80
  if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
81
  raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
82
 
83
- genai.configure(api_key=GOOGLE_API_KEY)
84
-
85
- # Database
86
- students_data = [
87
- (1, "Alice", "A", "Computer Science"),
88
- (2, "Aliaa", "B", "Mathematics"),
89
- (3, "Charlie", "A", "Machine Learning"),
90
- (4, "Daan", "A", "Physics"),
91
- (5, "Jhon", "C", "Math"),
92
- (6, "Emma", "A+", "Computer Science")
93
- ]
94
-
95
- teachers_data = [
96
- (1, "Dr. Smith", "Math", "MS Mathematics"),
97
- (2, "Ms. Johnson", "Science", "MSc Physics"),
98
- (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
99
- (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
100
- ]
101
-
102
- courses_data = [
103
- (1, "Algebra", "Dr. Smith", "Advanced"),
104
- (2, "Biology", "Ms. Mia", "Intermediate"),
105
- (3, "Machine Learning", "Ms. Jack", "Intermediate"),
106
- (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
107
- (5, "Mathematics", "Ms. Smith", "Intermediate")
108
- ]
109
-
110
- def sanitize_text(text):
111
- """Remove invalid Unicode characters."""
112
- return text.encode("utf-8", "replace").decode("utf-8")
113
-
114
  def extract_video_id(url):
115
- if not url:
116
- return None
117
- patterns = [
118
- r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
119
- ]
120
- for pattern in patterns:
121
- match = re.search(pattern, url)
122
- if match:
123
- return match.group(1)
124
- return None
125
-
126
- from textblob import TextBlob
127
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
128
- import re
129
- from collections import Counter
130
- from googleapiclient.discovery import build
131
- def process_youtube_video(url="", keywords=""):
132
- try:
133
- # Initialize variables
134
- thumbnail = None
135
- summary = "No transcript available"
136
- sentiment_label = "N/A"
137
- recommendations = ""
138
- subtitle_info = "No additional information available"
139
-
140
- if not url.strip():
141
- return None, "Please enter a YouTube URL", "N/A", "", ""
142
-
143
- video_id = extract_video_id(url)
144
- if not video_id:
145
- return None, "Invalid YouTube URL", "N/A", "", ""
146
-
147
- thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
148
-
149
- try:
150
- # Fetch transcript
151
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
152
- transcript = None
153
- try:
154
- transcript = transcript_list.find_transcript(['en'])
155
- except:
156
- transcript = transcript_list.find_generated_transcript(['en'])
157
-
158
- text = " ".join([t['text'] for t in transcript.fetch()])
159
- if not text.strip():
160
- raise ValueError("Transcript is empty")
161
-
162
- # Clean up the text for sentiment analysis
163
- cleaned_text = clean_text_for_analysis(text)
164
-
165
- # Sentiment analysis
166
- sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
167
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
168
-
169
- # Generate summary
170
- model = genai.GenerativeModel("gemini-pro")
171
- summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
172
-
173
- # Extract subtitle information
174
- subtitle_info = extract_subtitle_info(cleaned_text)
175
-
176
- except TranscriptsDisabled:
177
- metadata = get_video_metadata(video_id)
178
- summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
179
- sentiment_label = "N/A"
180
- subtitle_info = "No subtitles available for analysis."
181
- except NoTranscriptFound:
182
- metadata = get_video_metadata(video_id)
183
- summary = metadata.get("description", "⚠️ No English transcript available.")
184
- sentiment_label = "N/A"
185
- subtitle_info = "No subtitles available for analysis."
186
- except Exception as e:
187
- return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
188
-
189
- # Get recommendations
190
- if keywords.strip():
191
- recommendations = get_recommendations(keywords)
192
-
193
- return thumbnail, summary, sentiment_label, subtitle_info, recommendations
194
-
195
- except Exception as e:
196
- return None, f"Error: {str(e)}", "N/A", "", ""
197
-
198
-
199
- def extract_video_id(url):
200
- """
201
- Extracts the video ID from a YouTube URL.
202
- """
203
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
204
  return match.group(1) if match else None
205
 
206
-
207
  def get_video_metadata(video_id):
208
- """
209
- Fetches video metadata such as title and description using the YouTube Data API.
210
- """
211
  try:
212
- YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
213
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
214
  request = youtube.videos().list(part="snippet", id=video_id)
215
  response = request.execute()
@@ -225,48 +502,20 @@ def get_video_metadata(video_id):
225
  except Exception as e:
226
  return {"title": "Error fetching metadata", "description": str(e)}
227
 
 
 
228
 
229
  def extract_subtitle_info(text):
230
- """
231
- Extracts meaningful information from the subtitles.
232
- This could include topics, key insights, or a breakdown of the content.
233
- """
234
  try:
235
- # Split text into sentences for better analysis
236
  sentences = text.split(". ")
237
-
238
- # Example: Extract key topics or keywords
239
  words = text.split()
240
  common_words = Counter(words).most_common(10)
241
  key_topics = ", ".join([word for word, count in common_words])
242
-
243
- # Example: Provide a breakdown of the content
244
  info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
245
-
246
  return info
247
  except Exception as e:
248
  return f"Error extracting subtitle information: {str(e)}"
249
 
250
-
251
- def clean_text_for_analysis(text):
252
- """
253
- Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
254
- """
255
- # Remove extra spaces and line breaks
256
- cleaned_text = " ".join(text.split())
257
- return cleaned_text
258
-
259
-
260
- def get_recommendations(keywords):
261
- """
262
- Fetches related video recommendations based on the provided keywords.
263
- This function can be expanded with a proper API or custom logic.
264
- """
265
- # Placeholder for fetching recommendations based on keywords
266
- return f"Recommendations for: {keywords}" # Dummy return for now
267
-
268
-
269
-
270
  def get_recommendations(keywords, max_results=5):
271
  if not keywords:
272
  return "Please provide search keywords"
@@ -282,161 +531,86 @@ def get_recommendations(keywords, max_results=5):
282
  "key": YOUTUBE_API_KEY
283
  }
284
  ).json()
285
-
286
  results = []
287
  for item in response.get("items", []):
288
  title = item["snippet"]["title"]
289
  channel = item["snippet"]["channelTitle"]
290
  video_id = item["id"]["videoId"]
291
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
292
-
293
  return "\n".join(results) if results else "No recommendations found"
294
  except Exception as e:
295
  return f"Error: {str(e)}"
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  # Gradio Interface
298
- with gr.Blocks(theme=gr.themes.Soft()) as app:
299
- # Login Page
300
- with gr.Group() as login_page:
301
- gr.Markdown("# 🎓 Educational Learning Management System")
302
- username = gr.Textbox(label="Username")
303
- password = gr.Textbox(label="Password", type="password")
304
- login_btn = gr.Button("Login", variant="primary")
305
- login_msg = gr.Markdown()
306
-
307
- # Main Interface
308
- with gr.Group(visible=False) as main_page:
309
- with gr.Row():
310
- with gr.Column(scale=1):
311
- gr.Markdown("### 📋 Navigation")
312
- nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
313
- nav_students = gr.Button("👥 Students")
314
- nav_teachers = gr.Button("👨‍🏫 Teachers")
315
- nav_courses = gr.Button("📚 Courses")
316
- nav_youtube = gr.Button("🎥 YouTube Tool")
317
- logout_btn = gr.Button("🚪 Logout", variant="stop")
318
-
319
- with gr.Column(scale=3):
320
- # Dashboard Content
321
- dashboard_page = gr.Group()
322
- with dashboard_page:
323
- gr.Markdown("## 📊 Dashboard")
324
- gr.Markdown(f"""
325
- ### System Overview
326
- - 👥 Total Students: {len(students_data)}
327
- - 👨‍🏫 Total Teachers: {len(teachers_data)}
328
- - 📚 Total Courses: {len(courses_data)}
329
-
330
- ### Quick Actions
331
- - View student performance
332
- - Access course materials
333
- - Generate learning insights
334
- """)
335
-
336
- # Students Content
337
- students_page = gr.Group(visible=False)
338
- with students_page:
339
- gr.Markdown("## 👥 Students")
340
- gr.DataFrame(
341
- value=students_data,
342
- headers=["ID", "Name", "Grade", "Program"]
343
- )
344
-
345
- # Teachers Content
346
- teachers_page = gr.Group(visible=False)
347
- with teachers_page:
348
- gr.Markdown("## 👨‍🏫 Teachers")
349
- gr.DataFrame(
350
- value=teachers_data,
351
- headers=["ID", "Name", "Subject", "Qualification"]
352
- )
353
-
354
- # Courses Content
355
- courses_page = gr.Group(visible=False)
356
- with courses_page:
357
- gr.Markdown("## 📚 Courses")
358
- gr.DataFrame(
359
- value=courses_data,
360
- headers=["ID", "Name", "Instructor", "Level"]
361
- )
362
-
363
- # YouTube Tool Content
364
- youtube_page = gr.Group(visible=False)
365
- with youtube_page:
366
- gr.Markdown("## Agent for YouTube Content Exploration")
367
- with gr.Row():
368
- with gr.Column(scale=2):
369
- video_url = gr.Textbox(
370
- label="YouTube URL",
371
- placeholder="https://youtube.com/watch?v=..."
372
- )
373
- keywords = gr.Textbox(
374
- label="Keywords for Recommendations",
375
- placeholder="e.g., python programming, machine learning"
376
- )
377
- analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
378
-
379
- with gr.Column(scale=1):
380
- video_thumbnail = gr.Image(label="Video Preview")
381
-
382
- with gr.Row():
383
- with gr.Column():
384
- summary = gr.Textbox(label="📝 Summary", lines=8)
385
- sentiment = gr.Textbox(label="😊 Content Sentiment")
386
- with gr.Column():
387
- recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
388
-
389
- def login_check(user, pwd):
390
- if USER_CREDENTIALS.get(user) == pwd:
391
- return {
392
- login_page: gr.update(visible=False),
393
- main_page: gr.update(visible=True),
394
- login_msg: ""
395
- }
396
- return {
397
- login_page: gr.update(visible=True),
398
- main_page: gr.update(visible=False),
399
- login_msg: "❌ Invalid credentials"
400
- }
401
-
402
- def show_page(page_name):
403
- updates = {
404
- dashboard_page: gr.update(visible=False),
405
- students_page: gr.update(visible=False),
406
- teachers_page: gr.update(visible=False),
407
- courses_page: gr.update(visible=False),
408
- youtube_page: gr.update(visible=False)
409
- }
410
- updates[page_name] = gr.update(visible=True)
411
- return updates
412
-
413
- # Event Handlers
414
- login_btn.click(
415
- login_check,
416
- inputs=[username, password],
417
- outputs=[login_page, main_page, login_msg]
418
- )
419
-
420
- nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
421
- nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
422
- nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
423
- nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
424
- nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
425
-
426
  analyze_btn.click(
427
  process_youtube_video,
428
  inputs=[video_url, keywords],
429
  outputs=[video_thumbnail, summary, sentiment, recommendations]
430
  )
431
-
432
- logout_btn.click(
433
- lambda: {
434
- login_page: gr.update(visible=True),
435
- main_page: gr.update(visible=False)
436
- },
437
- outputs=[login_page, main_page]
438
- )
439
 
440
  if __name__ == "__main__":
441
  app.launch()
442
-
 
1
+ # import subprocess
2
+ # subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
+ # subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
+ # subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
+ # subprocess.check_call(["pip", "install", "pytube"])
6
+ # subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
+ # subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
+ # subprocess.check_call(["pip", "install", "google-generativeai"])
9
+ # subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
+ # subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
+ # subprocess.check_call(["pip", "install", "genai"])
12
+ # subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
+ # import transformers
14
+ # import torch
15
+ # import os
16
+ # import youtube_transcript_api
17
+ # import pytube
18
+ # import gradio
19
+ # import PyPDF2
20
+ # import pathlib
21
+ # import pandas
22
+ # import numpy
23
+ # import textblob
24
+ # import gradio as gr
25
+ # from youtube_transcript_api import YouTubeTranscriptApi
26
+ # import google.generativeai as genai
27
+ # import requests
28
+ # from textblob import TextBlob
29
+ # import re
30
+ # #from google.cloud import generativeai
31
+ # from huggingface_hub import login
32
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
33
+ # def install_missing_packages():
34
+ # required_packages = {
35
+ # "torch":">=1.11.0",
36
+ # "transformers":">=4.34.0",
37
+ # "youtube_transcript_api" :">=0.6.3" ,
38
+ # "pytube":None,
39
+ # "huggingface_hub": ">=0.19.0",
40
+ # "PyPDF2": ">=3.0.1",
41
+ # "textblob":">=0.17.1",
42
+ # "python-dotenv":">=1.0.0",
43
+ # "genai":None,
44
+ # "google-generativeai": None,
45
+ # "google-cloud-aiplatform":"==1.34.0"
46
+ # }
47
+
48
+
49
+ # for package, version in required_packages.items():
50
+ # try:
51
+ # __import__(package)
52
+ # except ImportError:
53
+ # package_name = f"{package}{version}" if version else package
54
+ # subprocess.check_call(["pip", "install", package_name])
55
+
56
+ # install_missing_packages()
57
+ # # Configuration
58
+
59
+ # hf_token = os.getenv("HF_TOKEN")
60
+ # if hf_token:
61
+ # login(hf_token)
62
+ # else:
63
+ # raise ValueError("HF_TOKEN environment variable not set.")
64
+
65
+
66
+ # # Configuration
67
+ # USER_CREDENTIALS = {
68
+ # "admin": "password123",
69
+ # "teacher": "teach2024",
70
+ # "student": "learn2024"
71
+ # }
72
+
73
+ # import os
74
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
75
+
76
+ # # Use environment variables
77
+ # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
78
+ # YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
79
+
80
+ # if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
81
+ # raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
82
+
83
+ # genai.configure(api_key=GOOGLE_API_KEY)
84
+
85
+ # # Database
86
+ # students_data = [
87
+ # (1, "Alice", "A", "Computer Science"),
88
+ # (2, "Aliaa", "B", "Mathematics"),
89
+ # (3, "Charlie", "A", "Machine Learning"),
90
+ # (4, "Daan", "A", "Physics"),
91
+ # (5, "Jhon", "C", "Math"),
92
+ # (6, "Emma", "A+", "Computer Science")
93
+ # ]
94
+
95
+ # teachers_data = [
96
+ # (1, "Dr. Smith", "Math", "MS Mathematics"),
97
+ # (2, "Ms. Johnson", "Science", "MSc Physics"),
98
+ # (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
99
+ # (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
100
+ # ]
101
+
102
+ # courses_data = [
103
+ # (1, "Algebra", "Dr. Smith", "Advanced"),
104
+ # (2, "Biology", "Ms. Mia", "Intermediate"),
105
+ # (3, "Machine Learning", "Ms. Jack", "Intermediate"),
106
+ # (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
107
+ # (5, "Mathematics", "Ms. Smith", "Intermediate")
108
+ # ]
109
+
110
+ # def sanitize_text(text):
111
+ # """Remove invalid Unicode characters."""
112
+ # return text.encode("utf-8", "replace").decode("utf-8")
113
+
114
+ # def extract_video_id(url):
115
+ # if not url:
116
+ # return None
117
+ # patterns = [
118
+ # r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
119
+ # ]
120
+ # for pattern in patterns:
121
+ # match = re.search(pattern, url)
122
+ # if match:
123
+ # return match.group(1)
124
+ # return None
125
+
126
+ # from textblob import TextBlob
127
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
128
+ # import re
129
+ # from collections import Counter
130
+ # from googleapiclient.discovery import build
131
+ # def process_youtube_video(url="", keywords=""):
132
+ # try:
133
+ # # Initialize variables
134
+ # thumbnail = None
135
+ # summary = "No transcript available"
136
+ # sentiment_label = "N/A"
137
+ # recommendations = ""
138
+ # subtitle_info = "No additional information available"
139
+
140
+ # if not url.strip():
141
+ # return None, "Please enter a YouTube URL", "N/A", "", ""
142
+
143
+ # video_id = extract_video_id(url)
144
+ # if not video_id:
145
+ # return None, "Invalid YouTube URL", "N/A", "", ""
146
+
147
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
148
+
149
+ # try:
150
+ # # Fetch transcript
151
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
152
+ # transcript = None
153
+ # try:
154
+ # transcript = transcript_list.find_transcript(['en'])
155
+ # except:
156
+ # transcript = transcript_list.find_generated_transcript(['en'])
157
+
158
+ # text = " ".join([t['text'] for t in transcript.fetch()])
159
+ # if not text.strip():
160
+ # raise ValueError("Transcript is empty")
161
+
162
+ # # Clean up the text for sentiment analysis
163
+ # cleaned_text = clean_text_for_analysis(text)
164
+
165
+ # # Sentiment analysis
166
+ # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
167
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
168
+
169
+ # # Generate summary
170
+ # model = genai.GenerativeModel("gemini-pro")
171
+ # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
172
+
173
+ # # Extract subtitle information
174
+ # subtitle_info = extract_subtitle_info(cleaned_text)
175
+
176
+ # except TranscriptsDisabled:
177
+ # metadata = get_video_metadata(video_id)
178
+ # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
179
+ # sentiment_label = "N/A"
180
+ # subtitle_info = "No subtitles available for analysis."
181
+ # except NoTranscriptFound:
182
+ # metadata = get_video_metadata(video_id)
183
+ # summary = metadata.get("description", "⚠️ No English transcript available.")
184
+ # sentiment_label = "N/A"
185
+ # subtitle_info = "No subtitles available for analysis."
186
+ # except Exception as e:
187
+ # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
188
+
189
+ # # Get recommendations
190
+ # if keywords.strip():
191
+ # recommendations = get_recommendations(keywords)
192
+
193
+ # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
194
+
195
+ # except Exception as e:
196
+ # return None, f"Error: {str(e)}", "N/A", "", ""
197
+
198
+
199
+ # def extract_video_id(url):
200
+ # """
201
+ # Extracts the video ID from a YouTube URL.
202
+ # """
203
+ # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
204
+ # return match.group(1) if match else None
205
+
206
+
207
+ # def get_video_metadata(video_id):
208
+ # """
209
+ # Fetches video metadata such as title and description using the YouTube Data API.
210
+ # """
211
+ # try:
212
+ # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
213
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
214
+ # request = youtube.videos().list(part="snippet", id=video_id)
215
+ # response = request.execute()
216
+
217
+ # if "items" in response and len(response["items"]) > 0:
218
+ # snippet = response["items"][0]["snippet"]
219
+ # return {
220
+ # "title": snippet.get("title", "No title available"),
221
+ # "description": snippet.get("description", "No description available"),
222
+ # }
223
+ # return {}
224
+
225
+ # except Exception as e:
226
+ # return {"title": "Error fetching metadata", "description": str(e)}
227
+
228
+
229
+ # def extract_subtitle_info(text):
230
+ # """
231
+ # Extracts meaningful information from the subtitles.
232
+ # This could include topics, key insights, or a breakdown of the content.
233
+ # """
234
+ # try:
235
+ # # Split text into sentences for better analysis
236
+ # sentences = text.split(". ")
237
+
238
+ # # Example: Extract key topics or keywords
239
+ # words = text.split()
240
+ # common_words = Counter(words).most_common(10)
241
+ # key_topics = ", ".join([word for word, count in common_words])
242
+
243
+ # # Example: Provide a breakdown of the content
244
+ # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
245
+
246
+ # return info
247
+ # except Exception as e:
248
+ # return f"Error extracting subtitle information: {str(e)}"
249
+
250
+
251
+ # def clean_text_for_analysis(text):
252
+ # """
253
+ # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
254
+ # """
255
+ # # Remove extra spaces and line breaks
256
+ # cleaned_text = " ".join(text.split())
257
+ # return cleaned_text
258
+
259
+
260
+ # def get_recommendations(keywords):
261
+ # """
262
+ # Fetches related video recommendations based on the provided keywords.
263
+ # This function can be expanded with a proper API or custom logic.
264
+ # """
265
+ # # Placeholder for fetching recommendations based on keywords
266
+ # return f"Recommendations for: {keywords}" # Dummy return for now
267
+
268
+
269
+
270
+ # def get_recommendations(keywords, max_results=5):
271
+ # if not keywords:
272
+ # return "Please provide search keywords"
273
+ # try:
274
+ # response = requests.get(
275
+ # "https://www.googleapis.com/youtube/v3/search",
276
+ # params={
277
+ # "part": "snippet",
278
+ # "q": f"educational {keywords}",
279
+ # "type": "video",
280
+ # "maxResults": max_results,
281
+ # "relevanceLanguage": "en",
282
+ # "key": YOUTUBE_API_KEY
283
+ # }
284
+ # ).json()
285
+
286
+ # results = []
287
+ # for item in response.get("items", []):
288
+ # title = item["snippet"]["title"]
289
+ # channel = item["snippet"]["channelTitle"]
290
+ # video_id = item["id"]["videoId"]
291
+ # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
292
+
293
+ # return "\n".join(results) if results else "No recommendations found"
294
+ # except Exception as e:
295
+ # return f"Error: {str(e)}"
296
+
297
+ # # Gradio Interface
298
+ # with gr.Blocks(theme=gr.themes.Soft()) as app:
299
+ # # Login Page
300
+ # with gr.Group() as login_page:
301
+ # gr.Markdown("# 🎓 Educational Learning Management System")
302
+ # username = gr.Textbox(label="Username")
303
+ # password = gr.Textbox(label="Password", type="password")
304
+ # login_btn = gr.Button("Login", variant="primary")
305
+ # login_msg = gr.Markdown()
306
+
307
+ # # Main Interface
308
+ # with gr.Group(visible=False) as main_page:
309
+ # with gr.Row():
310
+ # with gr.Column(scale=1):
311
+ # gr.Markdown("### 📋 Navigation")
312
+ # nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
313
+ # nav_students = gr.Button("👥 Students")
314
+ # nav_teachers = gr.Button("👨‍🏫 Teachers")
315
+ # nav_courses = gr.Button("📚 Courses")
316
+ # nav_youtube = gr.Button("🎥 YouTube Tool")
317
+ # logout_btn = gr.Button("🚪 Logout", variant="stop")
318
+
319
+ # with gr.Column(scale=3):
320
+ # # Dashboard Content
321
+ # dashboard_page = gr.Group()
322
+ # with dashboard_page:
323
+ # gr.Markdown("## 📊 Dashboard")
324
+ # gr.Markdown(f"""
325
+ # ### System Overview
326
+ # - 👥 Total Students: {len(students_data)}
327
+ # - 👨‍🏫 Total Teachers: {len(teachers_data)}
328
+ # - 📚 Total Courses: {len(courses_data)}
329
+
330
+ # ### Quick Actions
331
+ # - View student performance
332
+ # - Access course materials
333
+ # - Generate learning insights
334
+ # """)
335
+
336
+ # # Students Content
337
+ # students_page = gr.Group(visible=False)
338
+ # with students_page:
339
+ # gr.Markdown("## 👥 Students")
340
+ # gr.DataFrame(
341
+ # value=students_data,
342
+ # headers=["ID", "Name", "Grade", "Program"]
343
+ # )
344
+
345
+ # # Teachers Content
346
+ # teachers_page = gr.Group(visible=False)
347
+ # with teachers_page:
348
+ # gr.Markdown("## 👨‍🏫 Teachers")
349
+ # gr.DataFrame(
350
+ # value=teachers_data,
351
+ # headers=["ID", "Name", "Subject", "Qualification"]
352
+ # )
353
+
354
+ # # Courses Content
355
+ # courses_page = gr.Group(visible=False)
356
+ # with courses_page:
357
+ # gr.Markdown("## 📚 Courses")
358
+ # gr.DataFrame(
359
+ # value=courses_data,
360
+ # headers=["ID", "Name", "Instructor", "Level"]
361
+ # )
362
+
363
+ # # YouTube Tool Content
364
+ # youtube_page = gr.Group(visible=False)
365
+ # with youtube_page:
366
+ # gr.Markdown("## Agent for YouTube Content Exploration")
367
+ # with gr.Row():
368
+ # with gr.Column(scale=2):
369
+ # video_url = gr.Textbox(
370
+ # label="YouTube URL",
371
+ # placeholder="https://youtube.com/watch?v=..."
372
+ # )
373
+ # keywords = gr.Textbox(
374
+ # label="Keywords for Recommendations",
375
+ # placeholder="e.g., python programming, machine learning"
376
+ # )
377
+ # analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
378
+
379
+ # with gr.Column(scale=1):
380
+ # video_thumbnail = gr.Image(label="Video Preview")
381
+
382
+ # with gr.Row():
383
+ # with gr.Column():
384
+ # summary = gr.Textbox(label="📝 Summary", lines=8)
385
+ # sentiment = gr.Textbox(label="😊 Content Sentiment")
386
+ # with gr.Column():
387
+ # recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
388
+
389
+ # def login_check(user, pwd):
390
+ # if USER_CREDENTIALS.get(user) == pwd:
391
+ # return {
392
+ # login_page: gr.update(visible=False),
393
+ # main_page: gr.update(visible=True),
394
+ # login_msg: ""
395
+ # }
396
+ # return {
397
+ # login_page: gr.update(visible=True),
398
+ # main_page: gr.update(visible=False),
399
+ # login_msg: "❌ Invalid credentials"
400
+ # }
401
+
402
+ # def show_page(page_name):
403
+ # updates = {
404
+ # dashboard_page: gr.update(visible=False),
405
+ # students_page: gr.update(visible=False),
406
+ # teachers_page: gr.update(visible=False),
407
+ # courses_page: gr.update(visible=False),
408
+ # youtube_page: gr.update(visible=False)
409
+ # }
410
+ # updates[page_name] = gr.update(visible=True)
411
+ # return updates
412
+
413
+ # # Event Handlers
414
+ # login_btn.click(
415
+ # login_check,
416
+ # inputs=[username, password],
417
+ # outputs=[login_page, main_page, login_msg]
418
+ # )
419
+
420
+ # nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
421
+ # nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
422
+ # nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
423
+ # nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
424
+ # nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
425
+
426
+ # analyze_btn.click(
427
+ # process_youtube_video,
428
+ # inputs=[video_url, keywords],
429
+ # outputs=[video_thumbnail, summary, sentiment, recommendations]
430
+ # )
431
+
432
+ # logout_btn.click(
433
+ # lambda: {
434
+ # login_page: gr.update(visible=True),
435
+ # main_page: gr.update(visible=False)
436
+ # },
437
+ # outputs=[login_page, main_page]
438
+ # )
439
+
440
+ # if __name__ == "__main__":
441
+ # app.launch()
442
+
443
  import subprocess
444
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  import re
446
+ from collections import Counter
447
+ from textblob import TextBlob
448
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
449
+ from googleapiclient.discovery import build
450
+ import gradio as gr
451
+
452
+ # Install required packages
453
  def install_missing_packages():
454
  required_packages = {
455
+ "torch": ">=1.11.0",
456
+ "transformers": ">=4.34.0",
457
+ "youtube_transcript_api": ">=0.6.3",
458
+ "pytube": None,
459
  "huggingface_hub": ">=0.19.0",
460
  "PyPDF2": ">=3.0.1",
461
+ "textblob": ">=0.17.1",
462
+ "python-dotenv": ">=1.0.0",
463
+ "genai": None,
464
  "google-generativeai": None,
465
+ "google-cloud-aiplatform": "==1.34.0"
466
  }
467
 
 
468
  for package, version in required_packages.items():
469
  try:
470
  __import__(package)
 
473
  subprocess.check_call(["pip", "install", package_name])
474
 
475
  install_missing_packages()
 
 
 
 
 
 
 
 
476
 
477
  # Configuration
 
 
 
 
 
 
 
 
 
 
478
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
479
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
480
 
481
  if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
482
  raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
def extract_video_id(url):
    """Extract the 11-character YouTube video id from *url*.

    Handles both ``watch?v=<id>`` and ``youtu.be/<id>`` style links.
    Returns the id string, or ``None`` when no id is present.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    if found is None:
        return None
    return found.group(1)
487
 
 
488
  def get_video_metadata(video_id):
 
 
 
489
  try:
 
490
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
491
  request = youtube.videos().list(part="snippet", id=video_id)
492
  response = request.execute()
 
502
  except Exception as e:
503
  return {"title": "Error fetching metadata", "description": str(e)}
504
 
505
def clean_text_for_analysis(text):
    """Collapse every run of whitespace in *text* into a single space."""
    tokens = text.split()
    return " ".join(tokens)
507
 
508
def extract_subtitle_info(text):
    """Build a short stats blurb for *text*: the ten most frequent words,
    a naive sentence count (split on '. '), and the total word count."""
    try:
        sentence_list = text.split(". ")
        word_list = text.split()
        # Counter.most_common keeps insertion order on ties (Python 3.7+).
        top_words = [w for w, _ in Counter(word_list).most_common(10)]
        key_topics = ", ".join(top_words)
        return (
            f"Key topics discussed: {key_topics}. "
            f"\nNumber of sentences: {len(sentence_list)}. "
            f"\nTotal words: {len(word_list)}."
        )
    except Exception as exc:
        return f"Error extracting subtitle information: {str(exc)}"
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  def get_recommendations(keywords, max_results=5):
520
  if not keywords:
521
  return "Please provide search keywords"
 
531
  "key": YOUTUBE_API_KEY
532
  }
533
  ).json()
534
+
535
  results = []
536
  for item in response.get("items", []):
537
  title = item["snippet"]["title"]
538
  channel = item["snippet"]["channelTitle"]
539
  video_id = item["id"]["videoId"]
540
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
541
+
542
  return "\n".join(results) if results else "No recommendations found"
543
  except Exception as e:
544
  return f"Error: {str(e)}"
545
 
546
def process_youtube_video(url, keywords):
    """Analyze a YouTube video: thumbnail, transcript summary, sentiment,
    and keyword-based related-video recommendations.

    Parameters:
        url: a YouTube link containing an 11-character video id.
        keywords: search terms for recommendations; blank skips the search.

    Returns:
        A 4-tuple (thumbnail_url_or_None, summary, sentiment_label,
        recommendations) — always four items, matching the four Gradio
        output components wired to this function.
    """
    try:
        video_id = extract_video_id(url)
        if not video_id:
            # BUG FIX: this branch previously returned FIVE values while every
            # other path (and the four Gradio outputs) expects four, which
            # would crash the UI callback on an invalid URL.
            return None, "Invalid YouTube URL", "N/A", ""

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        summary = "No transcript available"
        sentiment_label = "N/A"
        recommendations = ""

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except NoTranscriptFound:
                # BUG FIX: was a bare `except:` (also swallowed KeyboardInterrupt
                # etc.); only a missing manual transcript should trigger the
                # fall-back to the auto-generated English transcript.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join([t['text'] for t in transcript.fetch()])
            if not text.strip():
                raise ValueError("Transcript is empty")

            cleaned_text = clean_text_for_analysis(text)

            # TextBlob polarity lies in [-1, 1]; its sign picks the label.
            sentiment = TextBlob(cleaned_text).sentiment
            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"

            summary = f"Summary: {cleaned_text[:400]}..."

        except (TranscriptsDisabled, NoTranscriptFound):
            # No captions at all: fall back to the video description.
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "No subtitles available")
            sentiment_label = "N/A"

        if keywords.strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, recommendations

    except Exception as e:
        return None, f"Error: {str(e)}", "N/A", ""
590
+
591
  # Gradio Interface
592
# Gradio Interface: four stacked rows — title, inputs, primary results,
# secondary results — wired to process_youtube_video.
with gr.Blocks() as app:
    # Header row
    with gr.Row():
        gr.Markdown("# YouTube Content Analysis Tool")

    # Input row: video URL, recommendation keywords, and the trigger button.
    with gr.Row():
        video_url = gr.Textbox(label="YouTube URL", placeholder="https://youtube.com/watch?v=...")
        keywords = gr.Textbox(label="Keywords for Recommendations", placeholder="e.g., Python programming")
        analyze_btn = gr.Button("Analyze")

    # Primary results: thumbnail image and transcript summary.
    with gr.Row():
        video_thumbnail = gr.Image(label="Thumbnail")
        summary = gr.Textbox(label="Summary", lines=5)

    # Secondary results: sentiment label and related-video list.
    with gr.Row():
        sentiment = gr.Textbox(label="Sentiment")
        recommendations = gr.Textbox(label="Related Videos", lines=5)

    # Wire the button: the handler returns exactly four values, matching
    # the four output components below.
    analyze_btn.click(
        fn=process_youtube_video,
        inputs=[video_url, keywords],
        outputs=[video_thumbnail, summary, sentiment, recommendations],
    )

if __name__ == "__main__":
    app.launch()