Spaces:

Sayiqa7
/

Youtube_summarization

Runtime error

File size: 13,734 Bytes

import subprocess
import sys

# Runtime dependencies installed at start-up, in this order, before the
# third-party imports below are executed.  Each entry is a pip requirement
# specifier.
_BOOTSTRAP_REQUIREMENTS = (
    "transformers==4.34.0",
    "torch>=1.7.1",
    "youtube_transcript_api>=0.6.3",
    "pytube",
    "huggingface_hub>=0.19.0",
    "PyPDF2>=3.0.1",
    "google-generativeai",
    "textblob>=0.17.1",
    "python-dotenv>=1.0.0",
    "genai",
    "google-cloud-aiplatform==1.34.0",
    "google-api-python-client>=2.0.0",
)

for _requirement in _BOOTSTRAP_REQUIREMENTS:
    # Run pip through the interpreter executing this script so the packages
    # are installed into the environment the imports below resolve against;
    # a bare "pip" on PATH may belong to a different Python installation.
    # check_call raises CalledProcessError if any install fails.
    subprocess.check_call([sys.executable, "-m", "pip", "install", _requirement])
import transformers
import torch
import os 
import youtube_transcript_api
import pytube
import gradio
import PyPDF2
import pathlib
import pandas
import numpy
import textblob
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import google.generativeai as genai
from googleapiclient.discovery import build
import requests
from textblob import TextBlob
import re
#from google.cloud import generativeai
from googleapiclient.discovery import build
from huggingface_hub import login
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
def install_missing_packages(required_packages=None):
    """Install every required distribution that is not already installed.

    Presence is decided from installed distribution metadata rather than by
    importing the name: the keys are pip distribution names, and several of
    them (for example ``python-dotenv`` or ``google-api-python-client``) are
    not valid import names, so an import-based probe always fails for them
    and triggers a reinstall on every start.

    Only presence is checked; an installed distribution whose version does
    not satisfy the specifier is left as is.

    Args:
        required_packages: Optional mapping of pip distribution name to a
            version specifier string (e.g. ``">=1.0.0"``) or ``None`` for
            "any version".  Defaults to the application's requirement table.

    Returns:
        The requirement strings that were handed to pip, in install order.
        Empty when nothing was missing.

    Raises:
        subprocess.CalledProcessError: If a pip invocation exits non-zero.
    """
    import sys
    from importlib import metadata

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.34.0",
            "youtube_transcript_api": ">=0.6.3",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
            "PyPDF2": ">=3.0.1",
            "textblob": ">=0.17.1",
            "python-dotenv": ">=1.0.0",
            "genai": None,
            "google-generativeai": None,
            "google-cloud-aiplatform": "==1.34.0",
            "google-api-python-client": ">=2.0.0",
        }

    installed = []
    for package, version in required_packages.items():
        try:
            metadata.version(package)
        except metadata.PackageNotFoundError:
            requirement = f"{package}{version}" if version else package
            # Use the running interpreter's pip so the package lands in the
            # environment this process imports from.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", requirement]
            )
            installed.append(requirement)
    return installed

install_missing_packages()

# Configuration

# Hugging Face Hub authentication is mandatory: fail fast at start-up when
# the token is not provided.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)
else:
    raise ValueError("HF_TOKEN environment variable not set.")

# API keys are secrets: they are read from the environment below and must
# never be written into this file, not even inside a comment.

# Credentials accepted by the login form.  The defaults are example values
# only; set APP_USERNAME / APP_PASSWORD in the environment to replace them.
USER_CREDENTIALS = {
    os.getenv("APP_USERNAME", "admin"): os.getenv("APP_PASSWORD", "password"),
}

import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound

# Use environment variables
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# Both keys are required; refuse to start without them.
if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
    raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")

genai.configure(api_key=GOOGLE_API_KEY)

# Database
# In-memory sample records shown in the UI tables.  Every table is a list of
# tuples whose first field is a sequential ID starting at 1, so the IDs are
# generated from the row order instead of being typed by hand.

# Student rows: (Name, Grade, Program)
_STUDENT_ROWS = (
    ("Alice", "A", "Computer Science"),
    ("Aliaa", "B", "Mathematics"),
    ("Charlie", "A", "Machine Learning"),
    ("Daan", "A", "Physics"),
    ("Jhon", "C", "Math"),
    ("Emma", "A+", "Computer Science"),
)
students_data = [
    (row_id, *fields) for row_id, fields in enumerate(_STUDENT_ROWS, start=1)
]

# Teacher rows: (Name, Subject, Qualification)
_TEACHER_ROWS = (
    ("Dr. Smith", "Math", "MS Mathematics"),
    ("Ms. Johnson", "Science", "MSc Physics"),
    ("Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
    ("Ms. Evelyn", "Computer Science", "MSc Computer Science"),
)
teachers_data = [
    (row_id, *fields) for row_id, fields in enumerate(_TEACHER_ROWS, start=1)
]

# Course rows: (Name, Instructor, Level)
# NOTE(review): "Ms. Mia" and "Ms. Smith" have no matching name in the
# teacher rows above -- confirm whether that is intentional.
_COURSE_ROWS = (
    ("Algebra", "Dr. Smith", "Advanced"),
    ("Biology", "Ms. Mia", "Intermediate"),
    ("Machine Learning", "Ms. Jack", "Intermediate"),
    ("Computer Science", "Ms. Evelyn", "Intermediate"),
    ("Mathematics", "Ms. Smith", "Intermediate"),
)
courses_data = [
    (row_id, *fields) for row_id, fields in enumerate(_COURSE_ROWS, start=1)
]

import youtube
from google.cloud import language_v1beta3 as language
from google.auth import credentials
# The YouTube Data API key is a secret and is taken from the environment
# only.  A literal key assigned here would silently replace the value this
# module already read from YOUTUBE_API_KEY and would publish the credential
# together with the source.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# Replace with your Google Cloud project ID
PROJECT_ID = "lively-machine-445513-t7"


def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    The ID is taken from the first ``v=<id>`` query parameter or ``/<id>``
    path segment (this covers ``watch?v=``, ``youtu.be/``, ``embed/`` and
    similar forms).  The candidate must be exactly 11 ID characters long:
    a longer run, such as a channel ID, is rejected instead of being
    truncated to its first 11 characters.

    Args:
        url: The URL text to inspect.  ``None`` or an empty string is
            accepted and yields ``None``.

    Returns:
        The video ID string, or ``None`` when no ID is present.
    """
    if not url:
        return None
    # The negative lookahead enforces the "exactly 11 characters" rule.
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None


def get_video_transcript(video_id):
    """Fetch the English transcript of a YouTube video.

    The transcript is retrieved with the ``youtube_transcript_api`` package
    that this module imports.  Fetching is best-effort: any failure
    (transcripts disabled, no English transcript, network error, ...) is
    reported on stdout and turned into ``None`` so callers can fall back to
    other data.

    Args:
        video_id: The ID of the YouTube video.

    Returns:
        A list of dictionaries containing the transcript text for each
        segment (key ``"text"``), or None if the transcript is unavailable.
    """
    if not video_id:
        return None

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older releases of the library expose a static helper that
            # returns the list of segment dicts directly.
            segments = YouTubeTranscriptApi.get_transcript(
                video_id, languages=["en"]
            )
        else:
            # Newer releases use an instance method returning a transcript
            # object; to_raw_data() converts it to the same list of dicts.
            segments = (
                YouTubeTranscriptApi()
                .fetch(video_id, languages=["en"])
                .to_raw_data()
            )
    except Exception as e:
        # Deliberately broad: a missing transcript must never break the
        # caller, which has its own fallback.
        print(f"Error fetching transcript: {str(e)}")
        return None

    # Normalise an empty transcript to the documented "unavailable" value.
    return segments or None


def analyze_sentiment(text):
    """Analyzes the sentiment of a text using Google Cloud Natural Language API.

    The client is created without explicit credentials so the library's
    default credential lookup is used.  (The previous code assigned to a
    local named ``credentials`` while reading the imported module of the
    same name, which raises ``UnboundLocalError`` before any request is
    made.)

    NOTE(review): ``language`` is the file-level alias for
    ``google.cloud.language_v1beta3`` -- confirm that this API version is
    provided by the installed google-cloud-language package.

    Args:
        text: The text to analyze.  Empty or whitespace-only text is treated
            as neutral without calling the API.

    Returns:
        A dictionary containing sentiment score (``"polarity"``) and
        ``"classification"`` (``"Positive"``, ``"Negative"`` or
        ``"Neutral"``).
    """
    if not text or not text.strip():
        return {"polarity": 0.0, "classification": "Neutral"}

    language_client = language.LanguageServiceClient()

    document = language.Document(
        content=text, type_=language.Document.Type.PLAIN_TEXT
    )

    sentiment = language_client.analyze_sentiment(document=document).document_sentiment
    score = sentiment.score

    # The sign of the score decides the label; exactly zero is neutral.
    if score > 0:
        classification = "Positive"
    elif score < 0:
        classification = "Negative"
    else:
        classification = "Neutral"

    return {"polarity": score, "classification": classification}


def _join_transcript_segments(segments):
    """Concatenate the ``"text"`` of transcript segment dicts into one string."""
    if not isinstance(segments, (list, tuple)):
        return ""
    return " ".join(
        segment["text"].strip()
        for segment in segments
        if isinstance(segment, dict) and segment.get("text")
    )


def _fetch_video_description(video_id):
    """Return the video's description from the YouTube Data API, or ``""``."""
    try:
        response = (
            build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
            .videos()
            .list(part="snippet", id=video_id)
            .execute()
        )
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller decide.
        print(f"Error fetching video metadata: {e}")
        return ""

    items = response.get("items", [])
    if not items:
        return ""
    return items[0].get("snippet", {}).get("description", "") or ""


def process_youtube_video(url):
    """Processes a YouTube video URL, returning thumbnail, summary, and sentiment analysis.

    The summary is the first 400 characters of the transcript.  When no
    transcript text is available, the video description is used instead
    and the sentiment is reported as ``"N/A"``.

    Args:
        url: The URL of the YouTube video.

    Returns:
        A 3-tuple ``(thumbnail, summary, sentiment)`` on every path, because
        the result feeds three UI outputs:

        * ``thumbnail``: thumbnail URL, or ``None`` for an invalid URL.
        * ``summary``: summary text or an explanatory message.
        * ``sentiment``: the dictionary returned by ``analyze_sentiment``
          (polarity and classification), or ``"N/A"`` when no transcript
          was analysed.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return None, "Invalid YouTube URL", "N/A"

    thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

    # get_video_transcript is documented to return a list of segment dicts
    # or None; anything else is treated as "no transcript".
    transcript_text = _join_transcript_segments(get_video_transcript(video_id))
    if transcript_text:
        summary = f"Summary: {transcript_text[:400]}..."
        sentiment_analysis = analyze_sentiment(transcript_text)
        return thumbnail, summary, sentiment_analysis

    # Fallback to video description if transcript unavailable
    description = _fetch_video_description(video_id)
    if description:
        return thumbnail, f"Summary: {description[:400]}...", "N/A"

    return thumbnail, "No transcript or description available for this video.", "N/A"

def get_recommendations(keywords, max_results=5):
    """Search YouTube for videos matching *keywords*.

    This is the handler wired to the "Get Recommendations" button below.

    Args:
        keywords: Free-text search terms typed by the user.
        max_results: Maximum number of videos to list.

    Returns:
        A text block with one title and watch URL per video, or a short
        message when there is nothing to show.
    """
    query = (keywords or "").strip()
    if not query:
        return "Please enter one or more keywords."

    try:
        response = (
            build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
            .search()
            .list(q=query, part="snippet", type="video", maxResults=max_results)
            .execute()
        )
    except Exception as e:
        # Best-effort: show a message in the UI instead of a stack trace.
        print(f"Error fetching recommendations: {e}")
        return "Could not fetch recommendations."

    entries = [
        f"{item['snippet']['title']}\nhttps://youtube.com/watch?v={item['id']['videoId']}"
        for item in response.get("items", [])
    ]
    return "\n\n".join(entries) if entries else "No recommendations found."


# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Login Page
    with gr.Group() as login_page:
        gr.Markdown("# 🎓 Educational Learning Management System")
        username = gr.Textbox(label="Username")
        password = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown()

    # Main Interface (hidden until a login succeeds)
    with gr.Group(visible=False) as main_page:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📋 Navigation")
                nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
                nav_students = gr.Button("👥 Students")
                nav_teachers = gr.Button("👨‍🏫 Teachers")
                nav_courses = gr.Button("📚 Courses")
                nav_youtube = gr.Button("🎥 YouTube Tool")
                logout_btn = gr.Button("🚪 Logout", variant="stop")

            with gr.Column(scale=3):
                # Dashboard Content (the only page visible initially)
                dashboard_page = gr.Group()
                with dashboard_page:
                    gr.Markdown("## 📊 Dashboard")
                    gr.Markdown(f"""
                    ### System Overview
                    - 👥 Total Students: {len(students_data)}
                    - 👨‍🏫 Total Teachers: {len(teachers_data)}
                    - 📚 Total Courses: {len(courses_data)}
                    ### Quick Actions
                    - View student performance
                    - Access course materials
                    - Generate learning insights
                    """)

                # Students Content
                students_page = gr.Group(visible=False)
                with students_page:
                    gr.Markdown("## 👥 Students")
                    gr.DataFrame(
                        value=students_data,
                        headers=["ID", "Name", "Grade", "Program"]
                    )

                # Teachers Content
                teachers_page = gr.Group(visible=False)
                with teachers_page:
                    gr.Markdown("## 👨‍🏫 Teachers")
                    gr.DataFrame(
                        value=teachers_data,
                        headers=["ID", "Name", "Subject", "Qualification"]
                    )

                # Courses Content
                courses_page = gr.Group(visible=False)
                with courses_page:
                    gr.Markdown("## 📚 Courses")
                    gr.DataFrame(
                        value=courses_data,
                        headers=["ID", "Name", "Instructor", "Level"]
                    )

                # YouTube Tool Content
                youtube_page = gr.Group(visible=False)
                with youtube_page:
                    gr.Markdown("## Agent for YouTube Content Exploration")
                    with gr.Row():
                        with gr.Column(scale=2):
                            video_url = gr.Textbox(
                                label="YouTube URL",
                                placeholder="https://youtube.com/watch?v=..."
                            )
                            keywords = gr.Textbox(
                                label="Keywords for Recommendations",
                                placeholder="e.g., python programming, machine learning"
                            )
                            analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                            recommend_btn = gr.Button("🔎 Get Recommendations", variant="primary")

                        with gr.Column(scale=1):
                            video_thumbnail = gr.Image(label="Video Preview")

                    with gr.Row():
                        with gr.Column():
                            summary = gr.Textbox(label="📝 Summary", lines=8)
                            sentiment = gr.Textbox(label="😊 Content Sentiment")
                        with gr.Column():
                            recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)

    def login_check(user, pwd):
        """Show the main page when *user*/*pwd* match USER_CREDENTIALS.

        Returns a mapping from component to its new value/update; on failure
        the login page stays visible and an error message is shown.
        """
        if USER_CREDENTIALS.get(user) == pwd:
            return {
                login_page: gr.update(visible=False),
                main_page: gr.update(visible=True),
                login_msg: ""
            }
        return {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False),
            login_msg: "❌ Invalid credentials"
        }

    def show_page(page_name):
        """Return updates that hide every content page except *page_name*.

        *page_name* is the page's Group component itself, not a string.
        """
        updates = {
            dashboard_page: gr.update(visible=False),
            students_page: gr.update(visible=False),
            teachers_page: gr.update(visible=False),
            courses_page: gr.update(visible=False),
            youtube_page: gr.update(visible=False)
        }
        updates[page_name] = gr.update(visible=True)
        return updates

    # Event Handlers
    login_btn.click(
        login_check,
        inputs=[username, password],
        outputs=[login_page, main_page, login_msg]
    )

    # Each navigation button updates all five pages; the outputs list is taken
    # from the keys of the mapping show_page returns.
    nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
    nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
    nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
    nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
    nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))

    analyze_btn.click(
        process_youtube_video,
        inputs=[video_url],
        outputs=[video_thumbnail, summary, sentiment]
    )

    recommend_btn.click(
        get_recommendations,
        inputs=[keywords],
        outputs=[recommendations]
    )

    # Logging out swaps the two top-level groups back.
    logout_btn.click(
        lambda: {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False)
        },
        outputs=[login_page, main_page]
    )

if __name__ == "__main__":
    app.launch()