Spaces:

Sayiqa
/

deployment

Sleeping

App Files Files Community

Sayiqa committed on Dec 30, 2024

Commit

edf026f

verified ·

1 Parent(s): b598aa9

Update app.py

Browse files

Files changed (1) hide show

app.py +327 -89

app.py CHANGED Viewed

@@ -68,116 +68,354 @@ if hf_token:
 else:
     raise ValueError("HF_TOKEN environment variable not set.")
-# GOOGLE_API_KEY = "AIzaSyAURQb9jueh3dBQ4SITgKoR0L2_33en3yU"
-# YOUTUBE_API_KEY = "AIzaSyB7X-RYjZmUuDSMTQsvCfyzURw5bhqOto4"
-# genai.configure(api_key=GOOGLE_API_KEY)
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-genai.configure(api_key=GOOGLE_API_KEY)
 YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
-print("GOOGLE_API_KEY:", os.getenv("GOOGLE_API_KEY"))
-print("YOUTUBE_API_KEY:", os.getenv("YOUTUBE_API_KEY"))
-from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
-import requests
-import gradio as gr
-PROMPT = """You are a YouTube video summarizer. You will be taking the transcript text
-and summarizing the entire video, providing the important points within 250 words.
-Please provide the summary of the text given here: """
-def extract_transcript_details(youtube_video_url):
     try:
-        # Extract video ID
-        if "youtu.be/" in youtube_video_url:
-            video_id = youtube_video_url.split("youtu.be/")[1].split("?")[0]
-        else:
-            video_id = youtube_video_url.split("v=")[1].split("&")[0]
-        # Attempt to fetch transcript
-        available_transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
-        transcript_data = None
         try:
-            # Try English transcript
-            # transcript_data = available_transcripts.find_transcript(['en']).fetch()
-            transcript = YouTubeTranscriptApi.get_transcript('jGwO_UgTS7I', languages=['en'])
-            print(transcript)
-        except NoTranscriptFound:
-            # Try English (India) transcript
-            transcript_data = available_transcripts.find_transcript(['en-IN']).fetch()
         except TranscriptsDisabled:
-            return "Error: Transcripts are disabled for this video."
-        # Combine transcript text
-        transcript = " ".join([entry["text"] for entry in transcript_data])
-        return transcript
-    except VideoUnavailable:
-        return "Error: The video is unavailable."
-    except Exception as e:
-        return f"Error extracting transcript: {str(e)}"
-def generate_gemini_content(transcript_text, prompt=PROMPT):
-    try:
-        # Placeholder for AI model
-        # Replace with actual model call if needed
-        return f"Summary: {transcript_text[:250]}..."  # Example summary
-    except Exception as e:
-        return f"Error generating summary: {str(e)}"
-def get_video_thumbnail(youtube_video_url):
     try:
-        if "youtu.be/" in youtube_video_url:
-            video_id = youtube_video_url.split("youtu.be/")[1].split("?")[0]
-        else:
-            video_id = youtube_video_url.split("v=")[1].split("&")[0]
-        thumbnail_url = f"http://img.youtube.com/vi/{video_id}/0.jpg"
-        return thumbnail_url
     except Exception as e:
-        return None
-def process_video(youtube_video_url):
-    thumbnail = get_video_thumbnail(youtube_video_url)
-    transcript = extract_transcript_details(youtube_video_url)
-    if "Error" in transcript:
-        summary = transcript
-    else:
-        summary = generate_gemini_content(transcript)
-    return thumbnail, summary
-def gradio_interface(youtube_url, user_preferences):
-    thumbnail, summary = process_video(youtube_url)
-    recommendations = "Feature under development."  # Placeholder
-    return thumbnail, summary, recommendations
-# Create Gradio Blocks Interface
-with gr.Blocks() as demo:
-    gr.Markdown("# YouTube Transcript to Notes and Recommendations")
-    with gr.Row():
-        youtube_url_input = gr.Textbox(
-            label="Enter YouTube Video Link",
-            placeholder="https://www.youtube.com/watch?v=VIDEO_ID"
-        )
-        user_preferences_input = gr.Textbox(
-            label="Enter Your Interests/Preferences",
-            placeholder="e.g., machine learning, cooking recipes"
-        )
-    get_notes_button = gr.Button("Get Detailed Notes and Recommendations")
-    with gr.Row():
-        thumbnail_output = gr.Image(label="Video Thumbnail")
-    with gr.Row():
-        summary_output = gr.Textbox(label="Detailed Notes", lines=15)
-        recommendations_output = gr.Textbox(label="Personalized Recommendations", lines=10)
-    get_notes_button.click(
-        fn=gradio_interface,
-        inputs=[youtube_url_input, user_preferences_input],
-        outputs=[thumbnail_output, summary_output, recommendations_output]
     )
-# Launch the Gradio app
 if __name__ == "__main__":
-    demo.launch()

 else:
     raise ValueError("HF_TOKEN environment variable not set.")
import subprocess
import sys

# Runtime dependency bootstrap: each requirement is installed with pip before
# the third-party imports below are attempted.
#
# pip is invoked as ``<current interpreter> -m pip`` instead of a bare ``pip``
# executable so the packages always land in the environment that is actually
# running this script (a bare ``pip`` on PATH may belong to another Python).
# Requirements are installed one at a time, in the original order, and a
# failing install raises ``subprocess.CalledProcessError`` just as before.
_BOOTSTRAP_REQUIREMENTS = (
    "transformers==4.34.0",
    "torch>=1.7.1",
    "youtube_transcript_api>=0.6.3",
    "pytube",
    "huggingface_hub>=0.19.0",
    "PyPDF2>=3.0.1",
    "google-generativeai",
    "textblob>=0.17.1",
    "python-dotenv>=1.0.0",
    "genai",
    "google-cloud-aiplatform==1.34.0",
)
for _requirement in _BOOTSTRAP_REQUIREMENTS:
    subprocess.check_call([sys.executable, "-m", "pip", "install", _requirement])
+import transformers
+import torch
+import os
+import youtube_transcript_api
+import pytube
+import gradio
+import PyPDF2
+import pathlib
+import pandas
+import numpy
+import textblob
+import gradio as gr
+from youtube_transcript_api import YouTubeTranscriptApi
+import google.generativeai as genai
+import requests
+from textblob import TextBlob
+import re
+#from google.cloud import generativeai
+from huggingface_hub import login
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
def install_missing_packages(required_packages=None, installer=None):
    """Install every required distribution that is not already installed.

    The keys of ``required_packages`` are *distribution* names (what you pass
    to ``pip install``), which frequently differ from the importable module
    name (``python-dotenv`` -> ``dotenv``, ``google-generativeai`` ->
    ``google.generativeai``).  Presence is therefore checked through the
    installed-distribution metadata rather than by trying to import the key,
    which would always fail for such packages and trigger a needless
    reinstall on every start.

    Args:
        required_packages: Mapping of distribution name to a version
            specifier string (e.g. ``">=1.0.0"``) or ``None`` for "any
            version".  Defaults to the application's dependency table.
        installer: Callable receiving one pip requirement string for each
            missing distribution.  Defaults to running
            ``<current interpreter> -m pip install <requirement>``.

    Raises:
        subprocess.CalledProcessError: If the default installer is used and
            pip exits with a non-zero status.
    """
    import sys
    from importlib import metadata

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.34.0",
            "youtube_transcript_api": ">=0.6.3",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
            "PyPDF2": ">=3.0.1",
            "textblob": ">=0.17.1",
            "python-dotenv": ">=1.0.0",
            "genai": None,
            "google-generativeai": None,
            "google-cloud-aiplatform": "==1.34.0",
        }

    if installer is None:
        def installer(requirement):
            # Use the running interpreter's pip so the package is installed
            # into the environment this process imports from.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", requirement]
            )

    for package, version in required_packages.items():
        try:
            metadata.version(package)
        except metadata.PackageNotFoundError:
            installer(f"{package}{version}" if version else package)
+install_missing_packages()
# Configuration: authenticate against the Hugging Face Hub.
# The token is read from the environment; start-up is aborted when it is absent.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
# Configuration
import json
import os

# NOTE(security): these built-in accounts keep their passwords in plaintext in
# the source file.  They are retained only as a fallback so existing
# deployments keep working; set APP_USER_CREDENTIALS to override them.
_DEFAULT_USER_CREDENTIALS = {
    "admin": "password123",
    "teacher": "teach2024",
    "student": "learn2024"
}


def load_user_credentials(raw=None):
    """Return the username -> password table used by the login form.

    Args:
        raw: Optional JSON object text such as ``'{"alice": "s3cret"}'``.
            When empty or ``None`` the built-in default accounts are used.

    Returns:
        A new ``dict`` mapping usernames to passwords (both ``str``).

    Raises:
        ValueError: If ``raw`` is not valid JSON, is not a non-empty object,
            or contains non-string keys/values.
    """
    if not raw:
        return dict(_DEFAULT_USER_CREDENTIALS)
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as err:
        raise ValueError("APP_USER_CREDENTIALS must contain valid JSON") from err
    if not isinstance(parsed, dict) or not parsed:
        raise ValueError("APP_USER_CREDENTIALS must be a non-empty JSON object")
    if not all(isinstance(k, str) and isinstance(v, str) for k, v in parsed.items()):
        raise ValueError("APP_USER_CREDENTIALS must map string usernames to string passwords")
    return parsed


USER_CREDENTIALS = load_user_credentials(os.getenv("APP_USER_CREDENTIALS"))
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound

# API keys come from the environment; both are mandatory, so start-up fails
# fast when either one is missing or empty.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
if not (GOOGLE_API_KEY and YOUTUBE_API_KEY):
    raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")

# Hand the Google key to the generative-AI client library.
genai.configure(api_key=GOOGLE_API_KEY)
# In-memory "database": static demo records shown in the UI tables.

# Columns: ID, Name, Grade, Program
students_data = [
    (1, "Alice", "A", "Computer Science"),
    (2, "Aliaa", "B", "Mathematics"),
    (3, "Charlie", "A", "Machine Learning"),
    (4, "Daan", "A", "Physics"),
    (5, "Jhon", "C", "Math"),
    (6, "Emma", "A+", "Computer Science"),
]

# Columns: ID, Name, Subject, Qualification
teachers_data = [
    (1, "Dr. Smith", "Math", "MS Mathematics"),
    (2, "Ms. Johnson", "Science", "MSc Physics"),
    (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
    (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
]

# Columns: ID, Name, Instructor, Level
courses_data = [
    (1, "Algebra", "Dr. Smith", "Advanced"),
    (2, "Biology", "Ms. Mia", "Intermediate"),
    (3, "Machine Learning", "Ms. Jack", "Intermediate"),
    (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
    (5, "Mathematics", "Ms. Smith", "Intermediate"),
]
def sanitize_text(text):
    """Return ``text`` with characters that cannot be UTF-8 encoded replaced.

    Characters that have no UTF-8 encoding (lone surrogates) are replaced by
    ``"?"`` rather than removed; all other characters are returned unchanged.

    Args:
        text: The string to clean, or ``None``.

    Returns:
        The cleaned string; ``""`` when ``text`` is ``None``.
    """
    if text is None:
        return ""
    # Round-tripping through UTF-8 with errors="replace" swaps every
    # unencodable code point for "?".
    return text.encode("utf-8", "replace").decode("utf-8")
def extract_video_id(url):
    """Extract the video id from a YouTube URL.

    Recognised forms include ``watch?v=<id>``, ``youtu.be/<id>``,
    ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/videos/<id>``,
    ``/v/<id>`` and ``/e/<id>``.  Surrounding whitespace is ignored.

    Args:
        url: The URL text entered by the user (may be empty or ``None``).

    Returns:
        The id as a string, or ``None`` when ``url`` is empty, not a string,
        or contains no non-empty id.
    """
    if not url or not isinstance(url, str):
        return None
    # - ``[?&]v=`` anchors the query parameter so names merely ending in "v"
    #   (e.g. ``rev=``) are not mistaken for it.
    # - the dot in ``youtu\.be`` is escaped so it only matches a literal dot.
    # - the id is limited to URL-safe id characters and must be non-empty, so
    #   trailing slashes, fragments or further parameters never leak into it.
    pattern = (
        r'(?:[?&]v=|/videos/|embed/|/shorts/|/live/|youtu\.be/|/v/|/e/)'
        r'([A-Za-z0-9_-]+)'
    )
    match = re.search(pattern, url.strip())
    return match.group(1) if match else None
def process_youtube_video(url="", keywords=""):
    """Summarise a YouTube video and optionally fetch related videos.

    The English transcript is downloaded, summarised with the Gemini model,
    scored for sentiment with TextBlob, and — when ``keywords`` is given —
    complemented with recommendations from ``get_recommendations``.

    Args:
        url: YouTube video URL entered by the user.
        keywords: Optional search keywords for related-video recommendations.

    Returns:
        A 4-tuple ``(thumbnail, summary, sentiment_label, recommendations)``.
        On failure ``summary`` carries the user-facing message,
        ``sentiment_label`` is ``"N/A"`` and ``recommendations`` is ``""``;
        ``thumbnail`` is the preview URL when the video id is known and the
        failure is a missing/disabled transcript, otherwise ``None``.
        This function never raises; unexpected errors are reported in
        ``summary``.
    """
    try:
        url = (url or "").strip()
        if not url:
            return None, "Please enter a YouTube URL", "N/A", ""
        video_id = extract_video_id(url)
        if not video_id:
            return None, "Invalid YouTube URL", "N/A", ""
        # Built before the transcript lookup so the "no transcript" branches
        # below can still show the preview image instead of failing on an
        # unassigned variable.
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            # find_transcript walks the language codes in order and, for each,
            # prefers a manually created transcript over an auto-generated
            # one; it raises NoTranscriptFound itself when nothing matches.
            transcript = transcript_list.find_transcript(['en', 'en-US'])
            text = " ".join([t['text'] for t in transcript.fetch()])
            # Generate summary (input is capped to keep the prompt small)
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this: {text[:4000]}").text
            # Analysis
            sentiment = TextBlob(text[:1000]).sentiment
            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
        except TranscriptsDisabled:
            return thumbnail, "⚠️ This video has disabled subtitles. Please try another video.", "N/A", ""
        except NoTranscriptFound:
            return thumbnail, "⚠️ No English transcript available. Please try another video.", "N/A", ""
        except Exception as e:
            return None, f"⚠️ Error: {str(e)}", "N/A", ""
        # Get recommendations
        keywords = (keywords or "").strip()
        recommendations = get_recommendations(keywords) if keywords else ""
        return thumbnail, summary, sentiment_label, recommendations
    except Exception as e:
        # Last-resort boundary: the UI callback must always return a 4-tuple.
        return None, f"Error: {str(e)}", "N/A", ""
def get_recommendations(keywords, max_results=5):
    """Search YouTube for educational videos matching ``keywords``.

    Args:
        keywords: Free-text search terms; ``"educational "`` is prepended.
        max_results: Maximum number of videos to request.

    Returns:
        A display string with one entry (title, channel, link) per video,
        ``"No recommendations found"`` when the search is empty,
        ``"Please provide search keywords"`` when ``keywords`` is empty, or
        ``"Error: <message>"`` when the request or the API fails.  This
        function never raises.
    """
    if not keywords:
        return "Please provide search keywords"
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params={
                "part": "snippet",
                "q": f"educational {keywords}",
                "type": "video",
                "maxResults": max_results,
                "relevanceLanguage": "en",
                "key": YOUTUBE_API_KEY
            },
            # Without a timeout a stalled connection would block the UI
            # callback indefinitely.
            timeout=10,
        ).json()
        # The API reports failures (bad key, quota exceeded, ...) in an
        # "error" object; surface it instead of claiming there are no results.
        if "error" in response:
            return f"Error: {response['error'].get('message', 'YouTube API request failed')}"
        results = []
        for item in response.get("items", []):
            video_id = item.get("id", {}).get("videoId")
            if not video_id:
                # Skip anything that is not a plain video result rather than
                # discarding the whole list on a KeyError.
                continue
            title = item["snippet"]["title"]
            channel = item["snippet"]["channelTitle"]
            results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
        return "\n".join(results) if results else "No recommendations found"
    except Exception as e:
        return f"Error: {str(e)}"
# Gradio Interface
#
# Layout: a login form, and a main page (hidden until login succeeds) made of
# a navigation column plus five content panes of which one is visible at a
# time.  The callbacks are defined after the layout because they refer to the
# components created in it.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Login Page
    with gr.Group() as login_page:
        gr.Markdown("# 🎓 Educational Learning Management System")
        username = gr.Textbox(label="Username")
        password = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown()
    # Main Interface
    with gr.Group(visible=False) as main_page:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📋 Navigation")
                nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
                nav_students = gr.Button("👥 Students")
                nav_teachers = gr.Button("👨‍🏫 Teachers")
                nav_courses = gr.Button("📚 Courses")
                nav_youtube = gr.Button("🎥 YouTube Tool")
                logout_btn = gr.Button("🚪 Logout", variant="stop")
            with gr.Column(scale=3):
                # Dashboard Content
                dashboard_page = gr.Group()
                with dashboard_page:
                    gr.Markdown("## 📊 Dashboard")
                    gr.Markdown(f"""
                    ### System Overview
                    - 👥 Total Students: {len(students_data)}
                    - 👨‍🏫 Total Teachers: {len(teachers_data)}
                    - 📚 Total Courses: {len(courses_data)}
                    ### Quick Actions
                    - View student performance
                    - Access course materials
                    - Generate learning insights
                    """)
                # Students Content
                students_page = gr.Group(visible=False)
                with students_page:
                    gr.Markdown("## 👥 Students")
                    gr.DataFrame(
                        value=students_data,
                        headers=["ID", "Name", "Grade", "Program"]
                    )
                # Teachers Content
                teachers_page = gr.Group(visible=False)
                with teachers_page:
                    gr.Markdown("## 👨‍🏫 Teachers")
                    gr.DataFrame(
                        value=teachers_data,
                        headers=["ID", "Name", "Subject", "Qualification"]
                    )
                # Courses Content
                courses_page = gr.Group(visible=False)
                with courses_page:
                    gr.Markdown("## 📚 Courses")
                    gr.DataFrame(
                        value=courses_data,
                        headers=["ID", "Name", "Instructor", "Level"]
                    )
                # YouTube Tool Content
                youtube_page = gr.Group(visible=False)
                with youtube_page:
                    gr.Markdown("## Agent for YouTube Content Exploration")
                    with gr.Row():
                        with gr.Column(scale=2):
                            video_url = gr.Textbox(
                                label="YouTube URL",
                                placeholder="https://youtube.com/watch?v=..."
                            )
                            keywords = gr.Textbox(
                                label="Keywords for Recommendations",
                                placeholder="e.g., python programming, machine learning"
                            )
                            analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                        with gr.Column(scale=1):
                            video_thumbnail = gr.Image(label="Video Preview")
                    with gr.Row():
                        with gr.Column():
                            summary = gr.Textbox(label="📝 Summary", lines=8)
                            sentiment = gr.Textbox(label="😊 Content Sentiment")
                        with gr.Column():
                            recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)

    # Every content pane of the main page; exactly one is visible at a time.
    content_pages = [
        dashboard_page,
        students_page,
        teachers_page,
        courses_page,
        youtube_page,
    ]

    def login_check(user, pwd):
        """Show the main page when ``user``/``pwd`` match a known account.

        Returns a component -> update mapping for ``login_page``,
        ``main_page`` and ``login_msg``.
        """
        expected = USER_CREDENTIALS.get(user)
        # An unknown user yields None; it must never compare equal to a
        # missing (None) password, so the account has to exist explicitly.
        authenticated = expected is not None and expected == pwd
        return {
            login_page: gr.update(visible=not authenticated),
            main_page: gr.update(visible=authenticated),
            login_msg: "" if authenticated else "❌ Invalid credentials"
        }

    def show_page(page_name):
        """Return updates that make ``page_name`` the only visible pane."""
        updates = {page: gr.update(visible=False) for page in content_pages}
        updates[page_name] = gr.update(visible=True)
        return updates

    def _make_nav_handler(target_page):
        """Build a zero-argument click callback that shows ``target_page``."""
        def _handler():
            return show_page(target_page)
        return _handler

    def logout():
        """Return to the login form and wipe the previously entered login."""
        return {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False),
            # Clear the form so the next visitor cannot simply press "Login"
            # with the previous user's credentials still filled in.
            username: "",
            password: "",
            login_msg: ""
        }

    # Event Handlers
    login_btn.click(
        login_check,
        inputs=[username, password],
        outputs=[login_page, main_page, login_msg]
    )
    # Each navigation button toggles the same set of panes, so the outputs
    # list is shared instead of being recomputed per button.
    for _nav_button, _target_page in (
        (nav_dashboard, dashboard_page),
        (nav_students, students_page),
        (nav_teachers, teachers_page),
        (nav_courses, courses_page),
        (nav_youtube, youtube_page),
    ):
        _nav_button.click(_make_nav_handler(_target_page), outputs=content_pages)
    analyze_btn.click(
        process_youtube_video,
        inputs=[video_url, keywords],
        outputs=[video_thumbnail, summary, sentiment, recommendations]
    )
    logout_btn.click(
        logout,
        outputs=[login_page, main_page, username, password, login_msg]
    )
if __name__ == "__main__":
    app.launch()