# NOTE: this file was recovered from a Hugging Face Spaces listing; the
# page-status header ("Spaces: / Sleeping") was scrape residue, not code.
# Bootstrap: install required third-party packages at startup.
# NOTE(review): runtime installation is a Spaces workaround; prefer declaring
# these in requirements.txt so builds are reproducible.
import subprocess
import sys

_RUNTIME_REQUIREMENTS = [
    "transformers==4.34.0",
    "torch>=1.7.1",
    "youtube_transcript_api>=0.6.3",
    "pytube",
    "huggingface_hub>=0.19.0",
    "PyPDF2>=3.0.1",
    "google-generativeai",
    "textblob>=0.17.1",
    "python-dotenv>=1.0.0",
    "genai",
    "google-cloud-aiplatform==1.34.0",
]

# Run pip through the current interpreter (`sys.executable -m pip`) so the
# packages land in the environment actually executing this script — a bare
# "pip" on PATH may belong to a different Python.  One invocation instead of
# eleven also cuts startup time.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", *_RUNTIME_REQUIREMENTS]
)
| import transformers | |
| import torch | |
| import os | |
| import youtube_transcript_api | |
| import pytube | |
| import gradio | |
| import PyPDF2 | |
| import pathlib | |
| import pandas | |
| import numpy | |
| import textblob | |
| import gradio as gr | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import google.generativeai as genai | |
| import requests | |
| from textblob import TextBlob | |
| import re | |
| #from google.cloud import generativeai | |
| from huggingface_hub import login | |
| from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound | |
def install_missing_packages(required_packages=None):
    """Install any required third-party packages that are not importable.

    Args:
        required_packages: Optional mapping of pip distribution name to a
            version specifier string (e.g. ">=1.0.0"), or None meaning
            "any version".  Defaults to this app's requirement table.

    Side effects:
        Runs ``pip install`` through the current interpreter for every
        package whose import probe fails.
    """
    import sys  # local import keeps this bootstrap helper self-contained

    if required_packages is None:
        required_packages = {
            "torch": ">=1.11.0",
            "transformers": ">=4.34.0",
            "youtube_transcript_api": ">=0.6.3",
            "pytube": None,
            "huggingface_hub": ">=0.19.0",
            "PyPDF2": ">=3.0.1",
            "textblob": ">=0.17.1",
            "python-dotenv": ">=1.0.0",
            "genai": None,
            "google-generativeai": None,
            "google-cloud-aiplatform": "==1.34.0",
        }
    # BUG FIX: pip distribution names differ from importable module names for
    # several packages.  Probing __import__ with the pip name
    # ("python-dotenv", "google-generativeai", ...) always raised
    # ImportError, so those packages were re-installed on every startup.
    import_names = {
        "python-dotenv": "dotenv",
        "google-generativeai": "google.generativeai",
        "google-cloud-aiplatform": "google.cloud.aiplatform",
    }
    for package, version in required_packages.items():
        try:
            __import__(import_names.get(package, package))
        except ImportError:
            spec = f"{package}{version}" if version else package
            # Use the running interpreter's pip so the install targets the
            # same environment that executes this script.
            subprocess.check_call([sys.executable, "-m", "pip", "install", spec])


install_missing_packages()
# Configuration: authenticate to the Hugging Face Hub with a token read from
# the environment (credentials are never hard-coded).
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    # Fail fast with a clear message rather than hitting an auth error later.
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
# Configuration
# Demo login accounts for the Gradio front page.
# NOTE(review): plaintext credentials in source are acceptable only for a toy
# demo; real deployments should hash passwords and load them from a secret
# store, not commit them.
USER_CREDENTIALS = {
    "admin": "password123",
    "teacher": "teach2024",
    "student": "learn2024",
}
| import os | |
| from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound | |
# API keys come from environment variables; abort startup when either is
# missing or empty so failures surface immediately instead of mid-request.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
if not (GOOGLE_API_KEY and YOUTUBE_API_KEY):
    raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
# Bind the Gemini client to our key once, at import time.
genai.configure(api_key=GOOGLE_API_KEY)
# Database
# In-memory demo tables; rows are plain tuples so the Gradio DataFrame
# components can render them directly.

# (id, name, grade, program)
students_data = [
    (1, "Alice", "A", "Computer Science"),
    (2, "Aliaa", "B", "Mathematics"),
    (3, "Charlie", "A", "Machine Learning"),
    (4, "Daan", "A", "Physics"),
    (5, "Jhon", "C", "Math"),
    (6, "Emma", "A+", "Computer Science"),
]

# (id, name, subject, qualification)
teachers_data = [
    (1, "Dr. Smith", "Math", "MS Mathematics"),
    (2, "Ms. Johnson", "Science", "MSc Physics"),
    (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
    (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
]

# (id, name, instructor, level)
courses_data = [
    (1, "Algebra", "Dr. Smith", "Advanced"),
    (2, "Biology", "Ms. Mia", "Intermediate"),
    (3, "Machine Learning", "Ms. Jack", "Intermediate"),
    (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
    (5, "Mathematics", "Ms. Smith", "Intermediate"),
]
def sanitize_text(text):
    """Return *text* with characters that cannot be UTF-8 encoded replaced.

    Round-trips through UTF-8 with errors="replace", so e.g. lone
    surrogates become "?" while all valid text passes through unchanged.
    """
    encoded = text.encode("utf-8", "replace")
    return encoded.decode("utf-8")
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Args:
        url: Any YouTube URL form (watch, youtu.be, embed, /v/, /e/).

    Returns:
        The video ID string, or None when the URL is empty/unrecognized.
    """
    if not url:
        return None
    # YouTube IDs are exactly 11 chars drawn from [0-9A-Za-z_-]; anchoring
    # on that fixes two bugs in the old pattern: the unescaped dot in
    # "youtu.be" matched any character, and "[^#&?]*" could match the empty
    # string, returning "" instead of None for URLs like "...watch?v=".
    patterns = [
        r'(?:v=|/videos/|embed/|youtu\.be/|/v/|/e/|watch\?v=)([0-9A-Za-z_-]{11})'
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
| from textblob import TextBlob | |
| from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound | |
| import re | |
| from collections import Counter | |
| from googleapiclient.discovery import build | |
| # def process_youtube_video(url="", keywords=""): | |
| # try: | |
| # #Initialize variables | |
| # thumbnail = None | |
| # summary = "No transcript available" | |
| # sentiment_label = "N/A" | |
| # recommendations = "" | |
| # subtitle_info = "No additional information available" | |
| # if not url.strip(): | |
| # return None, "Please enter a YouTube URL", "N/A", "", "" | |
| # video_id = extract_video_id(url) | |
| # if not video_id: | |
| # return None, "Invalid YouTube URL", "N/A", "", "" | |
| # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg" | |
| # try: | |
| # # Fetch transcript | |
| # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) | |
| # transcript = None | |
| # try: | |
| # transcript = transcript_list.find_transcript(['en']) | |
| # except: | |
| # transcript = transcript_list.find_generated_transcript(['en']) | |
| # text = " ".join([t['text'] for t in transcript.fetch()]) | |
| # if not text.strip(): | |
| # raise ValueError("Transcript is empty") | |
| # # Clean up the text for sentiment analysis | |
| # cleaned_text = clean_text_for_analysis(text) | |
| # # Sentiment analysis | |
| # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis | |
| # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})" | |
| # # Generate summary | |
| # model = genai.GenerativeModel("gemini-pro") | |
| # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text | |
| # # Extract subtitle information | |
| # subtitle_info = extract_subtitle_info(cleaned_text) | |
| # except TranscriptsDisabled: | |
| # metadata = get_video_metadata(video_id) | |
| # summary = metadata.get("description", "β οΈ This video has disabled subtitles.") | |
| # sentiment_label = "N/A" | |
| # subtitle_info = "No subtitles available for analysis." | |
| # except NoTranscriptFound: | |
| # metadata = get_video_metadata(video_id) | |
| # summary = metadata.get("description", "β οΈ No English transcript available.") | |
| # sentiment_label = "N/A" | |
| # subtitle_info = "No subtitles available for analysis." | |
| # except Exception as e: | |
| # return thumbnail, f"β οΈ Error processing transcript: {str(e)}", "N/A", "", "" | |
| # # Get recommendations | |
| # if keywords.strip(): | |
| # recommendations = get_recommendations(keywords) | |
| # return thumbnail, summary, sentiment_label, subtitle_info, recommendations | |
| # except Exception as e: | |
| # return None, f"Error: {str(e)}", "N/A", "", "" | |
| # def extract_video_id(url): | |
| # """ | |
| # Extracts the video ID from a YouTube URL. | |
| # """ | |
| # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url) | |
| # return match.group(1) if match else None | |
| # def get_video_metadata(video_id): | |
| # """ | |
| # Fetches video metadata such as title and description using the YouTube Data API. | |
| # """ | |
| # try: | |
# YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")  # SECURITY: a real API key was previously committed on this line — revoke/rotate it and load from the environment
| # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY) | |
| # request = youtube.videos().list(part="snippet", id=video_id) | |
| # response = request.execute() | |
| # if "items" in response and len(response["items"]) > 0: | |
| # snippet = response["items"][0]["snippet"] | |
| # return { | |
| # "title": snippet.get("title", "No title available"), | |
| # "description": snippet.get("description", "No description available"), | |
| # } | |
| # return {} | |
| # except Exception as e: | |
| # return {"title": "Error fetching metadata", "description": str(e)} | |
| # def extract_subtitle_info(text): | |
| # """ | |
| # Extracts meaningful information from the subtitles. | |
| # This could include topics, key insights, or a breakdown of the content. | |
| # """ | |
| # try: | |
| # # Split text into sentences for better analysis | |
| # sentences = text.split(". ") | |
| # # Example: Extract key topics or keywords | |
| # words = text.split() | |
| # common_words = Counter(words).most_common(10) | |
| # key_topics = ", ".join([word for word, count in common_words]) | |
| # # Example: Provide a breakdown of the content | |
| # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}." | |
| # return info | |
| # except Exception as e: | |
| # return f"Error extracting subtitle information: {str(e)}" | |
| # def clean_text_for_analysis(text): | |
| # """ | |
| # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements. | |
| # """ | |
| # # Remove extra spaces and line breaks | |
| # cleaned_text = " ".join(text.split()) | |
| # return cleaned_text | |
| # def get_recommendations(keywords): | |
| # """ | |
| # Fetches related video recommendations based on the provided keywords. | |
| # This function can be expanded with a proper API or custom logic. | |
| # """ | |
| # # Placeholder for fetching recommendations based on keywords | |
| # return f"Recommendations for: {keywords}" # Dummy return for now | |
| ###################################### | |
| # from textblob import TextBlob | |
| # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound | |
| # import re | |
| # from collections import Counter | |
| # from googleapiclient.discovery import build | |
| # import os | |
| # # Set your YouTube API key | |
| # YOUTUBE_API_KEY = "YOUR_API_KEY_HERE" # Replace with your actual API key | |
| # # Alternatively, you can set it as an environment variable: | |
| # # YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY') | |
| # def process_youtube_video(url=""): | |
| # """ | |
| # Process a YouTube video URL and return sentiment analysis of its content. | |
| # """ | |
| # try: | |
| # # Input validation | |
| # if not url.strip(): | |
| # return {"error": "Please enter a YouTube URL"} | |
| # # Extract video ID | |
| # video_id = extract_video_id(url) | |
| # if not video_id: | |
| # return {"error": "Invalid YouTube URL"} | |
| # # Get video transcript | |
| # text = get_video_transcript(video_id) | |
| # if isinstance(text, dict) and "error" in text: | |
| # return text | |
| # # Get video metadata | |
| # metadata = get_video_metadata(video_id) | |
| # if "error" in metadata: | |
| # return metadata | |
| # # Perform sentiment analysis | |
| # sentiment_result = analyze_sentiment(text) | |
| # return { | |
| # "success": True, | |
| # "metadata": metadata, | |
| # "sentiment": sentiment_result, | |
| # "video_id": video_id | |
| # } | |
| # except Exception as e: | |
| # return {"error": f"An error occurred: {str(e)}"} | |
| # def get_video_metadata(video_id): | |
| # """ | |
| # Fetches video metadata using the YouTube Data API. | |
| # """ | |
| # try: | |
| # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY) | |
| # request = youtube.videos().list( | |
| # part="snippet", | |
| # id=video_id | |
| # ) | |
| # response = request.execute() | |
| # if response.get("items"): | |
| # snippet = response["items"][0]["snippet"] | |
| # return { | |
| # "title": snippet.get("title", ""), | |
| # "description": snippet.get("description", ""), | |
| # "publishedAt": snippet.get("publishedAt", ""), | |
| # "channelTitle": snippet.get("channelTitle", "") | |
| # } | |
| # return {"error": "Video not found"} | |
| # except Exception as e: | |
| # return {"error": f"Error fetching metadata: {str(e)}"} | |
| # # [Previous functions remain the same: get_video_transcript, analyze_sentiment, | |
| # # extract_video_id, clean_text_for_analysis, get_detailed_sentiment] | |
| # # Example usage with proper error handling: | |
| # if __name__ == "__main__": | |
| # # Example with a real YouTube URL | |
| # test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ" # Replace with any YouTube URL | |
| # # Check if API key is set | |
# if not YOUTUBE_API_KEY:  # SECURITY: a real API key was previously committed on this line — revoke/rotate it
| # print("Error: Please set your YouTube API key first!") | |
| # else: | |
| # result = process_youtube_video(test_url) | |
| # if "error" in result: | |
| # print(f"Error: {result['error']}") | |
| # else: | |
| # print("\n=== Video Information ===") | |
| # print(f"Title: {result['metadata']['title']}") | |
| # print(f"Channel: {result['metadata']['channelTitle']}") | |
| # print("\n=== Sentiment Analysis Results ===") | |
| # sentiment = result['sentiment'] | |
| # print(f"Overall Sentiment: {sentiment['overall_sentiment']}") | |
| # print(f"Average Polarity: {sentiment['average_polarity']}") | |
| # print("\nSentiment Distribution:") | |
| # dist = sentiment['sentiment_distribution'] | |
| # total = sum(dist.values()) | |
| # if total > 0: | |
| # print(f"Positive: {dist['positive']} ({(dist['positive']/total*100):.1f}%)") | |
| # print(f"Neutral: {dist['neutral']} ({(dist['neutral']/total*100):.1f}%)") | |
| # print(f"Negative: {dist['negative']} ({(dist['negative']/total*100):.1f}%)") | |
| # print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}") | |
| ##################################################################################################### | |
def _fetch_transcript_text(video_id):
    """Return the best-available English transcript text for *video_id*.

    Tries, in order: a direct English fetch, each English variant in the
    transcript listing, an auto-generated English transcript, and finally
    translating a translatable manually-created transcript to English.

    Raises:
        Exception: the last underlying youtube_transcript_api error, or
            ValueError when no usable transcript exists.
    """
    # Method 1: direct transcript fetch.
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        return " ".join(seg['text'] for seg in segments)
    except Exception:
        pass
    # Method 2: walk the transcript listing for English variants.
    # BUG FIX: the old code referenced transcript_list inside an except
    # clause where it might never have been bound (NameError), probed the
    # non-language code 'a.en', and called find_manually_created_transcript()
    # without its required language list (TypeError).
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    for lang_code in ('en', 'en-US', 'en-GB'):
        try:
            transcript = transcript_list.find_transcript([lang_code])
            return " ".join(seg['text'] for seg in transcript.fetch())
        except Exception:
            continue
    try:
        transcript = transcript_list.find_generated_transcript(['en'])
        return " ".join(seg['text'] for seg in transcript.fetch())
    except Exception:
        pass
    # Method 3: translate a manually-created transcript to English.
    for transcript in transcript_list:
        if not transcript.is_generated and transcript.is_translatable:
            translated = transcript.translate('en')
            return " ".join(seg['text'] for seg in translated.fetch())
    raise ValueError("No usable transcript found")


def process_youtube_video(url="", keywords=""):
    """Analyze a YouTube video for the Gradio UI.

    Args:
        url: YouTube video URL entered by the user.
        keywords: Optional search terms for related-video recommendations.

    Returns:
        A 4-tuple (thumbnail_url, summary, sentiment_label, recommendations)
        matching the analyze_btn outputs wiring; error states are reported
        through the summary slot rather than raised.
    """
    try:
        thumbnail = None
        summary = ""
        sentiment_label = "N/A"
        recommendations = ""
        if not url.strip():
            return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
        video_id = extract_video_id(url)
        if not video_id:
            return thumbnail, "Invalid YouTube URL", sentiment_label, recommendations
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        try:
            text = _fetch_transcript_text(video_id)
            # Keep word characters, whitespace and periods, then collapse
            # whitespace runs, before feeding the analyzers.
            cleaned_text = re.sub(r'[^\w\s.]', '', text)
            cleaned_text = ' '.join(cleaned_text.split())
            # Sentiment on a bounded prefix keeps TextBlob fast on long talks.
            blob = TextBlob(cleaned_text[:2000])
            polarity = blob.sentiment.polarity
            subjectivity = blob.sentiment.subjectivity
            sentiment_label = (
                f"Sentiment: {'Positive' if polarity > 0 else 'Negative' if polarity < 0 else 'Neutral'}\n"
                f"Confidence: {abs(polarity):.2f}\n"
                f"Subjectivity: {subjectivity:.2f}"
            )
            # Gemini summary; prompt is truncated to stay inside limits.
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text
        except Exception as e:
            print(f"Debug - Transcript Error: {str(e)}")  # Debug logging
            return thumbnail, f"β οΈ Unable to process video: {str(e)}", "N/A", recommendations
        # Get recommendations only when the user supplied keywords.
        if keywords.strip():
            recommendations = get_recommendations(keywords)
        return thumbnail, summary, sentiment_label, recommendations
    except Exception as e:
        print(f"Debug - Main Error: {str(e)}")  # Debug logging
        return None, f"Error: {str(e)}", "N/A", ""
def get_recommendations(keywords, max_results=5, timeout=10):
    """Search YouTube for educational videos matching *keywords*.

    Args:
        keywords: Free-text search terms; falsy values return a prompt.
        max_results: Number of videos to request (default 5).
        timeout: Seconds before the HTTP request is abandoned.  New,
            backward-compatible parameter — the old code had no timeout and
            could hang the Gradio worker indefinitely on a stalled request.

    Returns:
        A formatted multi-line string of results, or a prompt/error message.
    """
    if not keywords:
        return "Please provide search keywords"
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params={
                "part": "snippet",
                "q": f"educational {keywords}",
                "type": "video",
                "maxResults": max_results,
                "relevanceLanguage": "en",
                "key": YOUTUBE_API_KEY
            },
            timeout=timeout,  # never block the UI forever on a dead socket
        ).json()
        results = []
        for item in response.get("items", []):
            snippet = item["snippet"]
            title = snippet["title"]
            channel = snippet["channelTitle"]
            video_id = item["id"]["videoId"]
            results.append(f"πΊ {title}\nπ€ {channel}\nπ https://youtube.com/watch?v={video_id}\n")
        return "\n".join(results) if results else "No recommendations found"
    except Exception as e:
        return f"Error: {str(e)}"
# Gradio Interface
# Single-page app: a login gate, then a sidebar-navigated dashboard with
# student/teacher/course tables and the YouTube analysis tool.  Page
# switching works by toggling the `visible` property of pre-built groups.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Login Page (shown first; hidden once login_check succeeds)
    with gr.Group() as login_page:
        gr.Markdown("# π Educational Learning Management System")
        username = gr.Textbox(label="Username")
        password = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown()
    # Main Interface (hidden until a successful login)
    with gr.Group(visible=False) as main_page:
        with gr.Row():
            # Left column: navigation buttons that swap the visible page.
            with gr.Column(scale=1):
                gr.Markdown("### π Navigation")
                nav_dashboard = gr.Button("π Dashboard", variant="primary")
                nav_students = gr.Button("π₯ Students")
                nav_teachers = gr.Button("π¨βπ« Teachers")
                nav_courses = gr.Button("π Courses")
                nav_youtube = gr.Button("π₯ YouTube Tool")
                logout_btn = gr.Button("πͺ Logout", variant="stop")
            # Right column: one Group per page; exactly one is visible.
            with gr.Column(scale=3):
                # Dashboard Content
                dashboard_page = gr.Group()
                with dashboard_page:
                    gr.Markdown("## π Dashboard")
                    # Counts are baked in at build time from the module-level
                    # demo tables (f-string evaluated once, not per view).
                    gr.Markdown(f"""
                    ### System Overview
                    - π₯ Total Students: {len(students_data)}
                    - π¨βπ« Total Teachers: {len(teachers_data)}
                    - π Total Courses: {len(courses_data)}
                    ### Quick Actions
                    - View student performance
                    - Access course materials
                    - Generate learning insights
                    """)
                # Students Content
                students_page = gr.Group(visible=False)
                with students_page:
                    gr.Markdown("## π₯ Students")
                    gr.DataFrame(
                        value=students_data,
                        headers=["ID", "Name", "Grade", "Program"]
                    )
                # Teachers Content
                teachers_page = gr.Group(visible=False)
                with teachers_page:
                    gr.Markdown("## π¨βπ« Teachers")
                    gr.DataFrame(
                        value=teachers_data,
                        headers=["ID", "Name", "Subject", "Qualification"]
                    )
                # Courses Content
                courses_page = gr.Group(visible=False)
                with courses_page:
                    gr.Markdown("## π Courses")
                    gr.DataFrame(
                        value=courses_data,
                        headers=["ID", "Name", "Instructor", "Level"]
                    )
                # YouTube Tool Content
                youtube_page = gr.Group(visible=False)
                with youtube_page:
                    gr.Markdown("## Agent for YouTube Content Exploration")
                    with gr.Row():
                        with gr.Column(scale=2):
                            video_url = gr.Textbox(
                                label="YouTube URL",
                                placeholder="https://youtube.com/watch?v=..."
                            )
                            keywords = gr.Textbox(
                                label="Keywords for Recommendations",
                                placeholder="e.g., python programming, machine learning"
                            )
                            analyze_btn = gr.Button("π Analyze Video", variant="primary")
                        with gr.Column(scale=1):
                            video_thumbnail = gr.Image(label="Video Preview")
                    with gr.Row():
                        with gr.Column():
                            summary = gr.Textbox(label="π Summary", lines=8)
                            sentiment = gr.Textbox(label="π Content Sentiment")
                        with gr.Column():
                            recommendations = gr.Textbox(label="π― Related Videos", lines=10)

    def login_check(user, pwd):
        """Swap login page for main page when the credentials match.

        Returns a dict keyed by components, Gradio's mapping form for
        multi-output updates.
        NOTE(review): plaintext credential comparison — demo only.
        """
        if USER_CREDENTIALS.get(user) == pwd:
            return {
                login_page: gr.update(visible=False),
                main_page: gr.update(visible=True),
                login_msg: ""
            }
        return {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False),
            login_msg: "β Invalid credentials"
        }

    def show_page(page_name):
        """Hide every content page, then reveal *page_name* (a gr.Group)."""
        updates = {
            dashboard_page: gr.update(visible=False),
            students_page: gr.update(visible=False),
            teachers_page: gr.update(visible=False),
            courses_page: gr.update(visible=False),
            youtube_page: gr.update(visible=False)
        }
        updates[page_name] = gr.update(visible=True)
        return updates

    # Event Handlers
    login_btn.click(
        login_check,
        inputs=[username, password],
        outputs=[login_page, main_page, login_msg]
    )
    # NOTE: show_page(...) is also called once here at wiring time purely to
    # enumerate the output components; the lambda re-runs it on each click.
    nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
    nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
    nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
    nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
    nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
    analyze_btn.click(
        process_youtube_video,
        inputs=[video_url, keywords],
        outputs=[video_thumbnail, summary, sentiment, recommendations]
    )
    # Logout simply restores the login/main visibility state.
    logout_btn.click(
        lambda: {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False)
        },
        outputs=[login_page, main_page]
    )

if __name__ == "__main__":
    app.launch()