# deployment/app.py — Hugging Face Space app (Sayiqa)
# Commit 985d66e (verified), ~24.3 kB.
# (The lines above/around this header were hosting-UI page chrome —
# "raw / history / blame" — captured by scraping, not program code.)
import subprocess
import sys

# Runtime dependency bootstrap for the hosted (Hugging Face Spaces) runtime:
# install everything this app imports before the import statements below run.
# A single pip invocation replaces the original twelve subprocess spawns, and
# `sys.executable -m pip` targets the interpreter actually running this file
# instead of whichever `pip` binary happens to be first on PATH.
subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "transformers==4.34.0",
    "torch>=1.7.1",
    "youtube_transcript_api>=0.6.3",
    "pytube",
    "huggingface_hub>=0.19.0",
    "PyPDF2>=3.0.1",
    "google-generativeai",
    "textblob>=0.17.1",
    "python-dotenv>=1.0.0",
    "genai",
    "google-cloud-aiplatform==1.34.0",
])
import transformers
import torch
import os
import youtube_transcript_api
import pytube
import gradio
import PyPDF2
import pathlib
import pandas
import numpy
import textblob
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import google.generativeai as genai
import requests
from textblob import TextBlob
import re
#from google.cloud import generativeai
from huggingface_hub import login
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
def install_missing_packages():
    """Install any runtime dependency that is not already importable.

    The original mapping used pip distribution names as import names, but
    several distributions install modules under a different name (pip's
    ``python-dotenv`` is imported as ``dotenv``, ``google-generativeai`` as
    ``google.generativeai``, ``google-cloud-aiplatform`` as
    ``google.cloud.aiplatform``), so the ImportError probe always failed for
    those entries and pip ran on every startup.  Each entry therefore maps
    the pip name to the module name actually used by ``import``.
    """
    import importlib
    import sys

    # pip distribution name -> (importable module name, version spec or None)
    required_packages = {
        "torch": ("torch", ">=1.11.0"),
        "transformers": ("transformers", ">=4.34.0"),
        "youtube_transcript_api": ("youtube_transcript_api", ">=0.6.3"),
        "pytube": ("pytube", None),
        "huggingface_hub": ("huggingface_hub", ">=0.19.0"),
        "PyPDF2": ("PyPDF2", ">=3.0.1"),
        "textblob": ("textblob", ">=0.17.1"),
        "python-dotenv": ("dotenv", ">=1.0.0"),
        "genai": ("genai", None),
        "google-generativeai": ("google.generativeai", None),
        "google-cloud-aiplatform": ("google.cloud.aiplatform", "==1.34.0"),
    }
    for package, (module_name, version) in required_packages.items():
        try:
            importlib.import_module(module_name)
        except ImportError:
            # Append the version specifier only when one is pinned.
            spec = f"{package}{version}" if version else package
            subprocess.check_call([sys.executable, "-m", "pip", "install", spec])


install_missing_packages()
# Configuration
# Authenticate with the Hugging Face Hub; the token must come from the
# environment (never hard-coded), and a missing token fails fast.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
# Configuration
# Demo login credentials: username -> plaintext password, checked verbatim
# by the login handler below.
# NOTE(review): plaintext credentials committed to source are acceptable only
# for a demo; a real deployment should verify hashed passwords loaded from a
# secrets store.
USER_CREDENTIALS = {
    "admin": "password123",
    "teacher": "teach2024",
    "student": "learn2024"
}
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
# Use environment variables
# Both API keys are mandatory; the Gemini client is configured with the
# Google key, while the YouTube key is used later for search requests.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
if not (GOOGLE_API_KEY and YOUTUBE_API_KEY):
    raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
genai.configure(api_key=GOOGLE_API_KEY)
# Database
# Static in-memory demo records; there is no real database behind this app.
# Each student row: (id, name, grade, program) — rendered by the Students
# DataFrame below.
students_data = [
    (1, "Alice", "A", "Computer Science"),
    (2, "Aliaa", "B", "Mathematics"),
    (3, "Charlie", "A", "Machine Learning"),
    (4, "Daan", "A", "Physics"),
    (5, "Jhon", "C", "Math"),
    (6, "Emma", "A+", "Computer Science")
]
# Each teacher row: (id, name, subject, qualification)
teachers_data = [
    (1, "Dr. Smith", "Math", "MS Mathematics"),
    (2, "Ms. Johnson", "Science", "MSc Physics"),
    (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
    (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
]
# Each course row: (id, name, instructor, level)
courses_data = [
    (1, "Algebra", "Dr. Smith", "Advanced"),
    (2, "Biology", "Ms. Mia", "Intermediate"),
    (3, "Machine Learning", "Ms. Jack", "Intermediate"),
    (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
    (5, "Mathematics", "Ms. Smith", "Intermediate")
]
def sanitize_text(text):
    """Return *text* round-tripped through UTF-8 with invalid characters replaced.

    Code points that cannot be encoded as UTF-8 (e.g. lone surrogates) are
    substituted via the 'replace' error handler, so the result is always a
    valid UTF-8-encodable string.
    """
    encoded = text.encode("utf-8", "replace")
    return encoded.decode("utf-8")
def extract_video_id(url):
    """Extract the YouTube video id from *url*.

    Supports watch (?v=), /videos/, embed/, short youtu.be, /v/ and /e/
    URL forms.  Returns None for a falsy url or when no pattern matches.
    """
    if not url:
        return None
    # Fix: the dot in "youtu.be" is now escaped — unescaped it matched any
    # character, so e.g. "youtuXbe/..." would have been accepted.
    patterns = [
        r'(?:v=|\/videos\/|embed\/|youtu\.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
from textblob import TextBlob
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
import re
from collections import Counter
from googleapiclient.discovery import build
# def process_youtube_video(url="", keywords=""):
# try:
# #Initialize variables
# thumbnail = None
# summary = "No transcript available"
# sentiment_label = "N/A"
# recommendations = ""
# subtitle_info = "No additional information available"
# if not url.strip():
# return None, "Please enter a YouTube URL", "N/A", "", ""
# video_id = extract_video_id(url)
# if not video_id:
# return None, "Invalid YouTube URL", "N/A", "", ""
# thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
# try:
# # Fetch transcript
# transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
# transcript = None
# try:
# transcript = transcript_list.find_transcript(['en'])
# except:
# transcript = transcript_list.find_generated_transcript(['en'])
# text = " ".join([t['text'] for t in transcript.fetch()])
# if not text.strip():
# raise ValueError("Transcript is empty")
# # Clean up the text for sentiment analysis
# cleaned_text = clean_text_for_analysis(text)
# # Sentiment analysis
# sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
# sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
# # Generate summary
# model = genai.GenerativeModel("gemini-pro")
# summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
# # Extract subtitle information
# subtitle_info = extract_subtitle_info(cleaned_text)
# except TranscriptsDisabled:
# metadata = get_video_metadata(video_id)
# summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
# sentiment_label = "N/A"
# subtitle_info = "No subtitles available for analysis."
# except NoTranscriptFound:
# metadata = get_video_metadata(video_id)
# summary = metadata.get("description", "⚠️ No English transcript available.")
# sentiment_label = "N/A"
# subtitle_info = "No subtitles available for analysis."
# except Exception as e:
# return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
# # Get recommendations
# if keywords.strip():
# recommendations = get_recommendations(keywords)
# return thumbnail, summary, sentiment_label, subtitle_info, recommendations
# except Exception as e:
# return None, f"Error: {str(e)}", "N/A", "", ""
# def extract_video_id(url):
# """
# Extracts the video ID from a YouTube URL.
# """
# match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
# return match.group(1) if match else None
# def get_video_metadata(video_id):
# """
# Fetches video metadata such as title and description using the YouTube Data API.
# """
# try:
# YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
# youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
# request = youtube.videos().list(part="snippet", id=video_id)
# response = request.execute()
# if "items" in response and len(response["items"]) > 0:
# snippet = response["items"][0]["snippet"]
# return {
# "title": snippet.get("title", "No title available"),
# "description": snippet.get("description", "No description available"),
# }
# return {}
# except Exception as e:
# return {"title": "Error fetching metadata", "description": str(e)}
# def extract_subtitle_info(text):
# """
# Extracts meaningful information from the subtitles.
# This could include topics, key insights, or a breakdown of the content.
# """
# try:
# # Split text into sentences for better analysis
# sentences = text.split(". ")
# # Example: Extract key topics or keywords
# words = text.split()
# common_words = Counter(words).most_common(10)
# key_topics = ", ".join([word for word, count in common_words])
# # Example: Provide a breakdown of the content
# info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
# return info
# except Exception as e:
# return f"Error extracting subtitle information: {str(e)}"
# def clean_text_for_analysis(text):
# """
# Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
# """
# # Remove extra spaces and line breaks
# cleaned_text = " ".join(text.split())
# return cleaned_text
# def get_recommendations(keywords):
# """
# Fetches related video recommendations based on the provided keywords.
# This function can be expanded with a proper API or custom logic.
# """
# # Placeholder for fetching recommendations based on keywords
# return f"Recommendations for: {keywords}" # Dummy return for now
######################################
# from textblob import TextBlob
# from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
# import re
# from collections import Counter
# from googleapiclient.discovery import build
# import os
# # Set your YouTube API key
# YOUTUBE_API_KEY = "YOUR_API_KEY_HERE" # Replace with your actual API key
# # Alternatively, you can set it as an environment variable:
# # YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
# def process_youtube_video(url=""):
# """
# Process a YouTube video URL and return sentiment analysis of its content.
# """
# try:
# # Input validation
# if not url.strip():
# return {"error": "Please enter a YouTube URL"}
# # Extract video ID
# video_id = extract_video_id(url)
# if not video_id:
# return {"error": "Invalid YouTube URL"}
# # Get video transcript
# text = get_video_transcript(video_id)
# if isinstance(text, dict) and "error" in text:
# return text
# # Get video metadata
# metadata = get_video_metadata(video_id)
# if "error" in metadata:
# return metadata
# # Perform sentiment analysis
# sentiment_result = analyze_sentiment(text)
# return {
# "success": True,
# "metadata": metadata,
# "sentiment": sentiment_result,
# "video_id": video_id
# }
# except Exception as e:
# return {"error": f"An error occurred: {str(e)}"}
# def get_video_metadata(video_id):
# """
# Fetches video metadata using the YouTube Data API.
# """
# try:
# youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
# request = youtube.videos().list(
# part="snippet",
# id=video_id
# )
# response = request.execute()
# if response.get("items"):
# snippet = response["items"][0]["snippet"]
# return {
# "title": snippet.get("title", ""),
# "description": snippet.get("description", ""),
# "publishedAt": snippet.get("publishedAt", ""),
# "channelTitle": snippet.get("channelTitle", "")
# }
# return {"error": "Video not found"}
# except Exception as e:
# return {"error": f"Error fetching metadata: {str(e)}"}
# # [Previous functions remain the same: get_video_transcript, analyze_sentiment,
# # extract_video_id, clean_text_for_analysis, get_detailed_sentiment]
# # Example usage with proper error handling:
# if __name__ == "__main__":
# # Example with a real YouTube URL
# test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ" # Replace with any YouTube URL
# # Check if API key is set
# if YOUTUBE_API_KEY == "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98":
# print("Error: Please set your YouTube API key first!")
# else:
# result = process_youtube_video(test_url)
# if "error" in result:
# print(f"Error: {result['error']}")
# else:
# print("\n=== Video Information ===")
# print(f"Title: {result['metadata']['title']}")
# print(f"Channel: {result['metadata']['channelTitle']}")
# print("\n=== Sentiment Analysis Results ===")
# sentiment = result['sentiment']
# print(f"Overall Sentiment: {sentiment['overall_sentiment']}")
# print(f"Average Polarity: {sentiment['average_polarity']}")
# print("\nSentiment Distribution:")
# dist = sentiment['sentiment_distribution']
# total = sum(dist.values())
# if total > 0:
# print(f"Positive: {dist['positive']} ({(dist['positive']/total*100):.1f}%)")
# print(f"Neutral: {dist['neutral']} ({(dist['neutral']/total*100):.1f}%)")
# print(f"Negative: {dist['negative']} ({(dist['negative']/total*100):.1f}%)")
# print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
#####################################################################################################
def _fetch_transcript_text(video_id):
    """Best-effort retrieval of an English transcript for *video_id*.

    Strategies, in order: direct English fetch; each listed English variant
    (manual first, then auto-generated); finally translating any available
    transcript to English.  Raises when every strategy fails.

    Fixes two defects in the original inline code: the translation fallback
    referenced ``transcript_list`` even when ``list_transcripts`` itself had
    raised (masked NameError), and it called
    ``find_manually_created_transcript()`` without its required
    language-codes argument (TypeError).
    """
    # Method 1: direct fetch of an English transcript.
    try:
        entries = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        return " ".join(entry['text'] for entry in entries)
    except Exception:
        pass
    # Methods 2 and 3 both need the transcript listing; if this raises there
    # is nothing further to try, so let the error propagate to the caller.
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    # Method 2: try the English language variants explicitly.
    for lang_code in ['en', 'en-US', 'en-GB', 'a.en']:
        try:
            transcript = transcript_list.find_transcript([lang_code])
            return " ".join(entry['text'] for entry in transcript.fetch())
        except Exception:
            continue
    try:
        transcript = transcript_list.find_generated_transcript(['en'])
        return " ".join(entry['text'] for entry in transcript.fetch())
    except Exception:
        pass
    # Method 3: translate whatever transcript exists into English.
    for transcript in transcript_list:
        translated = transcript.translate('en')
        return " ".join(entry['text'] for entry in translated.fetch())
    raise ValueError("No transcript could be retrieved")


def process_youtube_video(url="", keywords=""):
    """Analyze a YouTube video: thumbnail, Gemini summary, sentiment, related videos.

    Parameters
    ----------
    url : str
        YouTube video URL (watch / youtu.be / embed forms).
    keywords : str
        Optional search terms; when non-empty, related-video recommendations
        are fetched via :func:`get_recommendations`.

    Returns
    -------
    tuple
        ``(thumbnail_url_or_None, summary_or_error_message, sentiment_text,
        recommendations_text)`` — matching the four Gradio outputs.
    """
    thumbnail = None
    sentiment_label = "N/A"
    recommendations = ""
    try:
        if not url.strip():
            return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
        video_id = extract_video_id(url)
        if not video_id:
            return thumbnail, "Invalid YouTube URL", sentiment_label, recommendations
        # The thumbnail URL is derivable without any API call.
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        try:
            text = _fetch_transcript_text(video_id)
            # Strip punctuation (keeping periods) and collapse whitespace.
            cleaned_text = re.sub(r'[^\w\s.]', '', text)
            cleaned_text = ' '.join(cleaned_text.split())
            # Sentiment on a bounded prefix keeps the analysis fast.
            blob = TextBlob(cleaned_text[:2000])
            polarity = blob.sentiment.polarity
            subjectivity = blob.sentiment.subjectivity
            sentiment_label = (
                f"Sentiment: {'Positive' if polarity > 0 else 'Negative' if polarity < 0 else 'Neutral'}\n"
                f"Confidence: {abs(polarity):.2f}\n"
                f"Subjectivity: {subjectivity:.2f}"
            )
            # Summarize a bounded prefix with Gemini.
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text
        except Exception as e:
            print(f"Debug - Transcript Error: {str(e)}")  # Debug logging
            return thumbnail, f"⚠️ Unable to process video: {str(e)}", "N/A", recommendations
        # Recommendations are independent of transcript success/failure paths
        # that returned above; only fetched when keywords were supplied.
        if keywords.strip():
            recommendations = get_recommendations(keywords)
        return thumbnail, summary, sentiment_label, recommendations
    except Exception as e:
        print(f"Debug - Main Error: {str(e)}")  # Debug logging
        return None, f"Error: {str(e)}", "N/A", ""
def get_recommendations(keywords, max_results=5):
    """Search YouTube for educational videos matching *keywords*.

    Parameters
    ----------
    keywords : str
        Search terms; falsy input returns a prompt string without searching.
    max_results : int
        Maximum number of results requested from the API (default 5).

    Returns
    -------
    str
        Newline-separated listing of matching videos (title, channel, link),
        or a human-readable message when there is nothing to show / on error.
    """
    if not keywords:
        return "Please provide search keywords"
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params={
                "part": "snippet",
                "q": f"educational {keywords}",
                "type": "video",
                "maxResults": max_results,
                "relevanceLanguage": "en",
                "key": YOUTUBE_API_KEY
            },
            # Fix: the original request had no timeout and could hang the
            # Gradio worker indefinitely on a stalled connection.
            timeout=10,
        ).json()
        results = []
        for item in response.get("items", []):
            title = item["snippet"]["title"]
            channel = item["snippet"]["channelTitle"]
            video_id = item["id"]["videoId"]
            results.append(f"πŸ“Ί {title}\nπŸ‘€ {channel}\nπŸ”— https://youtube.com/watch?v={video_id}\n")
        return "\n".join(results) if results else "No recommendations found"
    except Exception as e:
        return f"Error: {str(e)}"
# Gradio Interface
# Single-page app with a login gate: `login_page` is shown first and, on a
# successful login, hidden in favour of `main_page`.  The sidebar buttons
# toggle visibility between the five content groups on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Login Page
    with gr.Group() as login_page:
        gr.Markdown("# πŸŽ“ Educational Learning Management System")
        username = gr.Textbox(label="Username")
        password = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown()
    # Main Interface (hidden until login_check succeeds)
    with gr.Group(visible=False) as main_page:
        with gr.Row():
            # Left column: navigation sidebar.
            with gr.Column(scale=1):
                gr.Markdown("### πŸ“‹ Navigation")
                nav_dashboard = gr.Button("πŸ“Š Dashboard", variant="primary")
                nav_students = gr.Button("πŸ‘₯ Students")
                nav_teachers = gr.Button("πŸ‘¨β€πŸ« Teachers")
                nav_courses = gr.Button("πŸ“š Courses")
                nav_youtube = gr.Button("πŸŽ₯ YouTube Tool")
                logout_btn = gr.Button("πŸšͺ Logout", variant="stop")
            # Right column: one Group per page; show_page() below keeps
            # exactly one visible at a time.
            with gr.Column(scale=3):
                # Dashboard Content
                dashboard_page = gr.Group()
                with dashboard_page:
                    gr.Markdown("## πŸ“Š Dashboard")
                    # Counts are computed once at build time from the static
                    # demo data; they do not refresh dynamically.
                    gr.Markdown(f"""
### System Overview
- πŸ‘₯ Total Students: {len(students_data)}
- πŸ‘¨β€πŸ« Total Teachers: {len(teachers_data)}
- πŸ“š Total Courses: {len(courses_data)}
### Quick Actions
- View student performance
- Access course materials
- Generate learning insights
""")
                # Students Content
                students_page = gr.Group(visible=False)
                with students_page:
                    gr.Markdown("## πŸ‘₯ Students")
                    gr.DataFrame(
                        value=students_data,
                        headers=["ID", "Name", "Grade", "Program"]
                    )
                # Teachers Content
                teachers_page = gr.Group(visible=False)
                with teachers_page:
                    gr.Markdown("## πŸ‘¨β€πŸ« Teachers")
                    gr.DataFrame(
                        value=teachers_data,
                        headers=["ID", "Name", "Subject", "Qualification"]
                    )
                # Courses Content
                courses_page = gr.Group(visible=False)
                with courses_page:
                    gr.Markdown("## πŸ“š Courses")
                    gr.DataFrame(
                        value=courses_data,
                        headers=["ID", "Name", "Instructor", "Level"]
                    )
                # YouTube Tool Content
                youtube_page = gr.Group(visible=False)
                with youtube_page:
                    gr.Markdown("## Agent for YouTube Content Exploration")
                    with gr.Row():
                        with gr.Column(scale=2):
                            video_url = gr.Textbox(
                                label="YouTube URL",
                                placeholder="https://youtube.com/watch?v=..."
                            )
                            keywords = gr.Textbox(
                                label="Keywords for Recommendations",
                                placeholder="e.g., python programming, machine learning"
                            )
                            analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
                        with gr.Column(scale=1):
                            video_thumbnail = gr.Image(label="Video Preview")
                    with gr.Row():
                        with gr.Column():
                            summary = gr.Textbox(label="πŸ“ Summary", lines=8)
                            sentiment = gr.Textbox(label="😊 Content Sentiment")
                        with gr.Column():
                            recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)

    def login_check(user, pwd):
        # Compare against the plaintext demo credential table; on success swap
        # the login page for the main page, otherwise keep the login page and
        # surface an error message.
        if USER_CREDENTIALS.get(user) == pwd:
            return {
                login_page: gr.update(visible=False),
                main_page: gr.update(visible=True),
                login_msg: ""
            }
        return {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False),
            login_msg: "❌ Invalid credentials"
        }

    def show_page(page_name):
        # Hide every content page, then reveal only the requested one.
        # Returns a component->update dict keyed by the page Groups.
        updates = {
            dashboard_page: gr.update(visible=False),
            students_page: gr.update(visible=False),
            teachers_page: gr.update(visible=False),
            courses_page: gr.update(visible=False),
            youtube_page: gr.update(visible=False)
        }
        updates[page_name] = gr.update(visible=True)
        return updates

    # Event Handlers
    login_btn.click(
        login_check,
        inputs=[username, password],
        outputs=[login_page, main_page, login_msg]
    )
    # Each nav wiring calls show_page() once at build time purely to
    # enumerate the output components; the lambda recomputes the update
    # dict on every click.
    nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
    nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
    nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
    nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
    nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
    analyze_btn.click(
        process_youtube_video,
        inputs=[video_url, keywords],
        outputs=[video_thumbnail, summary, sentiment, recommendations]
    )
    # NOTE(review): logout only toggles page visibility; it does not clear
    # the username/password fields — confirm whether that is intended.
    logout_btn.click(
        lambda: {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False)
        },
        outputs=[login_page, main_page]
    )

# Launch the Gradio server only when run as a script.
if __name__ == "__main__":
    app.launch()