# deployment/app.py — Hugging Face Space app (Sayiqa)
# Commit 985d66e (verified), ~24.3 kB.
# (The lines above/around this header were hosting-UI page chrome —
# "raw / history / blame" — captured by scraping, not program code.)
import subprocess
import sys

# Runtime dependency bootstrap for the hosted (Hugging Face Spaces) runtime:
# install everything this app imports before the import statements below run.
# A single pip invocation replaces the original twelve subprocess spawns, and
# `sys.executable -m pip` targets the interpreter actually running this file
# instead of whichever `pip` binary happens to be first on PATH.
subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "transformers==4.34.0",
    "torch>=1.7.1",
    "youtube_transcript_api>=0.6.3",
    "pytube",
    "huggingface_hub>=0.19.0",
    "PyPDF2>=3.0.1",
    "google-generativeai",
    "textblob>=0.17.1",
    "python-dotenv>=1.0.0",
    "genai",
    "google-cloud-aiplatform==1.34.0",
])
import transformers
import torch
import os
import youtube_transcript_api
import pytube
import gradio
import PyPDF2
import pathlib
import pandas
import numpy
import textblob
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import google.generativeai as genai
import requests
from textblob import TextBlob
import re
#from google.cloud import generativeai
from huggingface_hub import login
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
def install_missing_packages():
    """Install any runtime dependency that is not already importable.

    The original mapping used pip distribution names as import names, but
    several distributions install modules under a different name (pip's
    ``python-dotenv`` is imported as ``dotenv``, ``google-generativeai`` as
    ``google.generativeai``, ``google-cloud-aiplatform`` as
    ``google.cloud.aiplatform``), so the ImportError probe always failed for
    those entries and pip ran on every startup.  Each entry therefore maps
    the pip name to the module name actually used by ``import``.
    """
    import importlib
    import sys

    # pip distribution name -> (importable module name, version spec or None)
    required_packages = {
        "torch": ("torch", ">=1.11.0"),
        "transformers": ("transformers", ">=4.34.0"),
        "youtube_transcript_api": ("youtube_transcript_api", ">=0.6.3"),
        "pytube": ("pytube", None),
        "huggingface_hub": ("huggingface_hub", ">=0.19.0"),
        "PyPDF2": ("PyPDF2", ">=3.0.1"),
        "textblob": ("textblob", ">=0.17.1"),
        "python-dotenv": ("dotenv", ">=1.0.0"),
        "genai": ("genai", None),
        "google-generativeai": ("google.generativeai", None),
        "google-cloud-aiplatform": ("google.cloud.aiplatform", "==1.34.0"),
    }
    for package, (module_name, version) in required_packages.items():
        try:
            importlib.import_module(module_name)
        except ImportError:
            # Append the version specifier only when one is pinned.
            spec = f"{package}{version}" if version else package
            subprocess.check_call([sys.executable, "-m", "pip", "install", spec])


install_missing_packages()
# Configuration
# Authenticate with the Hugging Face Hub; the token must come from the
# environment (never hard-coded), and a missing token fails fast.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set.")
login(hf_token)
# Configuration
# Demo login credentials: username -> plaintext password, checked verbatim
# by the login handler below.
# NOTE(review): plaintext credentials committed to source are acceptable only
# for a demo; a real deployment should verify hashed passwords loaded from a
# secrets store.
USER_CREDENTIALS = {
    "admin": "password123",
    "teacher": "teach2024",
    "student": "learn2024"
}
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
# Use environment variables
# Both API keys are mandatory; the Gemini client is configured with the
# Google key, while the YouTube key is used later for search requests.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
if not (GOOGLE_API_KEY and YOUTUBE_API_KEY):
    raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
genai.configure(api_key=GOOGLE_API_KEY)
# Database
# Static in-memory demo records; there is no real database behind this app.
# Each student row: (id, name, grade, program) — rendered by the Students
# DataFrame below.
students_data = [
    (1, "Alice", "A", "Computer Science"),
    (2, "Aliaa", "B", "Mathematics"),
    (3, "Charlie", "A", "Machine Learning"),
    (4, "Daan", "A", "Physics"),
    (5, "Jhon", "C", "Math"),
    (6, "Emma", "A+", "Computer Science")
]
# Each teacher row: (id, name, subject, qualification)
teachers_data = [
    (1, "Dr. Smith", "Math", "MS Mathematics"),
    (2, "Ms. Johnson", "Science", "MSc Physics"),
    (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
    (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
]
# Each course row: (id, name, instructor, level)
courses_data = [
    (1, "Algebra", "Dr. Smith", "Advanced"),
    (2, "Biology", "Ms. Mia", "Intermediate"),
    (3, "Machine Learning", "Ms. Jack", "Intermediate"),
    (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
    (5, "Mathematics", "Ms. Smith", "Intermediate")
]
def sanitize_text(text):
    """Return *text* round-tripped through UTF-8 with invalid characters replaced.

    Code points that cannot be encoded as UTF-8 (e.g. lone surrogates) are
    substituted via the 'replace' error handler, so the result is always a
    valid UTF-8-encodable string.
    """
    encoded = text.encode("utf-8", "replace")
    return encoded.decode("utf-8")
def extract_video_id(url):
    """Extract the YouTube video id from *url*.

    Supports watch (?v=), /videos/, embed/, short youtu.be, /v/ and /e/
    URL forms.  Returns None for a falsy url or when no pattern matches.
    """
    if not url:
        return None
    # Fix: the dot in "youtu.be" is now escaped — unescaped it matched any
    # character, so e.g. "youtuXbe/..." would have been accepted.
    patterns = [
        r'(?:v=|\/videos\/|embed\/|youtu\.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
from textblob import TextBlob
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
import re
from collections import Counter
from googleapiclient.discovery import build
# def process_youtube_video(url="", keywords=""):
# try:
# #Initialize variables
# thumbnail = None
# summary = "No transcript available"
# sentiment_label = "N/A"
# recommendations = ""
# subtitle_info = "No additional information available"
# if not url.strip():
# return None, "Please enter a YouTube URL", "N/A", "", ""
# video_id = extract_video_id(url)
# if not video_id:
# return None, "Invalid YouTube URL", "N/A", "", ""
# thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
# try:
# # Fetch transcript
# transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
# transcript = None
# try:
# transcript = transcript_list.find_transcript(['en'])
# except:
# transcript = transcript_list.find_generated_transcript(['en'])
# text = " ".join([t['text'] for t in transcript.fetch()])
# if not text.strip():
# raise ValueError("Transcript is empty")
# # Clean up the text for sentiment analysis
# cleaned_text = clean_text_for_analysis(text)
# # Sentiment analysis
# sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
# sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
# # Generate summary
# model = genai.GenerativeModel("gemini-pro")
# summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
# # Extract subtitle information
# subtitle_info = extract_subtitle_info(cleaned_text)
# except TranscriptsDisabled:
# metadata = get_video_metadata(video_id)
# summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
# sentiment_label = "N/A"
# subtitle_info = "No subtitles available for analysis."
# except NoTranscriptFound:
# metadata = get_video_metadata(video_id)
# summary = metadata.get("description", "⚠️ No English transcript available.")
# sentiment_label = "N/A"
# subtitle_info = "No subtitles available for analysis."
# except Exception as e:
# return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
# # Get recommendations
# if keywords.strip():
# recommendations = get_recommendations(keywords)
# return thumbnail, summary, sentiment_label, subtitle_info, recommendations
# except Exception as e:
# return None, f"Error: {str(e)}", "N/A", "", ""
# def extract_video_id(url):
# """
# Extracts the video ID from a YouTube URL.
# """
# match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
# return match.group(1) if match else None
# def get_video_metadata(video_id):
# """
# Fetches video metadata such as title and description using the YouTube Data API.
# """
# try:
# YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
# youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
# request = youtube.videos().list(part="snippet", id=video_id)
# response = request.execute()
# if "items" in response and len(response["items"]) > 0:
# snippet = response["items"][0]["snippet"]
# return {
# "title": snippet.get("title", "No title available"),
# "description": snippet.get("description", "No description available"),
# }
# return {}
# except Exception as e:
# return {"title": "Error fetching metadata", "description": str(e)}
# def extract_subtitle_info(text):
# """
# Extracts meaningful information from the subtitles.
# This could include topics, key insights, or a breakdown of the content.
# """
# try:
# # Split text into sentences for better analysis
# sentences = text.split(". ")
# # Example: Extract key topics or keywords
# words = text.split()
# common_words = Counter(words).most_common(10)
# key_topics = ", ".join([word for word, count in common_words])
# # Example: Provide a breakdown of the content
# info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
# return info
# except Exception as e:
# return f"Error extracting subtitle information: {str(e)}"
# def clean_text_for_analysis(text):
# """
# Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
# """
# # Remove extra spaces and line breaks
# cleaned_text = " ".join(text.split())
# return cleaned_text
# def get_recommendations(keywords):
# """
# Fetches related video recommendations based on the provided keywords.
# This function can be expanded with a proper API or custom logic.
# """
# # Placeholder for fetching recommendations based on keywords
# return f"Recommendations for: {keywords}" # Dummy return for now
######################################
# from textblob import TextBlob
# from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
# import re
# from collections import Counter
# from googleapiclient.discovery import build
# import os
# # Set your YouTube API key
# YOUTUBE_API_KEY = "YOUR_API_KEY_HERE" # Replace with your actual API key
# # Alternatively, you can set it as an environment variable:
# # YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
# def process_youtube_video(url=""):
# """
# Process a YouTube video URL and return sentiment analysis of its content.
# """
# try:
# # Input validation
# if not url.strip():
# return {"error": "Please enter a YouTube URL"}
# # Extract video ID
# video_id = extract_video_id(url)
# if not video_id:
# return {"error": "Invalid YouTube URL"}
# # Get video transcript
# text = get_video_transcript(video_id)
# if isinstance(text, dict) and "error" in text:
# return text
# # Get video metadata
# metadata = get_video_metadata(video_id)
# if "error" in metadata:
# return metadata
# # Perform sentiment analysis
# sentiment_result = analyze_sentiment(text)
# return {
# "success": True,
# "metadata": metadata,
# "sentiment": sentiment_result,
# "video_id": video_id
# }
# except Exception as e:
# return {"error": f"An error occurred: {str(e)}"}
# def get_video_metadata(video_id):
# """
# Fetches video metadata using the YouTube Data API.
# """
# try:
# youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
# request = youtube.videos().list(
# part="snippet",
# id=video_id
# )
# response = request.execute()
# if response.get("items"):
# snippet = response["items"][0]["snippet"]
# return {
# "title": snippet.get("title", ""),
# "description": snippet.get("description", ""),
# "publishedAt": snippet.get("publishedAt", ""),
# "channelTitle": snippet.get("channelTitle", "")
# }
# return {"error": "Video not found"}
# except Exception as e:
# return {"error": f"Error fetching metadata: {str(e)}"}
# # [Previous functions remain the same: get_video_transcript, analyze_sentiment,
# # extract_video_id, clean_text_for_analysis, get_detailed_sentiment]
# # Example usage with proper error handling:
# if __name__ == "__main__":
# # Example with a real YouTube URL
# test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ" # Replace with any YouTube URL
# # Check if API key is set
# if YOUTUBE_API_KEY == "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98":
# print("Error: Please set your YouTube API key first!")
# else:
# result = process_youtube_video(test_url)
# if "error" in result:
# print(f"Error: {result['error']}")
# else:
# print("\n=== Video Information ===")
# print(f"Title: {result['metadata']['title']}")
# print(f"Channel: {result['metadata']['channelTitle']}")
# print("\n=== Sentiment Analysis Results ===")
# sentiment = result['sentiment']
# print(f"Overall Sentiment: {sentiment['overall_sentiment']}")
# print(f"Average Polarity: {sentiment['average_polarity']}")
# print("\nSentiment Distribution:")
# dist = sentiment['sentiment_distribution']
# total = sum(dist.values())
# if total > 0:
# print(f"Positive: {dist['positive']} ({(dist['positive']/total*100):.1f}%)")
# print(f"Neutral: {dist['neutral']} ({(dist['neutral']/total*100):.1f}%)")
# print(f"Negative: {dist['negative']} ({(dist['negative']/total*100):.1f}%)")
# print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
#####################################################################################################
def _fetch_transcript_text(video_id):
    """Best-effort retrieval of an English transcript for *video_id*.

    Strategies, in order: direct English fetch; each listed English variant
    (manual first, then auto-generated); finally translating any available
    transcript to English.  Raises when every strategy fails.

    Fixes two defects in the original inline code: the translation fallback
    referenced ``transcript_list`` even when ``list_transcripts`` itself had
    raised (masked NameError), and it called
    ``find_manually_created_transcript()`` without its required
    language-codes argument (TypeError).
    """
    # Method 1: direct fetch of an English transcript.
    try:
        entries = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        return " ".join(entry['text'] for entry in entries)
    except Exception:
        pass
    # Methods 2 and 3 both need the transcript listing; if this raises there
    # is nothing further to try, so let the error propagate to the caller.
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    # Method 2: try the English language variants explicitly.
    for lang_code in ['en', 'en-US', 'en-GB', 'a.en']:
        try:
            transcript = transcript_list.find_transcript([lang_code])
            return " ".join(entry['text'] for entry in transcript.fetch())
        except Exception:
            continue
    try:
        transcript = transcript_list.find_generated_transcript(['en'])
        return " ".join(entry['text'] for entry in transcript.fetch())
    except Exception:
        pass
    # Method 3: translate whatever transcript exists into English.
    for transcript in transcript_list:
        translated = transcript.translate('en')
        return " ".join(entry['text'] for entry in translated.fetch())
    raise ValueError("No transcript could be retrieved")


def process_youtube_video(url="", keywords=""):
    """Analyze a YouTube video: thumbnail, Gemini summary, sentiment, related videos.

    Parameters
    ----------
    url : str
        YouTube video URL (watch / youtu.be / embed forms).
    keywords : str
        Optional search terms; when non-empty, related-video recommendations
        are fetched via :func:`get_recommendations`.

    Returns
    -------
    tuple
        ``(thumbnail_url_or_None, summary_or_error_message, sentiment_text,
        recommendations_text)`` — matching the four Gradio outputs.
    """
    thumbnail = None
    sentiment_label = "N/A"
    recommendations = ""
    try:
        if not url.strip():
            return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
        video_id = extract_video_id(url)
        if not video_id:
            return thumbnail, "Invalid YouTube URL", sentiment_label, recommendations
        # The thumbnail URL is derivable without any API call.
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        try:
            text = _fetch_transcript_text(video_id)
            # Strip punctuation (keeping periods) and collapse whitespace.
            cleaned_text = re.sub(r'[^\w\s.]', '', text)
            cleaned_text = ' '.join(cleaned_text.split())
            # Sentiment on a bounded prefix keeps the analysis fast.
            blob = TextBlob(cleaned_text[:2000])
            polarity = blob.sentiment.polarity
            subjectivity = blob.sentiment.subjectivity
            sentiment_label = (
                f"Sentiment: {'Positive' if polarity > 0 else 'Negative' if polarity < 0 else 'Neutral'}\n"
                f"Confidence: {abs(polarity):.2f}\n"
                f"Subjectivity: {subjectivity:.2f}"
            )
            # Summarize a bounded prefix with Gemini.
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text
        except Exception as e:
            print(f"Debug - Transcript Error: {str(e)}")  # Debug logging
            return thumbnail, f"⚠️ Unable to process video: {str(e)}", "N/A", recommendations
        # Recommendations are independent of transcript success/failure paths
        # that returned above; only fetched when keywords were supplied.
        if keywords.strip():
            recommendations = get_recommendations(keywords)
        return thumbnail, summary, sentiment_label, recommendations
    except Exception as e:
        print(f"Debug - Main Error: {str(e)}")  # Debug logging
        return None, f"Error: {str(e)}", "N/A", ""
def get_recommendations(keywords, max_results=5):
    """Search YouTube for educational videos matching *keywords*.

    Parameters
    ----------
    keywords : str
        Search terms; falsy input returns a prompt string without searching.
    max_results : int
        Maximum number of results requested from the API (default 5).

    Returns
    -------
    str
        Newline-separated listing of matching videos (title, channel, link),
        or a human-readable message when there is nothing to show / on error.
    """
    if not keywords:
        return "Please provide search keywords"
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params={
                "part": "snippet",
                "q": f"educational {keywords}",
                "type": "video",
                "maxResults": max_results,
                "relevanceLanguage": "en",
                "key": YOUTUBE_API_KEY
            },
            # Fix: the original request had no timeout and could hang the
            # Gradio worker indefinitely on a stalled connection.
            timeout=10,
        ).json()
        results = []
        for item in response.get("items", []):
            title = item["snippet"]["title"]
            channel = item["snippet"]["channelTitle"]
            video_id = item["id"]["videoId"]
            results.append(f"πŸ“Ί {title}\nπŸ‘€ {channel}\nπŸ”— https://youtube.com/watch?v={video_id}\n")
        return "\n".join(results) if results else "No recommendations found"
    except Exception as e:
        return f"Error: {str(e)}"
# Gradio Interface
# Single-page app with a login gate: `login_page` is shown first and, on a
# successful login, hidden in favour of `main_page`.  The sidebar buttons
# toggle visibility between the five content groups on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Login Page
    with gr.Group() as login_page:
        gr.Markdown("# πŸŽ“ Educational Learning Management System")
        username = gr.Textbox(label="Username")
        password = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown()
    # Main Interface (hidden until login_check succeeds)
    with gr.Group(visible=False) as main_page:
        with gr.Row():
            # Left column: navigation sidebar.
            with gr.Column(scale=1):
                gr.Markdown("### πŸ“‹ Navigation")
                nav_dashboard = gr.Button("πŸ“Š Dashboard", variant="primary")
                nav_students = gr.Button("πŸ‘₯ Students")
                nav_teachers = gr.Button("πŸ‘¨β€πŸ« Teachers")
                nav_courses = gr.Button("πŸ“š Courses")
                nav_youtube = gr.Button("πŸŽ₯ YouTube Tool")
                logout_btn = gr.Button("πŸšͺ Logout", variant="stop")
            # Right column: one Group per page; show_page() below keeps
            # exactly one visible at a time.
            with gr.Column(scale=3):
                # Dashboard Content
                dashboard_page = gr.Group()
                with dashboard_page:
                    gr.Markdown("## πŸ“Š Dashboard")
                    # Counts are computed once at build time from the static
                    # demo data; they do not refresh dynamically.
                    gr.Markdown(f"""
### System Overview
- πŸ‘₯ Total Students: {len(students_data)}
- πŸ‘¨β€πŸ« Total Teachers: {len(teachers_data)}
- πŸ“š Total Courses: {len(courses_data)}
### Quick Actions
- View student performance
- Access course materials
- Generate learning insights
""")
                # Students Content
                students_page = gr.Group(visible=False)
                with students_page:
                    gr.Markdown("## πŸ‘₯ Students")
                    gr.DataFrame(
                        value=students_data,
                        headers=["ID", "Name", "Grade", "Program"]
                    )
                # Teachers Content
                teachers_page = gr.Group(visible=False)
                with teachers_page:
                    gr.Markdown("## πŸ‘¨β€πŸ« Teachers")
                    gr.DataFrame(
                        value=teachers_data,
                        headers=["ID", "Name", "Subject", "Qualification"]
                    )
                # Courses Content
                courses_page = gr.Group(visible=False)
                with courses_page:
                    gr.Markdown("## πŸ“š Courses")
                    gr.DataFrame(
                        value=courses_data,
                        headers=["ID", "Name", "Instructor", "Level"]
                    )
                # YouTube Tool Content
                youtube_page = gr.Group(visible=False)
                with youtube_page:
                    gr.Markdown("## Agent for YouTube Content Exploration")
                    with gr.Row():
                        with gr.Column(scale=2):
                            video_url = gr.Textbox(
                                label="YouTube URL",
                                placeholder="https://youtube.com/watch?v=..."
                            )
                            keywords = gr.Textbox(
                                label="Keywords for Recommendations",
                                placeholder="e.g., python programming, machine learning"
                            )
                            analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
                        with gr.Column(scale=1):
                            video_thumbnail = gr.Image(label="Video Preview")
                    with gr.Row():
                        with gr.Column():
                            summary = gr.Textbox(label="πŸ“ Summary", lines=8)
                            sentiment = gr.Textbox(label="😊 Content Sentiment")
                        with gr.Column():
                            recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)

    def login_check(user, pwd):
        # Compare against the plaintext demo credential table; on success swap
        # the login page for the main page, otherwise keep the login page and
        # surface an error message.
        if USER_CREDENTIALS.get(user) == pwd:
            return {
                login_page: gr.update(visible=False),
                main_page: gr.update(visible=True),
                login_msg: ""
            }
        return {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False),
            login_msg: "❌ Invalid credentials"
        }

    def show_page(page_name):
        # Hide every content page, then reveal only the requested one.
        # Returns a component->update dict keyed by the page Groups.
        updates = {
            dashboard_page: gr.update(visible=False),
            students_page: gr.update(visible=False),
            teachers_page: gr.update(visible=False),
            courses_page: gr.update(visible=False),
            youtube_page: gr.update(visible=False)
        }
        updates[page_name] = gr.update(visible=True)
        return updates

    # Event Handlers
    login_btn.click(
        login_check,
        inputs=[username, password],
        outputs=[login_page, main_page, login_msg]
    )
    # Each nav wiring calls show_page() once at build time purely to
    # enumerate the output components; the lambda recomputes the update
    # dict on every click.
    nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
    nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
    nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
    nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
    nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
    analyze_btn.click(
        process_youtube_video,
        inputs=[video_url, keywords],
        outputs=[video_thumbnail, summary, sentiment, recommendations]
    )
    # NOTE(review): logout only toggles page visibility; it does not clear
    # the username/password fields — confirm whether that is intended.
    logout_btn.click(
        lambda: {
            login_page: gr.update(visible=True),
            main_page: gr.update(visible=False)
        },
        outputs=[login_page, main_page]
    )

# Launch the Gradio server only when run as a script.
if __name__ == "__main__":
    app.launch()