Spaces:

Sayiqa7
/

Youtube_summarization

Runtime error

App Files Files Community

Youtube_summarization / app.py

Sayiqa7

Update app.py

ebb611b verified about 1 year ago

raw

history blame contribute delete

13.7 kB

	import subprocess
	import sys

	# Requirements installed eagerly at startup, before the third-party imports
	# below are attempted. They are installed one at a time, in this order.
	# NOTE(review): "genai" is a separate PyPI project from "google-generativeai"
	# (the package imported as google.generativeai) — confirm it is really needed.
	_BOOTSTRAP_REQUIREMENTS = (
	    "transformers==4.34.0",
	    "torch>=1.7.1",
	    "youtube_transcript_api>=0.6.3",
	    "pytube",
	    "huggingface_hub>=0.19.0",
	    "PyPDF2>=3.0.1",
	    "google-generativeai",
	    "textblob>=0.17.1",
	    "python-dotenv>=1.0.0",
	    "genai",
	    "google-cloud-aiplatform==1.34.0",
	    "google-api-python-client>=2.0.0",
	)

	# Run pip through the interpreter that is executing this script: a bare
	# "pip" on PATH may belong to a different Python installation, in which case
	# the packages would be installed somewhere this process cannot import from.
	for _requirement in _BOOTSTRAP_REQUIREMENTS:
	    subprocess.check_call([sys.executable, "-m", "pip", "install", _requirement])
	import transformers
	import torch
	import os
	import youtube_transcript_api
	import pytube
	import gradio
	import PyPDF2
	import pathlib
	import pandas
	import numpy
	import textblob
	import gradio as gr
	from youtube_transcript_api import YouTubeTranscriptApi
	import google.generativeai as genai
	from googleapiclient.discovery import build
	import requests
	from textblob import TextBlob
	import re
	#from google.cloud import generativeai
	from googleapiclient.discovery import build
	from huggingface_hub import login
	from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
	def install_missing_packages(required_packages=None, installer=None):
	    """Install every required distribution that is not already present.

	    Presence is checked against installed distribution metadata rather than
	    by importing the name: several requirements are installed under a name
	    that is not an importable module name (for example ``python-dotenv`` or
	    ``google-api-python-client``), so an import-based check always failed for
	    them and triggered a reinstall on every start.

	    Only presence is checked; an installed version is not compared against
	    the version specifier.

	    Args:
	        required_packages: Mapping of distribution name to a version
	            specifier such as ``">=1.0"``, or ``None`` for "any version".
	            Defaults to the application's built-in requirement set.
	        installer: Callable receiving one requirement string (name plus
	            specifier) for each missing distribution. Defaults to running
	            ``pip install`` with the current interpreter.

	    Raises:
	        subprocess.CalledProcessError: If the default installer's pip
	            invocation exits with a non-zero status.
	    """
	    import sys
	    from importlib import metadata

	    if required_packages is None:
	        required_packages = {
	            "torch": ">=1.11.0",
	            "transformers": ">=4.34.0",
	            "youtube_transcript_api": ">=0.6.3",
	            "pytube": None,
	            "huggingface_hub": ">=0.19.0",
	            "PyPDF2": ">=3.0.1",
	            "textblob": ">=0.17.1",
	            "python-dotenv": ">=1.0.0",
	            "genai": None,
	            "google-generativeai": None,
	            "google-cloud-aiplatform": "==1.34.0",
	            "google-api-python-client": ">=2.0.0",
	        }

	    if installer is None:
	        def installer(requirement):
	            # Use the running interpreter's pip so the install lands in the
	            # environment this process imports from.
	            subprocess.check_call(
	                [sys.executable, "-m", "pip", "install", requirement]
	            )

	    for package, version in required_packages.items():
	        try:
	            metadata.version(package)
	        except metadata.PackageNotFoundError:
	            requirement = f"{package}{version}" if version else package
	            installer(requirement)

	# Make sure every runtime dependency is installed before configuring clients.
	install_missing_packages()

	# --- Hugging Face authentication -------------------------------------------
	# The token is mandatory: startup aborts when HF_TOKEN is absent or empty.
	hf_token = os.getenv("HF_TOKEN")
	if not hf_token:
	    raise ValueError("HF_TOKEN environment variable not set.")
	login(hf_token)

	# --- Application login ------------------------------------------------------
	# Username -> password table consulted by the login form.
	# NOTE(review): example credentials are hard-coded in source; replace before
	# any non-demo deployment.
	USER_CREDENTIALS = {"admin": "password"}

	# Repeated imports of names already imported at the top of the file.
	import os
	from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound

	# --- Google / YouTube API keys ----------------------------------------------
	# Keys are read from the environment; both must be present and non-empty.
	GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
	YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
	if not (GOOGLE_API_KEY and YOUTUBE_API_KEY):
	    raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")

	genai.configure(api_key=GOOGLE_API_KEY)

	# In-memory demo "database": static rows shown in the UI tables and counted
	# on the dashboard. Nothing in this file persists or mutates them.

	# Student rows: (ID, Name, Grade, Program) — same order as the Students table headers.
	students_data = [
	(1, "Alice", "A", "Computer Science"),
	(2, "Aliaa", "B", "Mathematics"),
	(3, "Charlie", "A", "Machine Learning"),
	(4, "Daan", "A", "Physics"),
	(5, "Jhon", "C", "Math"),
	(6, "Emma", "A+", "Computer Science")
	]

	# Teacher rows: (ID, Name, Subject, Qualification) — same order as the Teachers table headers.
	teachers_data = [
	(1, "Dr. Smith", "Math", "MS Mathematics"),
	(2, "Ms. Johnson", "Science", "MSc Physics"),
	(3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
	(4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
	]

	# Course rows: (ID, Name, Instructor, Level) — same order as the Courses table headers.
	# NOTE(review): instructors "Ms. Mia" and "Ms. Smith" have no matching name in
	# teachers_data (which lists "Dr. Smith") — confirm whether these are typos.
	courses_data = [
	(1, "Algebra", "Dr. Smith", "Advanced"),
	(2, "Biology", "Ms. Mia", "Intermediate"),
	(3, "Machine Learning", "Ms. Jack", "Intermediate"),
	(4, "Computer Science", "Ms. Evelyn", "Intermediate"),
	(5, "Mathematics", "Ms. Smith", "Intermediate")
	]

	# None of these modules is covered by name in the startup requirement list,
	# so a hard import would abort the whole app when one is absent. Bind the
	# name to None instead and let the code path that needs it fail at call time.
	try:
	    import youtube
	except ImportError:
	    youtube = None
	try:
	    from google.cloud import language_v1beta3 as language
	except ImportError:
	    try:
	        # NOTE(review): google-cloud-language publishes language_v1 and
	        # language_v2 — confirm which API version is actually intended.
	        from google.cloud import language_v1 as language
	    except ImportError:
	        language = None
	try:
	    from google.auth import credentials
	except ImportError:
	    credentials = None

	# Read the key from the environment. A literal key here would both publish a
	# secret in source control and silently override the environment value the
	# configuration section has already validated. Any key that was previously
	# committed must be treated as compromised and rotated.
	YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

	# Google Cloud project ID; set GOOGLE_CLOUD_PROJECT to override the default.
	PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "lively-machine-445513-t7")


	def extract_video_id(url):
	    """Extract the 11-character video ID from a YouTube URL.

	    Handles the ``watch?v=<id>`` form as well as path-based forms such as
	    ``youtu.be/<id>`` and ``/embed/<id>``.

	    Args:
	        url: The URL text entered by the user.

	    Returns:
	        The video ID, or None when ``url`` is not a string or contains no
	        ID-shaped segment.
	    """
	    if not isinstance(url, str):
	        return None
	    # The alternation must be a real "|": an escaped "\|" matches a literal
	    # pipe character, which made every ordinary URL fail to match.
	    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})", url)
	    return match.group(1) if match else None


	def get_video_transcript(video_id):
	    """Find an English caption track for a video via the YouTube Data API v3.

	    Args:
	        video_id: The ID of the YouTube video.

	    Returns:
	        The ID of the first caption track whose language code starts with
	        "en", or None when the video has no such track or the API request
	        fails (the failure is printed).
	    """
	    try:
	        # googleapiclient's discovery client provides the
	        # captions().list(...).execute() call chain used below.
	        youtube_service = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
	        caption_response = youtube_service.captions().list(
	            part="snippet", videoId=video_id
	        ).execute()
	    except Exception as e:
	        # Deliberately broad: this is a best-effort lookup and callers treat
	        # None as "no transcript available".
	        print(f"Error fetching transcript: {str(e)}")
	        return None

	    for track in caption_response.get("items", []):
	        track_language = track.get("snippet", {}).get("language", "")
	        if track_language.lower().startswith("en"):
	            return track.get("id")

	    return None


	def analyze_sentiment(text):
	    """Analyze the sentiment of a text with the Cloud Natural Language client.

	    Args:
	        text: The text to analyze.

	    Returns:
	        A dictionary with "polarity" (the document sentiment score) and
	        "classification" ("Positive" for a score above zero, "Negative" for
	        a score below zero, otherwise "Neutral").
	    """
	    # Build the client without an explicit credentials argument so the
	    # library resolves the default credentials itself. Assigning to a local
	    # named "credentials" here would shadow the imported module of the same
	    # name and raise UnboundLocalError before any request is made.
	    language_client = language.LanguageServiceClient()

	    document = language.Document(
	        content=text, type_=language.Document.Type.PLAIN_TEXT
	    )
	    response = language_client.analyze_sentiment(document=document)
	    score = response.document_sentiment.score

	    if score > 0:
	        classification = "Positive"
	    elif score < 0:
	        classification = "Negative"
	    else:
	        classification = "Neutral"

	    return {"polarity": score, "classification": classification}


	def _fetch_transcript_text(video_id):
	    """Return the video's transcript as a single string, or None if unavailable."""
	    try:
	        if hasattr(YouTubeTranscriptApi, "fetch"):
	            # youtube_transcript_api 1.x: instance API yielding snippet objects.
	            segments = [
	                snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)
	            ]
	        else:
	            # Older releases: class-level API returning a list of dicts.
	            segments = [
	                entry["text"]
	                for entry in YouTubeTranscriptApi.get_transcript(video_id)
	            ]
	    except Exception as e:
	        # Deliberately broad: a missing or disabled transcript and network
	        # failures all mean "fall back to the video description".
	        print(f"Error fetching transcript text using youtube_transcript library: {e}")
	        return None

	    transcript_text = " ".join(part.strip() for part in segments if part and part.strip())
	    return transcript_text or None


	def _fetch_video_description(video_id):
	    """Return the video's description from the YouTube Data API, or None."""
	    try:
	        metadata = build("youtube", "v3", developerKey=YOUTUBE_API_KEY).videos().list(
	            part="snippet", id=video_id
	        ).execute()
	    except Exception as e:
	        print(f"Error fetching video metadata: {e}")
	        return None

	    items = metadata.get("items", [])
	    if not items:
	        return None
	    return items[0].get("snippet", {}).get("description") or None


	def _sentiment_or_na(text):
	    """Run analyze_sentiment, returning "N/A" instead of raising on failure."""
	    try:
	        return analyze_sentiment(text)
	    except Exception as e:
	        # Keep the thumbnail and summary usable when the sentiment service
	        # is not configured or not reachable.
	        print(f"Error analyzing sentiment: {e}")
	        return "N/A"


	def process_youtube_video(url):
	    """Process a YouTube URL into a thumbnail, a summary and a sentiment result.

	    The summary is built from the video's transcript when one can be
	    fetched, and from the video's description otherwise.

	    Args:
	        url: The URL of the YouTube video.

	    Returns:
	        A 3-tuple ``(thumbnail_url, summary_text, sentiment)``. ``sentiment``
	        is the dictionary produced by ``analyze_sentiment`` or the string
	        "N/A". For an unrecognized URL the tuple is
	        ``(None, "Invalid YouTube URL", "N/A")``. Every path returns three
	        values, one per UI output wired to this handler.
	    """
	    video_id = extract_video_id(url)
	    if not video_id:
	        return None, "Invalid YouTube URL", "N/A"

	    thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

	    # Prefer the transcript; fall back to the description if it is unavailable.
	    content = _fetch_transcript_text(video_id) or _fetch_video_description(video_id)
	    if not content:
	        return thumbnail, "No transcript or description available for this video.", "N/A"

	    summary = f"Summary: {content[:400]}..."
	    return thumbnail, summary, _sentiment_or_na(content)

	def get_recommendations(keywords, max_results=5):
	    """Search YouTube for videos matching the given keywords.

	    Args:
	        keywords: Free-text search terms entered by the user.
	        max_results: Maximum number of videos to list.

	    Returns:
	        Text for the "Related Videos" box: one title and watch URL per
	        result, or a short message when there is nothing to show or the
	        search request fails.
	    """
	    if not keywords or not keywords.strip():
	        return "Please enter keywords to get recommendations."

	    try:
	        response = build("youtube", "v3", developerKey=YOUTUBE_API_KEY).search().list(
	            q=keywords.strip(), part="snippet", type="video", maxResults=max_results
	        ).execute()
	    except Exception as e:
	        # Report the failure in the UI instead of raising out of the handler.
	        return f"Error fetching recommendations: {e}"

	    entries = []
	    for item in response.get("items", []):
	        video_id = item.get("id", {}).get("videoId")
	        if not video_id:
	            continue
	        title = item.get("snippet", {}).get("title", "Untitled")
	        entries.append(f"{title}\nhttps://www.youtube.com/watch?v={video_id}")

	    return "\n\n".join(entries) if entries else "No recommendations found."


	# Gradio Interface
	with gr.Blocks(theme=gr.themes.Soft()) as app:
	    # Login Page
	    with gr.Group() as login_page:
	        gr.Markdown("# 🎓 Educational Learning Management System")
	        username = gr.Textbox(label="Username")
	        password = gr.Textbox(label="Password", type="password")
	        login_btn = gr.Button("Login", variant="primary")
	        login_msg = gr.Markdown()

	    # Main Interface (hidden until a successful login)
	    with gr.Group(visible=False) as main_page:
	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 📋 Navigation")
	                nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
	                nav_students = gr.Button("👥 Students")
	                nav_teachers = gr.Button("👨‍🏫 Teachers")
	                nav_courses = gr.Button("📚 Courses")
	                nav_youtube = gr.Button("🎥 YouTube Tool")
	                logout_btn = gr.Button("🚪 Logout", variant="stop")

	            with gr.Column(scale=3):
	                # Dashboard Content
	                dashboard_page = gr.Group()
	                with dashboard_page:
	                    gr.Markdown("## 📊 Dashboard")
	                    # Assembled line by line so the Markdown never inherits
	                    # source-code indentation (indented Markdown lines can
	                    # render as a code block).
	                    gr.Markdown("\n".join([
	                        "### System Overview",
	                        f"- 👥 Total Students: {len(students_data)}",
	                        f"- 👨‍🏫 Total Teachers: {len(teachers_data)}",
	                        f"- 📚 Total Courses: {len(courses_data)}",
	                        "",
	                        "### Quick Actions",
	                        "- View student performance",
	                        "- Access course materials",
	                        "- Generate learning insights",
	                    ]))

	                # Students Content
	                students_page = gr.Group(visible=False)
	                with students_page:
	                    gr.Markdown("## 👥 Students")
	                    gr.DataFrame(
	                        value=students_data,
	                        headers=["ID", "Name", "Grade", "Program"]
	                    )

	                # Teachers Content
	                teachers_page = gr.Group(visible=False)
	                with teachers_page:
	                    gr.Markdown("## 👨‍🏫 Teachers")
	                    gr.DataFrame(
	                        value=teachers_data,
	                        headers=["ID", "Name", "Subject", "Qualification"]
	                    )

	                # Courses Content
	                courses_page = gr.Group(visible=False)
	                with courses_page:
	                    gr.Markdown("## 📚 Courses")
	                    gr.DataFrame(
	                        value=courses_data,
	                        headers=["ID", "Name", "Instructor", "Level"]
	                    )

	                # YouTube Tool Content
	                youtube_page = gr.Group(visible=False)
	                with youtube_page:
	                    gr.Markdown("## Agent for YouTube Content Exploration")
	                    with gr.Row():
	                        with gr.Column(scale=2):
	                            video_url = gr.Textbox(
	                                label="YouTube URL",
	                                placeholder="https://youtube.com/watch?v=..."
	                            )
	                            keywords = gr.Textbox(
	                                label="Keywords for Recommendations",
	                                placeholder="e.g., python programming, machine learning"
	                            )
	                            analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
	                            recommend_btn = gr.Button("🔎 Get Recommendations", variant="primary")

	                        with gr.Column(scale=1):
	                            video_thumbnail = gr.Image(label="Video Preview")

	                    with gr.Row():
	                        with gr.Column():
	                            summary = gr.Textbox(label="📝 Summary", lines=8)
	                            sentiment = gr.Textbox(label="😊 Content Sentiment")
	                        with gr.Column():
	                            recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)

	    # Every content page, in navigation order; exactly one is visible at a time.
	    content_pages = [
	        dashboard_page, students_page, teachers_page, courses_page, youtube_page
	    ]

	    def login_check(user, pwd):
	        """Show the main page when the credentials match, else an error message."""
	        if USER_CREDENTIALS.get(user) == pwd:
	            return {
	                login_page: gr.update(visible=False),
	                main_page: gr.update(visible=True),
	                login_msg: ""
	            }
	        return {
	            login_page: gr.update(visible=True),
	            main_page: gr.update(visible=False),
	            login_msg: "❌ Invalid credentials"
	        }

	    def show_page(page_name):
	        """Return updates that hide every content page except ``page_name``."""
	        updates = {page: gr.update(visible=False) for page in content_pages}
	        updates[page_name] = gr.update(visible=True)
	        return updates

	    def make_navigator(target_page):
	        """Build a zero-argument click handler that switches to ``target_page``."""
	        def navigate():
	            return show_page(target_page)
	        return navigate

	    def logout():
	        """Return to the login page and clear the previously typed credentials."""
	        return {
	            login_page: gr.update(visible=True),
	            main_page: gr.update(visible=False),
	            username: "",
	            password: ""
	        }

	    # Event Handlers
	    login_btn.click(
	        login_check,
	        inputs=[username, password],
	        outputs=[login_page, main_page, login_msg]
	    )

	    navigation = [
	        (nav_dashboard, dashboard_page),
	        (nav_students, students_page),
	        (nav_teachers, teachers_page),
	        (nav_courses, courses_page),
	        (nav_youtube, youtube_page),
	    ]
	    for nav_button, target_page in navigation:
	        nav_button.click(make_navigator(target_page), outputs=content_pages)

	    analyze_btn.click(
	        process_youtube_video,
	        inputs=[video_url],
	        outputs=[video_thumbnail, summary, sentiment]
	    )

	    recommend_btn.click(
	        get_recommendations,
	        inputs=[keywords],
	        outputs=[recommendations]
	    )

	    logout_btn.click(
	        logout,
	        outputs=[login_page, main_page, username, password]
	    )

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()