Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,7 +32,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
|
|
| 32 |
import requests
|
| 33 |
from textblob import TextBlob
|
| 34 |
import re
|
| 35 |
-
|
| 36 |
from huggingface_hub import login
|
| 37 |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
| 38 |
def install_missing_packages():
|
|
@@ -356,187 +356,7 @@ def install_missing_packages():
|
|
| 356 |
# if __name__ == "__main__":
|
| 357 |
# app.launch()
|
| 358 |
|
| 359 |
-
import os
|
| 360 |
-
import subprocess
|
| 361 |
-
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
| 362 |
-
from transformers import T5ForConditionalGeneration, T5Tokenizer
|
| 363 |
-
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
| 364 |
-
import nltk
|
| 365 |
-
import requests
|
| 366 |
-
import gradio as gr
|
| 367 |
-
import re
|
| 368 |
-
|
| 369 |
-
# Download NLTK VADER data
|
| 370 |
-
nltk.download('vader_lexicon')
|
| 371 |
-
|
| 372 |
-
# Install Missing Packages
|
| 373 |
|
| 374 |
-
def install_missing_packages():
|
| 375 |
-
required_packages = {
|
| 376 |
-
"torch": ">=1.11.0",
|
| 377 |
-
"transformers": ">=4.34.0",
|
| 378 |
-
"youtube_transcript_api": ">=0.6.3",
|
| 379 |
-
"pytube": None,
|
| 380 |
-
"huggingface_hub": ">=0.19.0",
|
| 381 |
-
"PyPDF2": ">=3.0.1",
|
| 382 |
-
"textblob": ">=0.17.1",
|
| 383 |
-
"python-dotenv": ">=1.0.0",
|
| 384 |
-
"nltk": None
|
| 385 |
-
}
|
| 386 |
-
|
| 387 |
-
for package, version in required_packages.items():
|
| 388 |
-
try:
|
| 389 |
-
__import__(package)
|
| 390 |
-
except ImportError:
|
| 391 |
-
package_name = f"{package}{version}" if version else package
|
| 392 |
-
subprocess.check_call(["pip", "install", package_name])
|
| 393 |
-
|
| 394 |
-
install_missing_packages()
|
| 395 |
-
|
| 396 |
-
# Configuration
|
| 397 |
-
USER_CREDENTIALS = {
|
| 398 |
-
"admin": "password123",
|
| 399 |
-
"teacher": "teach2024",
|
| 400 |
-
"student": "learn2024"
|
| 401 |
-
}
|
| 402 |
-
|
| 403 |
-
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
|
| 404 |
-
if not YOUTUBE_API_KEY:
|
| 405 |
-
raise ValueError("YOUTUBE_API_KEY environment variable not set.")
|
| 406 |
-
|
| 407 |
-
# Functions
|
| 408 |
-
|
| 409 |
-
def sanitize_text(text):
|
| 410 |
-
"""Remove invalid Unicode characters."""
|
| 411 |
-
return text.encode("utf-8", "replace").decode("utf-8")
|
| 412 |
-
|
| 413 |
-
def extract_video_id(url):
|
| 414 |
-
patterns = [
|
| 415 |
-
r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
|
| 416 |
-
]
|
| 417 |
-
for pattern in patterns:
|
| 418 |
-
match = re.search(pattern, url)
|
| 419 |
-
if match:
|
| 420 |
-
return match.group(1)
|
| 421 |
-
return None
|
| 422 |
-
|
| 423 |
-
def get_youtube_transcript(video_url):
|
| 424 |
-
video_id = extract_video_id(video_url)
|
| 425 |
-
if not video_id:
|
| 426 |
-
raise ValueError("Invalid YouTube URL")
|
| 427 |
-
try:
|
| 428 |
-
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
| 429 |
-
return " ".join([entry['text'] for entry in transcript])
|
| 430 |
-
except (TranscriptsDisabled, NoTranscriptFound):
|
| 431 |
-
raise ValueError("Transcript not available for this video.")
|
| 432 |
-
|
| 433 |
-
def summarize_text(text, model_name="t5-small", max_length=150, min_length=40):
|
| 434 |
-
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
| 435 |
-
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
| 436 |
-
|
| 437 |
-
input_text = "summarize: " + text
|
| 438 |
-
inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
|
| 439 |
-
|
| 440 |
-
summary_ids = model.generate(
|
| 441 |
-
inputs, max_length=max_length, min_length=min_length,
|
| 442 |
-
length_penalty=2.0, num_beams=4, early_stopping=True
|
| 443 |
-
)
|
| 444 |
-
return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
| 445 |
-
|
| 446 |
-
def analyze_sentiment(text):
|
| 447 |
-
sid = SentimentIntensityAnalyzer()
|
| 448 |
-
return sid.polarity_scores(text)
|
| 449 |
-
|
| 450 |
-
def get_recommendations(keywords, max_results=5):
|
| 451 |
-
if not keywords:
|
| 452 |
-
return "Please provide search keywords"
|
| 453 |
-
try:
|
| 454 |
-
response = requests.get(
|
| 455 |
-
"https://www.googleapis.com/youtube/v3/search",
|
| 456 |
-
params={
|
| 457 |
-
"part": "snippet",
|
| 458 |
-
"q": f"educational {keywords}",
|
| 459 |
-
"type": "video",
|
| 460 |
-
"maxResults": max_results,
|
| 461 |
-
"relevanceLanguage": "en",
|
| 462 |
-
"key": YOUTUBE_API_KEY
|
| 463 |
-
}
|
| 464 |
-
).json()
|
| 465 |
-
|
| 466 |
-
results = []
|
| 467 |
-
for item in response.get("items", []):
|
| 468 |
-
title = item["snippet"]["title"]
|
| 469 |
-
channel = item["snippet"]["channelTitle"]
|
| 470 |
-
video_id = item["id"]["videoId"]
|
| 471 |
-
results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
|
| 472 |
-
|
| 473 |
-
return "\n".join(results) if results else "No recommendations found"
|
| 474 |
-
except Exception as e:
|
| 475 |
-
return f"Error: {str(e)}"
|
| 476 |
-
|
| 477 |
-
def process_youtube_video(video_url, keywords):
|
| 478 |
-
try:
|
| 479 |
-
transcript = get_youtube_transcript(video_url)
|
| 480 |
-
summary = summarize_text(transcript)
|
| 481 |
-
sentiment = analyze_sentiment(transcript)
|
| 482 |
-
recommendations = get_recommendations(keywords)
|
| 483 |
-
|
| 484 |
-
# Return results to the corresponding Gradio outputs
|
| 485 |
-
return summary, str(sentiment), recommendations
|
| 486 |
-
except Exception as e:
|
| 487 |
-
# Return error message for the Summary output, and empty for others
|
| 488 |
-
return str(e), "", ""
|
| 489 |
-
|
| 490 |
-
# Gradio Interface
|
| 491 |
-
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
| 492 |
-
with gr.Group() as login_page:
|
| 493 |
-
gr.Markdown("# 🎓 Educational Learning Management System")
|
| 494 |
-
username = gr.Textbox(label="Username")
|
| 495 |
-
password = gr.Textbox(label="Password", type="password")
|
| 496 |
-
login_btn = gr.Button("Login", variant="primary")
|
| 497 |
-
login_msg = gr.Markdown()
|
| 498 |
-
|
| 499 |
-
with gr.Group(visible=False) as main_page:
|
| 500 |
-
with gr.Row():
|
| 501 |
-
with gr.Column(scale=1):
|
| 502 |
-
nav_youtube = gr.Button("🎥 YouTube Tool")
|
| 503 |
-
logout_btn = gr.Button("🚪 Logout", variant="stop")
|
| 504 |
-
with gr.Column(scale=3):
|
| 505 |
-
youtube_page = gr.Group()
|
| 506 |
-
with youtube_page:
|
| 507 |
-
gr.Markdown("## 🎥 YouTube Tool")
|
| 508 |
-
video_url = gr.Textbox(label="YouTube URL", placeholder="https://youtube.com/watch?v=...")
|
| 509 |
-
keywords = gr.Textbox(label="Keywords for Recommendations", placeholder="e.g., Python programming")
|
| 510 |
-
analyze_btn = gr.Button("Analyze Video", variant="primary")
|
| 511 |
-
|
| 512 |
-
summary = gr.Textbox(label="Summary", lines=5)
|
| 513 |
-
sentiment = gr.Textbox(label="Sentiment Analysis")
|
| 514 |
-
recommendations = gr.Textbox(label="Recommendations", lines=10)
|
| 515 |
-
|
| 516 |
-
def login_check(user, pwd):
|
| 517 |
-
if USER_CREDENTIALS.get(user) == pwd:
|
| 518 |
-
return {login_page: gr.update(visible=False), main_page: gr.update(visible=True), login_msg: ""}
|
| 519 |
-
return {login_msg: "Invalid credentials"}
|
| 520 |
-
|
| 521 |
-
def show_youtube_page():
|
| 522 |
-
return {youtube_page: gr.update(visible=True)}
|
| 523 |
-
|
| 524 |
-
login_btn.click(login_check, inputs=[username, password], outputs=[login_page, main_page, login_msg])
|
| 525 |
-
nav_youtube.click(show_youtube_page, outputs=[youtube_page])
|
| 526 |
-
|
| 527 |
-
analyze_btn.click(
|
| 528 |
-
process_youtube_video,
|
| 529 |
-
inputs=[video_url, keywords],
|
| 530 |
-
outputs=[summary, sentiment, recommendations]
|
| 531 |
-
)
|
| 532 |
-
|
| 533 |
-
logout_btn.click(
|
| 534 |
-
lambda: {login_page: gr.update(visible=True), main_page: gr.update(visible=False)},
|
| 535 |
-
outputs=[login_page, main_page]
|
| 536 |
-
)
|
| 537 |
-
|
| 538 |
-
if __name__ == "__main__":
|
| 539 |
-
app.launch()
|
| 540 |
|
| 541 |
|
| 542 |
|
|
|
|
| 32 |
import requests
|
| 33 |
from textblob import TextBlob
|
| 34 |
import re
|
| 35 |
+
|
| 36 |
from huggingface_hub import login
|
| 37 |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
| 38 |
def install_missing_packages():
|
|
|
|
| 356 |
# if __name__ == "__main__":
|
| 357 |
# app.launch()
|
| 358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
|
| 361 |
|
| 362 |
|