Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -464,13 +464,13 @@ courses_data = [
|
|
| 464 |
(5, "Mathematics", "Ms. Smith", "Intermediate")
|
| 465 |
]
|
| 466 |
from transformers import pipeline
|
| 467 |
-
# Load Hugging Face summarization pipeline
|
| 468 |
-
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
|
| 469 |
-
|
| 470 |
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    The ID is taken from the first position in the URL where ``v=`` or a
    ``/`` is immediately followed by 11 characters drawn from letters,
    digits, ``_`` and ``-``.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    if found is None:
        return None
    return found.group(1)
|
| 473 |
|
|
|
|
|
|
|
|
|
|
| 474 |
def get_video_metadata(video_id):
|
| 475 |
try:
|
| 476 |
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
|
|
@@ -484,78 +484,60 @@ def get_video_metadata(video_id):
|
|
| 484 |
"description": snippet.get("description", "No description available"),
|
| 485 |
}
|
| 486 |
return {}
|
| 487 |
-
|
| 488 |
except Exception as e:
|
| 489 |
return {"title": "Error fetching metadata", "description": str(e)}
|
| 490 |
|
| 491 |
-
def
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
"relevanceLanguage": "en",
|
| 506 |
-
"key": YOUTUBE_API_KEY
|
| 507 |
-
}
|
| 508 |
-
).json()
|
| 509 |
-
|
| 510 |
-
results = []
|
| 511 |
-
for item in response.get("items", []):
|
| 512 |
-
title = item["snippet"]["title"]
|
| 513 |
-
channel = item["snippet"]["channelTitle"]
|
| 514 |
-
video_id = item["id"]["videoId"]
|
| 515 |
-
results.append(f"\ud83d\udcfa {title}\n\ud83d\udc64 {channel}\n\ud83d\udd17 https://youtube.com/watch?v={video_id}\n")
|
| 516 |
-
|
| 517 |
-
return "\n".join(results) if results else "No recommendations found"
|
| 518 |
-
except Exception as e:
|
| 519 |
-
return f"Error: {str(e)}"
|
| 520 |
-
|
| 521 |
-
def summarize_text(text):
|
| 522 |
-
try:
|
| 523 |
-
chunks = [text[i:i+1000] for i in range(0, len(text), 1000)] # Summarize in chunks
|
| 524 |
-
summaries = summarizer(chunks, max_length=150, min_length=50, do_sample=False)
|
| 525 |
-
return " ".join([summary['summary_text'] for summary in summaries])
|
| 526 |
-
except Exception as e:
|
| 527 |
-
return f"Error during summarization: {str(e)}"
|
| 528 |
|
| 529 |
def process_youtube_video(url):
|
| 530 |
try:
|
| 531 |
-
thumbnail = None
|
| 532 |
-
detailed_summary = "No transcript available"
|
| 533 |
-
sentiment_label = "N/A"
|
| 534 |
-
|
| 535 |
video_id = extract_video_id(url)
|
| 536 |
if not video_id:
|
| 537 |
return None, "Invalid YouTube URL", "N/A"
|
| 538 |
|
| 539 |
thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
|
| 540 |
|
|
|
|
|
|
|
|
|
|
| 541 |
try:
|
|
|
|
| 542 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 543 |
-
transcript =
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
except:
|
| 547 |
-
transcript = transcript_list.find_generated_transcript(['en'])
|
| 548 |
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
raise ValueError("Transcript is empty")
|
| 552 |
|
| 553 |
-
|
| 554 |
-
|
|
|
|
|
|
|
| 555 |
|
|
|
|
| 556 |
sentiment = TextBlob(cleaned_text).sentiment
|
| 557 |
sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
|
| 558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
except (TranscriptsDisabled, NoTranscriptFound):
|
| 560 |
metadata = get_video_metadata(video_id)
|
| 561 |
detailed_summary = metadata.get("description", "No subtitles available")
|
|
|
|
| 464 |
(5, "Mathematics", "Ms. Smith", "Intermediate")
|
| 465 |
]
|
| 466 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
| 467 |
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    The ID is taken from the first position in the URL where ``v=`` or a
    ``/`` is immediately followed by 11 characters drawn from letters,
    digits, ``_`` and ``-``.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    if found is None:
        return None
    return found.group(1)
|
| 470 |
|
| 471 |
+
def clean_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Splitting on whitespace also discards leading and trailing whitespace,
    so the result never starts or ends with a space.
    """
    words = text.split()
    return " ".join(words)
|
| 473 |
+
|
| 474 |
def get_video_metadata(video_id):
|
| 475 |
try:
|
| 476 |
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
|
|
|
|
| 484 |
"description": snippet.get("description", "No description available"),
|
| 485 |
}
|
| 486 |
return {}
|
|
|
|
| 487 |
except Exception as e:
|
| 488 |
return {"title": "Error fetching metadata", "description": str(e)}
|
| 489 |
|
| 490 |
+
def segment_transcript(transcript_text):
    """Segment a transcript into intro, body, and conclusion sections.

    The text is split on ``". "``. The first three pieces form the intro,
    the last two of the remaining pieces form the conclusion, and whatever
    lies between them forms the body. Every piece is assigned to exactly
    one section, so short transcripts never repeat a sentence across
    sections.

    Args:
        transcript_text: The transcript as a single string.

    Returns:
        A dict with the keys ``"intro"``, ``"body"`` and ``"conclusion"``.
        A section that received no sentences is the empty string.
    """
    sentences = transcript_text.split(". ")
    intro_part = sentences[:3]
    # Slice the conclusion and body from what is left after the intro, not
    # from the full list: with fewer than five sentences, `sentences[-2:]`
    # would overlap the intro and duplicate content in the summary.
    remainder = sentences[3:]
    body_part = remainder[:-2]
    conclusion_part = remainder[-2:]
    return {
        "intro": ". ".join(intro_part),
        "body": ". ".join(body_part),
        "conclusion": ". ".join(conclusion_part),
    }
|
| 497 |
+
|
| 498 |
+
def summarize_text(text, summarizer, max_chunk_size=512):
    """Summarize text using the provided summarization model.

    The text is cut into chunks of at most ``max_chunk_size`` characters,
    breaking at whitespace so words are not split across chunks (a single
    word longer than the limit is still hard-split). All chunks are passed
    to ``summarizer`` in one call and the per-chunk summaries are joined
    with single spaces.

    Args:
        text: The text to summarize.
        summarizer: Callable invoked as ``summarizer(chunks, max_length=150,
            min_length=40, do_sample=False)``; it must return an iterable of
            mappings that each contain a ``"summary_text"`` entry.
        max_chunk_size: Maximum number of characters per chunk.

    Returns:
        The joined summary string, or ``""`` when ``text`` is empty or
        contains only whitespace (the summarizer is not called then).
    """
    import textwrap

    chunks = textwrap.wrap(
        text,
        width=max_chunk_size,
        break_long_words=True,
        break_on_hyphens=False,
    )
    # Nothing to summarize: skip the model call rather than hand it an
    # empty batch.
    if not chunks:
        return ""
    summaries = summarizer(chunks, max_length=150, min_length=40, do_sample=False)
    return " ".join(summary["summary_text"] for summary in summaries)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
|
| 505 |
def process_youtube_video(url):
|
| 506 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
video_id = extract_video_id(url)
|
| 508 |
if not video_id:
|
| 509 |
return None, "Invalid YouTube URL", "N/A"
|
| 510 |
|
| 511 |
thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
|
| 512 |
|
| 513 |
+
# Load summarization model
|
| 514 |
+
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
|
| 515 |
+
|
| 516 |
try:
|
| 517 |
+
# Fetch transcript
|
| 518 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 519 |
+
transcript = transcript_list.find_transcript(['en']).fetch()
|
| 520 |
+
transcript_text = " ".join([t['text'] for t in transcript])
|
| 521 |
+
cleaned_text = clean_text(transcript_text)
|
|
|
|
|
|
|
| 522 |
|
| 523 |
+
# Segment transcript into sections
|
| 524 |
+
segments = segment_transcript(cleaned_text)
|
|
|
|
| 525 |
|
| 526 |
+
# Summarize each section
|
| 527 |
+
intro_summary = summarize_text(segments["intro"], summarizer)
|
| 528 |
+
body_summary = summarize_text(segments["body"], summarizer)
|
| 529 |
+
conclusion_summary = summarize_text(segments["conclusion"], summarizer)
|
| 530 |
|
| 531 |
+
# Sentiment analysis
|
| 532 |
sentiment = TextBlob(cleaned_text).sentiment
|
| 533 |
sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
|
| 534 |
|
| 535 |
+
detailed_summary = (
|
| 536 |
+
f"### Introduction\n{intro_summary}\n\n"
|
| 537 |
+
f"### Main Body\n{body_summary}\n\n"
|
| 538 |
+
f"### Conclusion\n{conclusion_summary}"
|
| 539 |
+
)
|
| 540 |
+
|
| 541 |
except (TranscriptsDisabled, NoTranscriptFound):
|
| 542 |
metadata = get_video_metadata(video_id)
|
| 543 |
detailed_summary = metadata.get("description", "No subtitles available")
|