Sayiqa7 committed on
Commit
7a199b6
·
verified ·
1 Parent(s): 17d2cf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -55
app.py CHANGED
@@ -464,13 +464,13 @@ courses_data = [
464
  (5, "Mathematics", "Ms. Smith", "Intermediate")
465
  ]
466
  from transformers import pipeline
467
- # Load Hugging Face summarization pipeline
468
- summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
469
-
def extract_video_id(url):
    """Return the YouTube video ID embedded in *url*, or ``None``.

    The ID is taken to be the first run of exactly 11 ID characters
    (ASCII letters, digits, ``_`` or ``-``) that directly follows ``v=`` or
    a ``/``.

    Args:
        url: The URL text to search.

    Returns:
        The 11-character ID string, or ``None`` when *url* is not a string
        or contains no such run.
    """
    # Non-string input (e.g. None from an empty form field) would make
    # re.search raise TypeError; treat it as "no ID found" instead.
    if not isinstance(url, str):
        return None
    # The trailing negative lookahead rejects longer path segments such as
    # "/attribution_link" or channel IDs, whose first 11 characters would
    # otherwise be mistaken for a video ID.
    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
473
 
 
 
 
474
  def get_video_metadata(video_id):
475
  try:
476
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
@@ -484,78 +484,60 @@ def get_video_metadata(video_id):
484
  "description": snippet.get("description", "No description available"),
485
  }
486
  return {}
487
-
488
  except Exception as e:
489
  return {"title": "Error fetching metadata", "description": str(e)}
490
 
def clean_text_for_analysis(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed as a side effect of
    splitting on whitespace and re-joining with single spaces.

    Args:
        text: The string to normalize; ``None`` or an empty string is
            accepted.

    Returns:
        The normalized string, or ``""`` when *text* is empty or ``None``.
    """
    # Guard against None so callers do not hit AttributeError on .split().
    if not text:
        return ""
    return " ".join(text.split())
493
-
def get_recommendations(keywords, max_results=5, timeout=10):
    """Search YouTube for educational videos matching *keywords*.

    Sends a GET request to the YouTube Data API v3 ``search`` endpoint with
    the query ``"educational <keywords>"`` and formats each returned video
    as a three-line entry (title, channel, watch URL).

    Args:
        keywords: Search terms; a falsy value short-circuits with a prompt
            message and no request is made.
        max_results: Value passed as the API's ``maxResults`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A string: the formatted entries joined by blank lines,
        ``"No recommendations found"`` when nothing usable came back,
        ``"Please provide search keywords"`` for empty input, or
        ``"Error: <message>"`` if anything raised.
    """
    if not keywords:
        return "Please provide search keywords"
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection and the caller would hang.
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params={
                "part": "snippet",
                "q": f"educational {keywords}",
                "type": "video",
                "maxResults": max_results,
                "relevanceLanguage": "en",
                "key": YOUTUBE_API_KEY,
            },
            timeout=timeout,
        ).json()

        results = []
        for item in response.get("items", []):
            snippet = item.get("snippet", {})
            video_id = item.get("id", {}).get("videoId")
            if not video_id:
                # Skip entries without a video ID rather than aborting the
                # whole list with a KeyError.
                continue
            title = snippet.get("title", "")
            channel = snippet.get("channelTitle", "")
            # Emoji are written as single \U escapes: the former
            # "\ud83d\udcfa"-style pairs are lone surrogates in a Python
            # str and cannot be encoded as UTF-8 on output.
            results.append(
                f"\U0001F4FA {title}\n"
                f"\U0001F464 {channel}\n"
                f"\U0001F517 https://youtube.com/watch?v={video_id}\n"
            )

        return "\n".join(results) if results else "No recommendations found"
    except Exception as e:
        # Deliberate catch-all: the function reports failures as text.
        # NOTE(review): str(e) from requests may include the request URL,
        # which carries the API key - confirm before showing to end users.
        return f"Error: {str(e)}"
520
-
def summarize_text(text, chunk_size=1000):
    """Summarize *text* chunk by chunk with the module-level ``summarizer``.

    The text is split into chunks of at most *chunk_size* characters, each
    chunk is summarized, and the partial summaries are joined with spaces.

    Args:
        text: The text to summarize.
        chunk_size: Maximum number of characters per chunk.

    Returns:
        The combined summary, ``""`` when *text* is empty or only
        whitespace, or ``"Error during summarization: <message>"`` if
        anything raised.
    """
    import textwrap

    try:
        # An empty input would otherwise send an empty batch to the model.
        if not text or not text.strip():
            return ""
        # textwrap.wrap breaks on whitespace, so words are not cut in half
        # the way fixed-width slicing cuts them.
        chunks = textwrap.wrap(text, width=chunk_size)
        summaries = summarizer(chunks, max_length=150, min_length=50, do_sample=False)
        return " ".join(summary["summary_text"] for summary in summaries)
    except Exception as e:
        # Deliberate catch-all: failures are reported as text to the caller.
        return f"Error during summarization: {str(e)}"
528
 
529
  def process_youtube_video(url):
530
  try:
531
- thumbnail = None
532
- detailed_summary = "No transcript available"
533
- sentiment_label = "N/A"
534
-
535
  video_id = extract_video_id(url)
536
  if not video_id:
537
  return None, "Invalid YouTube URL", "N/A"
538
 
539
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
540
 
 
 
 
541
  try:
 
542
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
543
- transcript = None
544
- try:
545
- transcript = transcript_list.find_transcript(['en'])
546
- except:
547
- transcript = transcript_list.find_generated_transcript(['en'])
548
 
549
- text = " ".join([t['text'] for t in transcript.fetch()])
550
- if not text.strip():
551
- raise ValueError("Transcript is empty")
552
 
553
- cleaned_text = clean_text_for_analysis(text)
554
- detailed_summary = summarize_text(cleaned_text)
 
 
555
 
 
556
  sentiment = TextBlob(cleaned_text).sentiment
557
  sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
558
 
 
 
 
 
 
 
559
  except (TranscriptsDisabled, NoTranscriptFound):
560
  metadata = get_video_metadata(video_id)
561
  detailed_summary = metadata.get("description", "No subtitles available")
 
464
  (5, "Mathematics", "Ms. Smith", "Intermediate")
465
  ]
466
  from transformers import pipeline
 
 
 
def extract_video_id(url):
    """Return the YouTube video ID embedded in *url*, or ``None``.

    The ID is taken to be the first run of exactly 11 ID characters
    (ASCII letters, digits, ``_`` or ``-``) that directly follows ``v=`` or
    a ``/``.

    Args:
        url: The URL text to search.

    Returns:
        The 11-character ID string, or ``None`` when *url* is not a string
        or contains no such run.
    """
    # Non-string input (e.g. None from an empty form field) would make
    # re.search raise TypeError; treat it as "no ID found" instead.
    if not isinstance(url, str):
        return None
    # The trailing negative lookahead rejects longer path segments such as
    # "/attribution_link" or channel IDs, whose first 11 characters would
    # otherwise be mistaken for a video ID.
    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
470
 
def clean_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed as a side effect of
    splitting on whitespace and re-joining with single spaces.

    Args:
        text: The string to normalize; ``None`` or an empty string is
            accepted.

    Returns:
        The normalized string, or ``""`` when *text* is empty or ``None``.
    """
    # Guard against None so callers do not hit AttributeError on .split().
    if not text:
        return ""
    return " ".join(text.split())
473
+
474
  def get_video_metadata(video_id):
475
  try:
476
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
 
484
  "description": snippet.get("description", "No description available"),
485
  }
486
  return {}
 
487
  except Exception as e:
488
  return {"title": "Error fetching metadata", "description": str(e)}
489
 
def segment_transcript(transcript_text, intro_size=3, conclusion_size=2):
    """Split a transcript into intro, body and conclusion sections.

    The text is split on ``". "``. The first *intro_size* pieces form the
    intro, the last *conclusion_size* of the remaining pieces form the
    conclusion, and whatever lies between is the body. Sections never
    overlap: on a short transcript the intro is filled first, then the
    conclusion, and the body may be empty.

    Args:
        transcript_text: The transcript as one string.
        intro_size: Number of leading pieces assigned to the intro.
        conclusion_size: Number of trailing pieces assigned to the
            conclusion.

    Returns:
        A dict with the keys ``"intro"``, ``"body"`` and ``"conclusion"``,
        each a string with its pieces re-joined by ``". "``.
    """
    if not transcript_text:
        return {"intro": "", "body": "", "conclusion": ""}

    lines = transcript_text.split(". ")
    intro_lines = lines[:intro_size]
    remaining = lines[intro_size:]
    # Take the conclusion from what is left after the intro so a short
    # transcript is not placed in both sections and summarized twice.
    split_at = max(len(remaining) - conclusion_size, 0)
    body_lines = remaining[:split_at]
    conclusion_lines = remaining[split_at:]

    return {
        "intro": ". ".join(intro_lines),
        "body": ". ".join(body_lines),
        "conclusion": ". ".join(conclusion_lines),
    }
497
+
498
+ def summarize_text(text, summarizer):
499
+ """Summarize text using the provided summarization model."""
500
+ max_chunk_size = 512
501
+ chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
502
+ summaries = summarizer(chunks, max_length=150, min_length=40, do_sample=False)
503
+ return " ".join(summary["summary_text"] for summary in summaries)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
505
  def process_youtube_video(url):
506
  try:
 
 
 
 
507
  video_id = extract_video_id(url)
508
  if not video_id:
509
  return None, "Invalid YouTube URL", "N/A"
510
 
511
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
512
 
513
+ # Load summarization model
514
+ summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
515
+
516
  try:
517
+ # Fetch transcript
518
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
519
+ transcript = transcript_list.find_transcript(['en']).fetch()
520
+ transcript_text = " ".join([t['text'] for t in transcript])
521
+ cleaned_text = clean_text(transcript_text)
 
 
522
 
523
+ # Segment transcript into sections
524
+ segments = segment_transcript(cleaned_text)
 
525
 
526
+ # Summarize each section
527
+ intro_summary = summarize_text(segments["intro"], summarizer)
528
+ body_summary = summarize_text(segments["body"], summarizer)
529
+ conclusion_summary = summarize_text(segments["conclusion"], summarizer)
530
 
531
+ # Sentiment analysis
532
  sentiment = TextBlob(cleaned_text).sentiment
533
  sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
534
 
535
+ detailed_summary = (
536
+ f"### Introduction\n{intro_summary}\n\n"
537
+ f"### Main Body\n{body_summary}\n\n"
538
+ f"### Conclusion\n{conclusion_summary}"
539
+ )
540
+
541
  except (TranscriptsDisabled, NoTranscriptFound):
542
  metadata = get_video_metadata(video_id)
543
  detailed_summary = metadata.get("description", "No subtitles available")