Sai16216 committed on
Commit
2c568d7
·
verified ·
1 Parent(s): aa51bf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -4,6 +4,8 @@ import google.generativeai as genai
4
  from dotenv import load_dotenv
5
  from pytube import YouTube
6
  from PyPDF2 import PdfReader
 
 
7
 
8
  load_dotenv()
9
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
@@ -75,27 +77,47 @@ Content:\n\n{text[:5000]}
75
  return f"Error reading PDF: {e}"
76
 
77
 
 
 
 
 
 
 
 
 
 
 
78
  def summarize_youtube(url):
79
  try:
80
- yt = YouTube(url)
81
- caption = yt.captions.get_by_language_code('en')
 
82
 
83
- if not caption:
84
- return "❌ No English captions available for this video."
 
 
 
 
 
85
 
86
- # Captions are in XML/TTML format – convert to plain text
87
- caption_text = caption.generate_srt_captions()
88
 
89
- if not caption_text.strip():
90
- return "❌ Could not extract meaningful caption text."
91
 
92
- prompt = f"Summarize the key educational points in the following YouTube video transcript:\n\n{caption_text[:5000]} in markdown points. Use Bullet points and sections."
93
  response = model.generate_content(prompt)
94
  return response.text
95
 
 
 
 
 
96
  except Exception as e:
97
  return f"❌ Error summarizing video: {e}"
98
 
 
99
  with gr.Blocks() as demo:
100
  gr.Markdown("# πŸ“˜ Study Buddy\nYour smart AI learning assistant!")
101
 
 
4
  from dotenv import load_dotenv
5
  from pytube import YouTube
6
  from PyPDF2 import PdfReader
7
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
8
+ from urllib.parse import urlparse, parse_qs
9
 
10
  load_dotenv()
11
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
77
  return f"Error reading PDF: {e}"
78
 
79
 
80
def get_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognised forms are ``watch?v=ID`` links on the main, mobile and music
    hosts, ``youtu.be/ID`` short links, and ``/embed/ID``, ``/shorts/ID``,
    ``/live/ID`` and ``/v/ID`` paths. A URL pasted without a scheme
    (``youtube.com/watch?v=ID``) is accepted as well.

    Args:
        url: The URL text supplied by the user.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string, is not
        a recognised YouTube link, or carries no ID.
    """
    if not isinstance(url, str):
        return None

    candidate = url.strip()
    # Without a scheme urlparse() puts the host into ``path`` and leaves
    # ``hostname`` empty, so bare "youtube.com/..." links would be rejected.
    if "://" not in candidate:
        candidate = "https://" + candidate

    try:
        parsed_url = urlparse(candidate)
        host = parsed_url.hostname or ""
    except ValueError:
        # urlparse raises ValueError on malformed netlocs (e.g. bad IPv6).
        return None

    # Non-empty path segments; ignores leading, trailing and doubled slashes.
    segments = [part for part in parsed_url.path.split("/") if part]

    if host == "youtu.be":
        return segments[0] if segments else None

    youtube_hosts = (
        "www.youtube.com",
        "youtube.com",
        "m.youtube.com",
        "music.youtube.com",
    )
    if host in youtube_hosts:
        ids = parse_qs(parsed_url.query).get("v")
        if ids:
            return ids[0]
        # Path-style links keep the ID in the second segment.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None
89
+
90
  def summarize_youtube(url):
91
  try:
92
+ video_id = get_video_id(url)
93
+ if not video_id:
94
+ return "❌ Invalid YouTube URL."
95
 
96
+ # Fetch English transcript
97
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
98
+ transcript = None
99
+ try:
100
+ transcript = transcript_list.find_transcript(['en']) # Try manual transcript first
101
+ except NoTranscriptFound:
102
+ transcript = transcript_list.find_transcript(['en']).fetch() # Try auto-generated
103
 
104
+ transcript_text = " ".join([t['text'] for t in transcript.fetch()])
 
105
 
106
+ if len(transcript_text.strip()) == 0:
107
+ return "❌ Transcript is empty."
108
 
109
+ prompt = f"Summarize the key educational points in the following YouTube video transcript:\n\n{transcript_text[:5000]} in markdown format and Organize into sections."
110
  response = model.generate_content(prompt)
111
  return response.text
112
 
113
+ except TranscriptsDisabled:
114
+ return "❌ Transcripts are disabled for this video."
115
+ except NoTranscriptFound:
116
+ return "❌ No English transcript available for this video."
117
  except Exception as e:
118
  return f"❌ Error summarizing video: {e}"
119
 
120
+
121
  with gr.Blocks() as demo:
122
  gr.Markdown("# πŸ“˜ Study Buddy\nYour smart AI learning assistant!")
123