Sayiqa7 committed on
Commit
a274f10
·
verified ·
1 Parent(s): dbac2c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -83
app.py CHANGED
@@ -33,7 +33,6 @@ import re
33
  from googleapiclient.discovery import build
34
  from huggingface_hub import login
35
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
36
- from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled
37
  def install_missing_packages():
38
  required_packages = {
39
  "torch":">=1.11.0",
@@ -109,17 +108,14 @@ courses_data = [
109
  (5, "Mathematics", "Ms. Smith", "Intermediate")
110
  ]
111
 
112
-
113
-
114
  def extract_video_id(url):
115
- # Improved regex to handle various YouTube URL formats
116
- match = re.search(r"(?:v=|\/|be\/|embed\/|watch\?v=)([0-9A-Za-z_-]{11})", url)
117
  return match.group(1) if match else None
118
 
119
  def get_video_metadata(video_id):
120
  try:
121
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
122
- request = youtube.videos().list(part="snippet,contentDetails", id=video_id)
123
  response = request.execute()
124
 
125
  if "items" in response and len(response["items"]) > 0:
@@ -127,8 +123,6 @@ def get_video_metadata(video_id):
127
  return {
128
  "title": snippet.get("title", "No title available"),
129
  "description": snippet.get("description", "No description available"),
130
- "publishedAt": snippet.get("publishedAt", "Unknown"),
131
- "channelTitle": snippet.get("channelTitle", "Unknown"),
132
  }
133
  return {}
134
 
@@ -138,50 +132,6 @@ def get_video_metadata(video_id):
138
  def clean_text_for_analysis(text):
139
  return " ".join(text.split())
140
 
141
- def segment_transcript(transcript):
142
- # Segment the transcript into introduction, key points, main body, and conclusion
143
- intro, key_points, main_body, conclusion = [], [], [], []
144
- total_segments = len(transcript)
145
-
146
- for idx, segment in enumerate(transcript):
147
- start_time = segment['start']
148
- text = segment['text']
149
-
150
- # Use rough heuristics to segment transcript
151
- if idx < total_segments * 0.1: # First 10% as introduction
152
- intro.append(text)
153
- elif idx < total_segments * 0.5: # Next 40% as key points
154
- key_points.append(text)
155
- elif idx < total_segments * 0.9: # Next 40% as main body
156
- main_body.append(text)
157
- else: # Last 10% as conclusion
158
- conclusion.append(text)
159
-
160
- return {
161
- "introduction": " ".join(intro),
162
- "key_points": " ".join(key_points),
163
- "main_body": " ".join(main_body),
164
- "conclusion": " ".join(conclusion),
165
- }
166
-
167
- def generate_summary(segments):
168
- # Generate a formatted summary
169
- return (
170
- "**Introduction:**\n" + segments["introduction"][:400] + "...\n\n" +
171
- "**Key Points:**\n" + segments["key_points"][:400] + "...\n\n" +
172
- "**Main Body:**\n" + segments["main_body"][:400] + "...\n\n" +
173
- "**Conclusion:**\n" + segments["conclusion"][:400] + "...\n"
174
- )
175
-
176
- def analyze_sentiment(text):
177
- sentiment = TextBlob(text).sentiment
178
- sentiment_label = (
179
- "Positive" if sentiment.polarity > 0 else
180
- "Negative" if sentiment.polarity < 0 else
181
- "Neutral"
182
- )
183
- return f"{sentiment_label} ({sentiment.polarity:.2f})"
184
-
185
  def get_recommendations(keywords, max_results=5):
186
  if not keywords:
187
  return "Please provide search keywords"
@@ -190,9 +140,10 @@ def get_recommendations(keywords, max_results=5):
190
  "https://www.googleapis.com/youtube/v3/search",
191
  params={
192
  "part": "snippet",
193
- "q": keywords,
194
  "type": "video",
195
  "maxResults": max_results,
 
196
  "key": YOUTUBE_API_KEY
197
  }
198
  ).json()
@@ -210,17 +161,15 @@ def get_recommendations(keywords, max_results=5):
210
 
211
  def process_youtube_video(url):
212
  try:
 
 
 
 
213
  video_id = extract_video_id(url)
214
  if not video_id:
215
  return None, "Invalid YouTube URL", "N/A"
216
 
217
- metadata = get_video_metadata(video_id)
218
- title = metadata.get("title", "No title")
219
- description = metadata.get("description", "No description available")
220
-
221
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
222
- summary = ""
223
- sentiment_label = "N/A"
224
 
225
  try:
226
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -230,33 +179,26 @@ def process_youtube_video(url):
230
  except:
231
  transcript = transcript_list.find_generated_transcript(['en'])
232
 
233
- text_segments = transcript.fetch()
234
- transcript_text = " ".join([segment['text'] for segment in text_segments])
235
- cleaned_text = clean_text_for_analysis(transcript_text)
236
- segmented = segment_transcript(text_segments)
237
- summary = generate_summary(segmented)
238
- sentiment_label = analyze_sentiment(cleaned_text)
239
-
240
- except (TranscriptsDisabled, NoTranscriptFound):
241
- summary = "No transcript available."
242
-
243
- return thumbnail, title, description, summary, sentiment_label
244
-
245
- except Exception as e:
246
- return None, f"Error: {str(e)}", "N/A", "N/A", "N/A"
247
 
248
- url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
249
- thumbnail, title, description, summary, sentiment = process_youtube_video(url)
250
 
251
- print(f"Thumbnail: {thumbnail}\n")
252
- print(f"Title: {title}\n")
253
- print(f"Description:\n{description}\n")
254
- print(f"Summary:\n{summary}\n")
255
- print(f"Sentiment: {sentiment}")
256
 
 
257
 
 
 
 
 
258
 
 
259
 
 
 
260
 
261
  # Gradio Interface
262
  with gr.Blocks(theme=gr.themes.Soft()) as app:
@@ -409,6 +351,3 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
409
 
410
  if __name__ == "__main__":
411
  app.launch()
412
-
413
-
414
-
 
33
  from googleapiclient.discovery import build
34
  from huggingface_hub import login
35
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
36
  def install_missing_packages():
37
  required_packages = {
38
  "torch":">=1.11.0",
 
108
  (5, "Mathematics", "Ms. Smith", "Intermediate")
109
  ]
110
 
 
 
111
  def extract_video_id(url):
112
+ match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
 
113
  return match.group(1) if match else None
114
 
115
  def get_video_metadata(video_id):
116
  try:
117
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
118
+ request = youtube.videos().list(part="snippet", id=video_id)
119
  response = request.execute()
120
 
121
  if "items" in response and len(response["items"]) > 0:
 
123
  return {
124
  "title": snippet.get("title", "No title available"),
125
  "description": snippet.get("description", "No description available"),
 
 
126
  }
127
  return {}
128
 
 
132
  def clean_text_for_analysis(text):
133
  return " ".join(text.split())
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def get_recommendations(keywords, max_results=5):
136
  if not keywords:
137
  return "Please provide search keywords"
 
140
  "https://www.googleapis.com/youtube/v3/search",
141
  params={
142
  "part": "snippet",
143
+ "q": f"educational {keywords}",
144
  "type": "video",
145
  "maxResults": max_results,
146
+ "relevanceLanguage": "en",
147
  "key": YOUTUBE_API_KEY
148
  }
149
  ).json()
 
161
 
162
  def process_youtube_video(url):
163
  try:
164
+ thumbnail = None
165
+ summary = "No transcript available"
166
+ sentiment_label = "N/A"
167
+
168
  video_id = extract_video_id(url)
169
  if not video_id:
170
  return None, "Invalid YouTube URL", "N/A"
171
 
 
 
 
 
172
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
 
 
173
 
174
  try:
175
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
 
179
  except:
180
  transcript = transcript_list.find_generated_transcript(['en'])
181
 
182
+ text = " ".join([t['text'] for t in transcript.fetch()])
183
+ if not text.strip():
184
+ raise ValueError("Transcript is empty")
 
 
 
 
 
 
 
 
 
 
 
185
 
186
+ cleaned_text = clean_text_for_analysis(text)
 
187
 
188
+ sentiment = TextBlob(cleaned_text).sentiment
189
+ sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
 
 
 
190
 
191
+ summary = f"Summary: {cleaned_text[:400]}..."
192
 
193
+ except (TranscriptsDisabled, NoTranscriptFound):
194
+ metadata = get_video_metadata(video_id)
195
+ summary = metadata.get("description", "No subtitles available")
196
+ sentiment_label = "N/A"
197
 
198
+ return thumbnail, summary, sentiment_label
199
 
200
+ except Exception as e:
201
+ return None, f"Error: {str(e)}", "N/A"
202
 
203
  # Gradio Interface
204
  with gr.Blocks(theme=gr.themes.Soft()) as app:
 
351
 
352
  if __name__ == "__main__":
353
  app.launch()