Sayiqa committed on
Commit
a61dc77
·
verified ·
1 Parent(s): 444eea5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -81
app.py CHANGED
@@ -136,44 +136,170 @@ from googleapiclient.discovery import build
136
 
137
  YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your actual API key
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  def extract_video_id(url):
140
- match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
141
- return match.group(1) if match else None
 
 
 
 
 
 
 
 
142
 
143
- def get_video_metadata(video_id):
 
144
  try:
145
- youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
146
- request = youtube.videos().list(part="snippet", id=video_id)
147
- response = request.execute()
148
-
149
- if "items" in response and len(response["items"]) > 0:
150
- snippet = response["items"][0]["snippet"]
151
- return {
152
- "title": snippet.get("title", "No title available"),
153
- "description": snippet.get("description", "No description available"),
154
- }
155
- return {}
156
 
157
- except Exception as e:
158
- return {"title": "Error fetching metadata", "description": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- def clean_text_for_analysis(text):
161
- return " ".join(text.split())
162
 
163
- def extract_subtitle_info(text):
164
- try:
165
- sentences = text.split(". ")
166
- words = text.split()
167
- common_words = Counter(words).most_common(10)
168
- key_topics = ", ".join([word for word, count in common_words])
169
- info = {
170
- "key_topics": key_topics,
171
- "sentence_count": len(sentences),
172
- "word_count": len(words),
173
- }
174
- return info
175
  except Exception as e:
176
- return {"error": str(e)}
177
 
178
  def get_recommendations(keywords, max_results=5):
179
  if not keywords:
@@ -190,66 +316,18 @@ def get_recommendations(keywords, max_results=5):
190
  "key": YOUTUBE_API_KEY
191
  }
192
  ).json()
193
-
194
  results = []
195
  for item in response.get("items", []):
196
  title = item["snippet"]["title"]
197
  channel = item["snippet"]["channelTitle"]
198
  video_id = item["id"]["videoId"]
199
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
200
-
201
  return "\n".join(results) if results else "No recommendations found"
202
  except Exception as e:
203
  return f"Error: {str(e)}"
204
 
205
- def process_youtube_video(url, keywords):
206
- try:
207
- thumbnail = None
208
- summary = "No transcript available"
209
- sentiment_label = "N/A"
210
- subtitle_info = {}
211
- recommendations = ""
212
-
213
- video_id = extract_video_id(url)
214
- if not video_id:
215
- return None, "Invalid YouTube URL", "N/A", {}, ""
216
-
217
- thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
218
-
219
- try:
220
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
221
- transcript = None
222
- try:
223
- transcript = transcript_list.find_transcript(['en'])
224
- except:
225
- transcript = transcript_list.find_generated_transcript(['en'])
226
-
227
- text = " ".join([t['text'] for t in transcript.fetch()])
228
- if not text.strip():
229
- raise ValueError("Transcript is empty")
230
-
231
- cleaned_text = clean_text_for_analysis(text)
232
- subtitle_info = extract_subtitle_info(cleaned_text)
233
-
234
- sentiment = TextBlob(cleaned_text).sentiment
235
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
236
-
237
- summary = f"Summary: {cleaned_text[:400]}..."
238
-
239
- except (TranscriptsDisabled, NoTranscriptFound):
240
- metadata = get_video_metadata(video_id)
241
- summary = metadata.get("description", "No subtitles available")
242
- sentiment_label = "N/A"
243
-
244
- if keywords.strip():
245
- recommendations = get_recommendations(keywords)
246
-
247
- return thumbnail, summary, sentiment_label, subtitle_info, recommendations
248
-
249
- except Exception as e:
250
- return None, f"Error: {str(e)}", "N/A", {}, ""
251
-
252
-
253
 
254
 
255
  # Gradio Interface
 
136
 
137
  # YouTube Data API key used by the request helpers in this module.
  # SECURITY NOTE(review): this credential is hard-coded in committed source.
  # It should presumably be loaded from configuration (e.g. an environment
  # variable) and the committed value rotated -- confirm with the owner.
  YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your actual API key
138
 
139
+ # def extract_video_id(url):
140
+ # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
141
+ # return match.group(1) if match else None
142
+
143
+ # def get_video_metadata(video_id):
144
+ # try:
145
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
146
+ # request = youtube.videos().list(part="snippet", id=video_id)
147
+ # response = request.execute()
148
+
149
+ # if "items" in response and len(response["items"]) > 0:
150
+ # snippet = response["items"][0]["snippet"]
151
+ # return {
152
+ # "title": snippet.get("title", "No title available"),
153
+ # "description": snippet.get("description", "No description available"),
154
+ # }
155
+ # return {}
156
+
157
+ # except Exception as e:
158
+ # return {"title": "Error fetching metadata", "description": str(e)}
159
+
160
+ # def clean_text_for_analysis(text):
161
+ # return " ".join(text.split())
162
+
163
+ # def extract_subtitle_info(text):
164
+ # try:
165
+ # sentences = text.split(". ")
166
+ # words = text.split()
167
+ # common_words = Counter(words).most_common(10)
168
+ # key_topics = ", ".join([word for word, count in common_words])
169
+ # info = {
170
+ # "key_topics": key_topics,
171
+ # "sentence_count": len(sentences),
172
+ # "word_count": len(words),
173
+ # }
174
+ # return info
175
+ # except Exception as e:
176
+ # return {"error": str(e)}
177
+
178
+ # def get_recommendations(keywords, max_results=5):
179
+ # if not keywords:
180
+ # return "Please provide search keywords"
181
+ # try:
182
+ # response = requests.get(
183
+ # "https://www.googleapis.com/youtube/v3/search",
184
+ # params={
185
+ # "part": "snippet",
186
+ # "q": f"educational {keywords}",
187
+ # "type": "video",
188
+ # "maxResults": max_results,
189
+ # "relevanceLanguage": "en",
190
+ # "key": YOUTUBE_API_KEY
191
+ # }
192
+ # ).json()
193
+
194
+ # results = []
195
+ # for item in response.get("items", []):
196
+ # title = item["snippet"]["title"]
197
+ # channel = item["snippet"]["channelTitle"]
198
+ # video_id = item["id"]["videoId"]
199
+ # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
200
+
201
+ # return "\n".join(results) if results else "No recommendations found"
202
+ # except Exception as e:
203
+ # return f"Error: {str(e)}"
204
+
205
+ # def process_youtube_video(url, keywords):
206
+ # try:
207
+ # thumbnail = None
208
+ # summary = "No transcript available"
209
+ # sentiment_label = "N/A"
210
+ # subtitle_info = {}
211
+ # recommendations = ""
212
+
213
+ # video_id = extract_video_id(url)
214
+ # if not video_id:
215
+ # return None, "Invalid YouTube URL", "N/A", {}, ""
216
+
217
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
218
+
219
+ # try:
220
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
221
+ # transcript = None
222
+ # try:
223
+ # transcript = transcript_list.find_transcript(['en'])
224
+ # except:
225
+ # transcript = transcript_list.find_generated_transcript(['en'])
226
+
227
+ # text = " ".join([t['text'] for t in transcript.fetch()])
228
+ # if not text.strip():
229
+ # raise ValueError("Transcript is empty")
230
+
231
+ # cleaned_text = clean_text_for_analysis(text)
232
+ # subtitle_info = extract_subtitle_info(cleaned_text)
233
+
234
+ # sentiment = TextBlob(cleaned_text).sentiment
235
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
236
+
237
+ # summary = f"Summary: {cleaned_text[:400]}..."
238
+
239
+ # except (TranscriptsDisabled, NoTranscriptFound):
240
+ # metadata = get_video_metadata(video_id)
241
+ # summary = metadata.get("description", "No subtitles available")
242
+ # sentiment_label = "N/A"
243
+
244
+ # if keywords.strip():
245
+ # recommendations = get_recommendations(keywords)
246
+
247
+ # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
248
+
249
+ # except Exception as e:
250
+ # return None, f"Error: {str(e)}", "N/A", {}, ""
251
def sanitize_text(text):
    """Return *text* after a UTF-8 encode/decode round trip.

    Characters that cannot be encoded as UTF-8 (for example unpaired
    surrogates) come back as "?"; all other characters are unchanged.
    """
    utf8_bytes = text.encode("utf-8", errors="replace")
    return utf8_bytes.decode("utf-8")
254
+
255
  def extract_video_id(url):
256
+ if not url:
257
+ return None
258
+ patterns = [
259
+ r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
260
+ ]
261
+ for pattern in patterns:
262
+ match = re.search(pattern, url)
263
+ if match:
264
+ return match.group(1)
265
+ return None
266
 
267
def process_youtube_video(url="", keywords=""):
    """Analyse a YouTube video and/or fetch keyword-based recommendations.

    Either argument may be blank (or None); the corresponding half of the
    work is skipped when its input is blank.

    Args:
        url: YouTube video URL to summarise and score for sentiment.
        keywords: Search terms forwarded to get_recommendations().

    Returns:
        A 4-tuple ``(thumbnail, summary, sentiment_label, recommendations)``.
        ``thumbnail`` is an image URL or None; the other three are strings.
        Failures are reported as "Error: ..." text in the affected field
        instead of being raised.
    """
    # Normalise once so None and whitespace-only inputs behave like "".
    url = (url or "").strip()
    keywords = (keywords or "").strip()

    thumbnail = None
    summary = ""
    sentiment_label = "N/A"
    recommendations = ""

    if url:
        # Broad catch is deliberate: this is the UI boundary and any failure
        # (network, transcript unavailable, model error) becomes a message.
        try:
            video_id = extract_video_id(url)
            if not video_id:
                summary = "Invalid YouTube URL"
            else:
                transcript = YouTubeTranscriptApi.get_transcript(video_id)
                text = sanitize_text(
                    " ".join(entry["text"] for entry in transcript)
                )

                model = genai.GenerativeModel("gemini-pro")
                summary = model.generate_content(f"Summarize this: {text}").text

                polarity = TextBlob(text).sentiment.polarity
                if polarity > 0:
                    mood = "Positive"
                elif polarity < 0:
                    mood = "Negative"
                else:
                    mood = "Neutral"
                sentiment_label = f"{mood} ({polarity:.2f})"

                thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        except Exception as e:
            thumbnail = None
            summary = f"Error: {str(e)}"
            sentiment_label = "N/A"

    # Handled separately so a video-analysis failure does not discard
    # recommendations the user also asked for.
    if keywords:
        try:
            recommendations = get_recommendations(keywords)
        except Exception as e:
            recommendations = f"Error: {str(e)}"

    return (
        thumbnail,
        summary or "Enter a video URL for analysis",
        sentiment_label,
        recommendations or "Enter keywords for recommendations",
    )
303
 
304
  def get_recommendations(keywords, max_results=5):
305
  if not keywords:
 
316
  "key": YOUTUBE_API_KEY
317
  }
318
  ).json()
319
+
320
  results = []
321
  for item in response.get("items", []):
322
  title = item["snippet"]["title"]
323
  channel = item["snippet"]["channelTitle"]
324
  video_id = item["id"]["videoId"]
325
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
326
+
327
  return "\n".join(results) if results else "No recommendations found"
328
  except Exception as e:
329
  return f"Error: {str(e)}"
330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
 
333
  # Gradio Interface