Sayiqa committed on
Commit
7d97b0f
·
verified ·
1 Parent(s): d778a04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -222
app.py CHANGED
@@ -130,253 +130,143 @@ import re
130
  from collections import Counter
131
  from googleapiclient.discovery import build
132
 
133
- # def process_youtube_video(url="", keywords=""):
134
- # try:
135
- # #Initialize variables
136
- # thumbnail = None
137
- # summary = "No transcript available"
138
- # sentiment_label = "N/A"
139
- # recommendations = ""
140
- # subtitle_info = "No additional information available"
141
-
142
- # if not url.strip():
143
- # return None, "Please enter a YouTube URL", "N/A", "", ""
144
-
145
- # video_id = extract_video_id(url)
146
- # if not video_id:
147
- # return None, "Invalid YouTube URL", "N/A", "", ""
148
-
149
- # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
150
-
151
- # try:
152
- # # Fetch transcript
153
- # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
154
- # transcript = None
155
- # try:
156
- # transcript = transcript_list.find_transcript(['en'])
157
- # except:
158
- # transcript = transcript_list.find_generated_transcript(['en'])
159
-
160
- # text = " ".join([t['text'] for t in transcript.fetch()])
161
- # if not text.strip():
162
- # raise ValueError("Transcript is empty")
163
-
164
- # # Clean up the text for sentiment analysis
165
- # cleaned_text = clean_text_for_analysis(text)
166
-
167
- # # Sentiment analysis
168
- # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
169
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
170
-
171
- # # Generate summary
172
- # model = genai.GenerativeModel("gemini-pro")
173
- # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
174
-
175
- # # Extract subtitle information
176
- # subtitle_info = extract_subtitle_info(cleaned_text)
177
-
178
- # except TranscriptsDisabled:
179
- # metadata = get_video_metadata(video_id)
180
- # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
181
- # sentiment_label = "N/A"
182
- # subtitle_info = "No subtitles available for analysis."
183
- # except NoTranscriptFound:
184
- # metadata = get_video_metadata(video_id)
185
- # summary = metadata.get("description", "⚠️ No English transcript available.")
186
- # sentiment_label = "N/A"
187
- # subtitle_info = "No subtitles available for analysis."
188
- # except Exception as e:
189
- # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
190
-
191
- # # Get recommendations
192
- # if keywords.strip():
193
- # recommendations = get_recommendations(keywords)
194
-
195
- # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
196
-
197
- # except Exception as e:
198
- # return None, f"Error: {str(e)}", "N/A", "", ""
199
-
200
-
201
- # def extract_video_id(url):
202
- # """
203
- # Extracts the video ID from a YouTube URL.
204
- # """
205
- # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
206
- # return match.group(1) if match else None
207
-
208
-
209
- # def get_video_metadata(video_id):
210
- # """
211
- # Fetches video metadata such as title and description using the YouTube Data API.
212
- # """
213
- # try:
214
- # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
215
- # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
216
- # request = youtube.videos().list(part="snippet", id=video_id)
217
- # response = request.execute()
218
-
219
- # if "items" in response and len(response["items"]) > 0:
220
- # snippet = response["items"][0]["snippet"]
221
- # return {
222
- # "title": snippet.get("title", "No title available"),
223
- # "description": snippet.get("description", "No description available"),
224
- # }
225
- # return {}
226
-
227
- # except Exception as e:
228
- # return {"title": "Error fetching metadata", "description": str(e)}
229
-
230
-
231
- # def extract_subtitle_info(text):
232
- # """
233
- # Extracts meaningful information from the subtitles.
234
- # This could include topics, key insights, or a breakdown of the content.
235
- # """
236
- # try:
237
- # # Split text into sentences for better analysis
238
- # sentences = text.split(". ")
239
-
240
- # # Example: Extract key topics or keywords
241
- # words = text.split()
242
- # common_words = Counter(words).most_common(10)
243
- # key_topics = ", ".join([word for word, count in common_words])
244
-
245
- # # Example: Provide a breakdown of the content
246
- # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
247
-
248
- # return info
249
- # except Exception as e:
250
- # return f"Error extracting subtitle information: {str(e)}"
251
-
252
-
253
- # def clean_text_for_analysis(text):
254
- # """
255
- # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
256
- # """
257
- # # Remove extra spaces and line breaks
258
- # cleaned_text = " ".join(text.split())
259
- # return cleaned_text
260
-
261
-
262
- # def get_recommendations(keywords):
263
- # """
264
- # Fetches related video recommendations based on the provided keywords.
265
- # This function can be expanded with a proper API or custom logic.
266
- # """
267
- # # Placeholder for fetching recommendations based on keywords
268
- # return f"Recommendations for: {keywords}" # Dummy return for now
269
- ######################################
270
- from textblob import TextBlob
271
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
272
- import re
273
- from collections import Counter
274
- from googleapiclient.discovery import build
275
- import os
276
-
277
- # Set your YouTube API key
278
- YOUTUBE_API_KEY = "AIzaSyB7X-RYjZmUuDSMTQsvCfyzURw5bhqOto4" # Replace with your actual API key
279
- # Alternatively, you can set it as an environment variable:
280
- # YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
281
-
282
- def process_youtube_video(url=""):
283
- """
284
- Process a YouTube video URL and return sentiment analysis of its content.
285
- """
286
  try:
287
- # Input validation
 
 
 
 
 
 
288
  if not url.strip():
289
- return {"error": "Please enter a YouTube URL"}
290
 
291
- # Extract video ID
292
  video_id = extract_video_id(url)
293
  if not video_id:
294
- return {"error": "Invalid YouTube URL"}
295
-
296
- # Get video transcript
297
- text = get_video_transcript(video_id)
298
- if isinstance(text, dict) and "error" in text:
299
- return text
300
 
301
- # Get video metadata
302
- metadata = get_video_metadata(video_id)
303
- if "error" in metadata:
304
- return metadata
305
 
306
- # Perform sentiment analysis
307
- sentiment_result = analyze_sentiment(text)
308
-
309
- return {
310
- "success": True,
311
- "metadata": metadata,
312
- "sentiment": sentiment_result,
313
- "video_id": video_id
314
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
  except Exception as e:
317
- return {"error": f"An error occurred: {str(e)}"}
 
 
 
 
 
 
 
 
 
318
 
319
  def get_video_metadata(video_id):
320
  """
321
- Fetches video metadata using the YouTube Data API.
322
  """
323
  try:
 
324
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
325
- request = youtube.videos().list(
326
- part="snippet",
327
- id=video_id
328
- )
329
  response = request.execute()
330
 
331
- if response.get("items"):
332
  snippet = response["items"][0]["snippet"]
333
  return {
334
- "title": snippet.get("title", ""),
335
- "description": snippet.get("description", ""),
336
- "publishedAt": snippet.get("publishedAt", ""),
337
- "channelTitle": snippet.get("channelTitle", "")
338
  }
339
- return {"error": "Video not found"}
340
 
341
  except Exception as e:
342
- return {"error": f"Error fetching metadata: {str(e)}"}
343
 
344
- # [Previous functions remain the same: get_video_transcript, analyze_sentiment,
345
- # extract_video_id, clean_text_for_analysis, get_detailed_sentiment]
346
 
347
- # Example usage with proper error handling:
348
- if __name__ == "__main__":
349
- # Example with a real YouTube URL
350
- test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ" # Replace with any YouTube URL
351
-
352
- # Check if API key is set
353
- if YOUTUBE_API_KEY == "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98":
354
- print("Error: Please set your YouTube API key first!")
355
- else:
356
- result = process_youtube_video(test_url)
357
 
358
- if "error" in result:
359
- print(f"Error: {result['error']}")
360
- else:
361
- print("\n=== Video Information ===")
362
- print(f"Title: {result['metadata']['title']}")
363
- print(f"Channel: {result['metadata']['channelTitle']}")
364
-
365
- print("\n=== Sentiment Analysis Results ===")
366
- sentiment = result['sentiment']
367
- print(f"Overall Sentiment: {sentiment['overall_sentiment']}")
368
- print(f"Average Polarity: {sentiment['average_polarity']}")
369
-
370
- print("\nSentiment Distribution:")
371
- dist = sentiment['sentiment_distribution']
372
- total = sum(dist.values())
373
- if total > 0:
374
- print(f"Positive: {dist['positive']} ({(dist['positive']/total*100):.1f}%)")
375
- print(f"Neutral: {dist['neutral']} ({(dist['neutral']/total*100):.1f}%)")
376
- print(f"Negative: {dist['negative']} ({(dist['negative']/total*100):.1f}%)")
377
-
378
- print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
379
- #####################################################################################################
 
 
 
 
 
 
 
 
380
 
381
  def get_recommendations(keywords, max_results=5):
382
  if not keywords:
 
130
  from collections import Counter
131
  from googleapiclient.discovery import build
132
 
133
def process_youtube_video(url="", keywords=""):
    """Analyse a YouTube video and return the values displayed by the UI.

    The English transcript is fetched, whitespace-normalised, scored with
    TextBlob, summarised with the ``gemini-pro`` model and broken down by
    ``extract_subtitle_info``. When the video has no usable transcript, the
    description returned by ``get_video_metadata`` is used as the summary.

    Args:
        url: YouTube video URL. ``None`` or a blank string is reported as a
            missing URL.
        keywords: Optional keywords forwarded to ``get_recommendations``.
            ``None`` or a blank string skips recommendations.

    Returns:
        A 5-tuple ``(thumbnail, summary, sentiment_label, subtitle_info,
        recommendations)``. On failure the tuple keeps the same shape: the
        second element carries the error message and the remaining text
        fields are ``"N/A"`` or empty strings.
    """
    try:
        # Defaults used when a later step does not overwrite them.
        thumbnail = None
        summary = "No transcript available"
        sentiment_label = "N/A"
        recommendations = ""
        subtitle_info = "No additional information available"

        # ``or ""`` lets a None value be handled like an empty input.
        if not (url or "").strip():
            return None, "Please enter a YouTube URL", "N/A", "", ""

        video_id = extract_video_id(url)
        if not video_id:
            return None, "Invalid YouTube URL", "N/A", "", ""

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        try:
            # Fetch transcript: try the regular English lookup first, then
            # fall back to the auto-generated English track.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except NoTranscriptFound:
                # Only "not found" triggers the fallback; any other failure
                # propagates to the handlers below instead of being hidden.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join([t['text'] for t in transcript.fetch()])
            if not text.strip():
                raise ValueError("Transcript is empty")

            # Clean up the text for sentiment analysis
            cleaned_text = clean_text_for_analysis(text)

            # Sentiment analysis on the cleaned text
            polarity = TextBlob(cleaned_text).sentiment.polarity
            if polarity > 0:
                tone = "Positive"
            elif polarity < 0:
                tone = "Negative"
            else:
                tone = "Neutral"
            sentiment_label = f"{tone} ({polarity:.2f})"

            # Generate summary from the first 4000 characters only
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text

            # Extract subtitle information
            subtitle_info = extract_subtitle_info(cleaned_text)

        except TranscriptsDisabled:
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
            sentiment_label = "N/A"
            subtitle_info = "No subtitles available for analysis."
        except NoTranscriptFound:
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "⚠️ No English transcript available.")
            sentiment_label = "N/A"
            subtitle_info = "No subtitles available for analysis."
        except Exception as e:
            # Keep the thumbnail so the UI can still show the video preview.
            return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""

        # Get recommendations
        if (keywords or "").strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, subtitle_info, recommendations

    except Exception as e:
        # Top-level boundary: the UI always receives a 5-tuple.
        return None, f"Error: {str(e)}", "N/A", "", ""
199
+
200
+
201
def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    The ID is taken from the first ``v=`` query value or path segment that
    is exactly 11 characters from ``[0-9A-Za-z_-]``. This covers
    ``watch?v=``, ``youtu.be/``, ``/embed/`` and ``/shorts/`` style links.

    Args:
        url: URL string to inspect. ``None`` or an empty string yields
            ``None``.

    Returns:
        The video ID, or ``None`` when no candidate is found.
    """
    if not url:
        return None
    # The negative lookahead rejects longer tokens, so the first 11
    # characters of e.g. a user or channel name are not mistaken for an ID.
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
207
+
208
 
def get_video_metadata(video_id):
    """Fetch a video's title and description via the YouTube Data API.

    The API key is read from the ``YOUTUBE_API_KEY`` environment variable
    rather than being embedded in the source, so the credential is not
    published together with the code.

    Args:
        video_id: YouTube video ID passed as ``id`` to ``videos().list``.

    Returns:
        ``{"title": ..., "description": ...}`` for the first item returned,
        ``{}`` when the response contains no items, or a dict whose title is
        ``"Error fetching metadata"`` and whose description holds the error
        text when the key is missing or the request fails.
    """
    # Local import: this block cannot rely on a module-level ``import os``.
    import os

    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        return {
            "title": "Error fetching metadata",
            "description": "YOUTUBE_API_KEY environment variable is not set",
        }

    try:
        youtube = build("youtube", "v3", developerKey=api_key)
        request = youtube.videos().list(part="snippet", id=video_id)
        response = request.execute()

        items = response.get("items")
        if items:
            snippet = items[0]["snippet"]
            return {
                "title": snippet.get("title", "No title available"),
                "description": snippet.get("description", "No description available"),
            }
        return {}

    except Exception as e:
        # Best effort: callers expect a dict and read "description" from it.
        return {"title": "Error fetching metadata", "description": str(e)}
229
 
 
 
230
 
231
# Common English function words that would otherwise dominate the word
# frequency ranking; deliberately small and easy to extend.
_SUBTITLE_STOPWORDS = frozenset(
    "a about all also an and are as at be been but by can do for from had "
    "has have he her his how i i'm if in into is it it's its just like me "
    "my no not of on or our out she so that that's the their them then "
    "there they this to up was we were what when which who will with you "
    "your don't".split()
)


def extract_subtitle_info(text):
    """Summarise subtitle text as key topics, sentence count and word count.

    Key topics are the ten most frequent words after lower-casing,
    stripping surrounding punctuation and dropping common stop words.
    Sentences are delimited by ``.``, ``!`` or ``?`` followed by whitespace;
    empty text counts as zero sentences. The word count is the number of
    whitespace-separated tokens in the original text.

    Args:
        text: Transcript text to analyse.

    Returns:
        A three-line human-readable report, or a string starting with
        ``"Error extracting subtitle information:"`` if analysis fails.
    """
    try:
        # Split after sentence-ending punctuation; drop empty fragments so
        # blank input is not reported as one sentence.
        sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]

        words = text.split()

        # Normalise before counting so "Python", "python," and "python."
        # are treated as the same topic; apostrophes inside a word are kept.
        tokens = re.findall(r"[^\W_]+(?:'[^\W_]+)*", text.lower())
        topic_counts = Counter(
            token for token in tokens if token not in _SUBTITLE_STOPWORDS
        )
        key_topics = ", ".join(word for word, _ in topic_counts.most_common(10))

        info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."

        return info
    except Exception as e:
        return f"Error extracting subtitle information: {str(e)}"
251
+
252
+
253
def clean_text_for_analysis(text):
    """Normalise whitespace in transcript text.

    Every run of whitespace (spaces, tabs, line breaks) is collapsed into a
    single space and leading/trailing whitespace is removed. No other
    characters are altered or removed.

    Args:
        text: Raw transcript text. ``None`` or an empty string yields ``""``.

    Returns:
        The whitespace-normalised text.
    """
    if not text:
        return ""
    # str.split() with no argument splits on any whitespace run and drops
    # empty pieces, so re-joining with one space does the normalisation.
    return " ".join(text.split())
260
+
261
+
262
def get_recommendations(keywords):
    """Return a placeholder recommendation string for *keywords*.

    No lookup is performed; the keywords are simply echoed back in a fixed
    message.

    NOTE(review): a later ``get_recommendations(keywords, max_results=5)``
    definition in this file appears to rebind the name, which would make
    this placeholder unreachable. Confirm before relying on it.
    """
    # Dummy value until a real recommendation source is wired in.
    placeholder = f"Recommendations for: {keywords}"
    return placeholder
269
+ ######################################
270
 
271
  def get_recommendations(keywords, max_results=5):
272
  if not keywords: