Sayiqa committed on
Commit
3850b64
·
verified ·
1 Parent(s): 46dbba3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -118
app.py CHANGED
@@ -130,142 +130,142 @@ import re
130
  from collections import Counter
131
  from googleapiclient.discovery import build
132
 
133
def process_youtube_video(url="", keywords=""):
    """Summarise a YouTube video and analyse the sentiment of its transcript.

    Args:
        url: YouTube video URL. ``None`` or a blank string yields a prompt
            message instead of an error.
        keywords: Optional keywords forwarded to ``get_recommendations``.
            ``None`` or blank means no recommendations are requested.

    Returns:
        A 5-tuple ``(thumbnail, summary, sentiment_label, subtitle_info,
        recommendations)``. On failure the second element carries the
        message and the remaining text fields hold placeholder values.
    """
    try:
        thumbnail = None
        sentiment_label = "N/A"
        recommendations = ""

        # Guard against None as well as blank input so the caller sees the
        # friendly prompt rather than an AttributeError message.
        if not url or not url.strip():
            return None, "Please enter a YouTube URL", "N/A", "", ""

        video_id = extract_video_id(url)
        if not video_id:
            return None, "Invalid YouTube URL", "N/A", "", ""

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        try:
            # Fetch transcript
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except NoTranscriptFound:
                # Fall back to an auto-generated English transcript.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join(t['text'] for t in transcript.fetch())
            if not text.strip():
                raise ValueError("Transcript is empty")

            # Clean up the text for sentiment analysis
            cleaned_text = clean_text_for_analysis(text)

            # Sentiment analysis on the cleaned text
            sentiment = TextBlob(cleaned_text).sentiment
            if sentiment.polarity > 0:
                tone = "Positive"
            elif sentiment.polarity < 0:
                tone = "Negative"
            else:
                tone = "Neutral"
            sentiment_label = f"{tone} ({sentiment.polarity:.2f})"

            # Generate summary; only the first 4000 characters are sent.
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text

            # Extract subtitle information
            subtitle_info = extract_subtitle_info(cleaned_text)

        except TranscriptsDisabled:
            # No subtitles: fall back to the video description as the summary.
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
            sentiment_label = "N/A"
            subtitle_info = "No subtitles available for analysis."
        except NoTranscriptFound:
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "⚠️ No English transcript available.")
            sentiment_label = "N/A"
            subtitle_info = "No subtitles available for analysis."
        except Exception as e:
            return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""

        # Get recommendations (None-safe)
        if keywords and keywords.strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, subtitle_info, recommendations

    except Exception as e:
        # Top-level boundary: report the failure in the summary slot.
        return None, f"Error: {str(e)}", "N/A", "", ""
199
 
200
 
201
def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    The ID is recognised after ``v=`` or after a ``/`` and must be exactly
    11 characters from ``[0-9A-Za-z_-]``. A longer run (for example a
    channel ID, or a path segment such as ``attribution_link``) is rejected
    rather than truncated to its first 11 characters.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string or
        contains no ID.
    """
    if not isinstance(url, str):
        return None
    # The negative lookahead stops an over-long segment being cut to 11 chars.
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
207
 
208
 
209
def get_video_metadata(video_id):
    """Fetch a video's title and description via the YouTube Data API.

    The API key is read from the ``YOUTUBE_API_KEY`` environment variable
    so that no credential is stored in the source file.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        A dict with ``"title"`` and ``"description"`` keys when the video is
        found, an empty dict when the API returns no items, or a dict whose
        ``"title"`` is ``"Error fetching metadata"`` and whose
        ``"description"`` is the error text when anything fails (including
        a missing API key).
    """
    import os  # local import: the file's top-level import block is not visible here

    try:
        api_key = os.environ.get("YOUTUBE_API_KEY")
        if not api_key:
            raise RuntimeError("YOUTUBE_API_KEY environment variable is not set")

        youtube = build("youtube", "v3", developerKey=api_key)
        request = youtube.videos().list(part="snippet", id=video_id)
        response = request.execute()

        if "items" in response and len(response["items"]) > 0:
            snippet = response["items"][0]["snippet"]
            return {
                "title": snippet.get("title", "No title available"),
                "description": snippet.get("description", "No description available"),
            }
        return {}

    except Exception as e:
        # Best-effort lookup: report the failure as data instead of raising.
        return {"title": "Error fetching metadata", "description": str(e)}
229
 
230
 
231
# Common English function words left out of the "key topics" ranking.
_SUBTITLE_STOP_WORDS = frozenset(
    "a an and are as at be but by do for from had has have he her his i if in is it its "
    "me my not of on or our she so that the their them then there they this to us was we "
    "were what when which who will with you your".split()
)


def extract_subtitle_info(text):
    """Build a short statistical breakdown of subtitle text.

    Key topics are the ten most frequent words, counted case-insensitively
    with surrounding punctuation removed and common function words skipped.
    Sentences are split after ``.``, ``!`` or ``?``; empty text counts as
    zero sentences. The total word count is the number of
    whitespace-separated tokens.

    Args:
        text: The subtitle text to analyse.

    Returns:
        A three-line summary string, or an error message string if the
        analysis raises (for example when ``text`` is not a string).
    """
    try:
        # Split after sentence-ending punctuation, discarding empty pieces.
        sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]

        words = text.split()

        # Normalise case and strip punctuation so "Python" and "python?" are one topic.
        tokens = re.findall(r"[^\W_]+(?:'[^\W_]+)*", text.lower())
        topic_counts = Counter(t for t in tokens if t not in _SUBTITLE_STOP_WORDS)
        key_topics = ", ".join(word for word, _ in topic_counts.most_common(10))

        info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."

        return info
    except Exception as e:
        return f"Error extracting subtitle information: {str(e)}"
251
 
252
 
253
def clean_text_for_analysis(text):
    """Clean transcript text before sentiment analysis and summarisation.

    Removes square-bracketed non-speech cues (for example ``[Music]`` or
    ``[Applause]``) and collapses every run of whitespace, including line
    breaks, into a single space.

    Args:
        text: Raw transcript text.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    # Replace cues with a space so the words on either side stay separated.
    without_cues = re.sub(r"\[[^\]]*\]", " ", text)
    # Remove extra spaces and line breaks
    cleaned_text = " ".join(without_cues.split())
    return cleaned_text
260
 
261
 
262
def get_recommendations(keywords):
    """Return placeholder recommendation text for the given keywords.

    No lookup is performed yet: the keywords are echoed back inside a fixed
    message. Replace this with a real API call or custom logic when one is
    available.
    """
    # Dummy result until a real recommendation source is wired in.
    placeholder = "Recommendations for: {}".format(keywords)
    return placeholder
269
  ######################################
270
  # from textblob import TextBlob
271
  # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
@@ -376,6 +376,79 @@ def get_recommendations(keywords):
376
  # print(f"Negative: {dist['negative']} ({(dist['negative']/total*100):.1f}%)")
377
 
378
  # print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
 
380
 
381
 
 
130
  from collections import Counter
131
  from googleapiclient.discovery import build
132
 
133
+ # def process_youtube_video(url="", keywords=""):
134
+ # try:
135
+ # #Initialize variables
136
+ # thumbnail = None
137
+ # summary = "No transcript available"
138
+ # sentiment_label = "N/A"
139
+ # recommendations = ""
140
+ # subtitle_info = "No additional information available"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ # if not url.strip():
143
+ # return None, "Please enter a YouTube URL", "N/A", "", ""
 
144
 
145
+ # video_id = extract_video_id(url)
146
+ # if not video_id:
147
+ # return None, "Invalid YouTube URL", "N/A", "", ""
148
+
149
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
150
+
151
+ # try:
152
+ # # Fetch transcript
153
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
154
+ # transcript = None
155
+ # try:
156
+ # transcript = transcript_list.find_transcript(['en'])
157
+ # except:
158
+ # transcript = transcript_list.find_generated_transcript(['en'])
159
+
160
+ # text = " ".join([t['text'] for t in transcript.fetch()])
161
+ # if not text.strip():
162
+ # raise ValueError("Transcript is empty")
163
+
164
+ # # Clean up the text for sentiment analysis
165
+ # cleaned_text = clean_text_for_analysis(text)
166
+
167
+ # # Sentiment analysis
168
+ # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
169
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
170
+
171
+ # # Generate summary
172
+ # model = genai.GenerativeModel("gemini-pro")
173
+ # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
174
+
175
+ # # Extract subtitle information
176
+ # subtitle_info = extract_subtitle_info(cleaned_text)
177
+
178
+ # except TranscriptsDisabled:
179
+ # metadata = get_video_metadata(video_id)
180
+ # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
181
+ # sentiment_label = "N/A"
182
+ # subtitle_info = "No subtitles available for analysis."
183
+ # except NoTranscriptFound:
184
+ # metadata = get_video_metadata(video_id)
185
+ # summary = metadata.get("description", "⚠️ No English transcript available.")
186
+ # sentiment_label = "N/A"
187
+ # subtitle_info = "No subtitles available for analysis."
188
+ # except Exception as e:
189
+ # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
190
+
191
+ # # Get recommendations
192
+ # if keywords.strip():
193
+ # recommendations = get_recommendations(keywords)
194
+
195
+ # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
196
 
197
+ # except Exception as e:
198
+ # return None, f"Error: {str(e)}", "N/A", "", ""
199
 
200
 
201
+ # def extract_video_id(url):
202
+ # """
203
+ # Extracts the video ID from a YouTube URL.
204
+ # """
205
+ # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
206
+ # return match.group(1) if match else None
207
 
208
 
209
+ # def get_video_metadata(video_id):
210
+ # """
211
+ # Fetches video metadata such as title and description using the YouTube Data API.
212
+ # """
213
+ # try:
214
+ # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
215
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
216
+ # request = youtube.videos().list(part="snippet", id=video_id)
217
+ # response = request.execute()
218
 
219
+ # if "items" in response and len(response["items"]) > 0:
220
+ # snippet = response["items"][0]["snippet"]
221
+ # return {
222
+ # "title": snippet.get("title", "No title available"),
223
+ # "description": snippet.get("description", "No description available"),
224
+ # }
225
+ # return {}
226
 
227
+ # except Exception as e:
228
+ # return {"title": "Error fetching metadata", "description": str(e)}
229
 
230
 
231
+ # def extract_subtitle_info(text):
232
+ # """
233
+ # Extracts meaningful information from the subtitles.
234
+ # This could include topics, key insights, or a breakdown of the content.
235
+ # """
236
+ # try:
237
+ # # Split text into sentences for better analysis
238
+ # sentences = text.split(". ")
239
 
240
+ # # Example: Extract key topics or keywords
241
+ # words = text.split()
242
+ # common_words = Counter(words).most_common(10)
243
+ # key_topics = ", ".join([word for word, count in common_words])
244
 
245
+ # # Example: Provide a breakdown of the content
246
+ # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
247
 
248
+ # return info
249
+ # except Exception as e:
250
+ # return f"Error extracting subtitle information: {str(e)}"
251
 
252
 
253
+ # def clean_text_for_analysis(text):
254
+ # """
255
+ # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
256
+ # """
257
+ # # Remove extra spaces and line breaks
258
+ # cleaned_text = " ".join(text.split())
259
+ # return cleaned_text
260
 
261
 
262
+ # def get_recommendations(keywords):
263
+ # """
264
+ # Fetches related video recommendations based on the provided keywords.
265
+ # This function can be expanded with a proper API or custom logic.
266
+ # """
267
+ # # Placeholder for fetching recommendations based on keywords
268
+ # return f"Recommendations for: {keywords}" # Dummy return for now
269
  ######################################
270
  # from textblob import TextBlob
271
  # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
376
  # print(f"Negative: {dist['negative']} ({(dist['negative']/total*100):.1f}%)")
377
 
378
  # print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
379
+ #####################################################################################################
380
def process_youtube_video(url="", keywords=""):
    """Summarise a YouTube video and score the sentiment of its transcript.

    Args:
        url: YouTube video URL. ``None`` or a blank string yields a prompt
            message instead of an error.
        keywords: Optional keywords forwarded to ``get_recommendations``.
            ``None`` or blank means no recommendations are requested.

    Returns:
        A 4-tuple ``(thumbnail, summary, sentiment_label, recommendations)``.
        On failure the second element carries the message and
        ``sentiment_label`` is ``"N/A"``.
    """
    try:
        thumbnail = None
        summary = ""
        sentiment_label = "N/A"
        recommendations = ""

        # Guard against None as well as blank input so the caller sees the
        # friendly prompt rather than an AttributeError message.
        if not url or not url.strip():
            return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations

        video_id = extract_video_id(url)
        if not video_id:
            return thumbnail, "Invalid YouTube URL", sentiment_label, recommendations

        # Set thumbnail
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        try:
            # Get transcript
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            transcript = None

            # Try each language code in order of preference.
            # NOTE(review): 'a.en' looks like a legacy auto-caption code — confirm
            # the transcript API still recognises it.
            for lang in ('en', 'en-US', 'a.en'):
                try:
                    transcript = transcript_list.find_transcript([lang])
                    break
                except NoTranscriptFound:
                    continue

            if not transcript:
                transcript = transcript_list.find_generated_transcript(['en'])

            # Get transcript text
            text = " ".join(t['text'] for t in transcript.fetch())

            # Clean text: drop everything except word characters, whitespace
            # and full stops, then collapse whitespace.
            cleaned_text = re.sub(r'[^\w\s.]', '', text)
            cleaned_text = ' '.join(cleaned_text.split())

            # Do not send an empty prompt to the summariser.
            if not cleaned_text:
                raise ValueError("Transcript is empty")

            # Sentiment analysis on the first 2000 characters only.
            blob = TextBlob(cleaned_text[:2000])
            polarity = blob.sentiment.polarity
            subjectivity = blob.sentiment.subjectivity

            if polarity > 0:
                tone = "Positive"
            elif polarity < 0:
                tone = "Negative"
            else:
                tone = "Neutral"

            # "Confidence" here is the magnitude of the polarity score.
            sentiment_label = (
                f"Sentiment: {tone}\n"
                f"Confidence: {abs(polarity):.2f}\n"
                f"Subjectivity: {subjectivity:.2f}"
            )

            # Generate summary; only the first 4000 characters are sent.
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text

        except (TranscriptsDisabled, NoTranscriptFound):
            return thumbnail, "⚠️ No English subtitles available", "N/A", recommendations
        except Exception as e:
            return thumbnail, f"⚠️ Error: {str(e)}", "N/A", recommendations

        # Get recommendations (None-safe)
        if keywords and keywords.strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, recommendations

    except Exception as e:
        # Top-level boundary: report the failure in the summary slot.
        return None, f"Error: {str(e)}", "N/A", ""
448
+
449
+
450
+
451
+
452
 
453
 
454