Sayiqa7 committed on
Commit
86ec2ff
·
verified ·
1 Parent(s): 9cf0ee3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -48
app.py CHANGED
@@ -33,6 +33,7 @@ import re
33
  from googleapiclient.discovery import build
34
  from huggingface_hub import login
35
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
36
  def install_missing_packages():
37
  required_packages = {
38
  "torch":">=1.11.0",
@@ -109,16 +110,123 @@ courses_data = [
109
  ]
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def extract_video_id(url):
113
  # Improved regex to handle various YouTube URL formats
114
  match = re.search(r"(?:v=|\/|be\/|embed\/|watch\?v=)([0-9A-Za-z_-]{11})", url)
115
  return match.group(1) if match else None
116
 
117
-
118
  def get_video_metadata(video_id):
119
  try:
120
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
121
- request = youtube.videos().list(part="snippet", id=video_id)
122
  response = request.execute()
123
 
124
  if "items" in response and len(response["items"]) > 0:
@@ -126,6 +234,8 @@ def get_video_metadata(video_id):
126
  return {
127
  "title": snippet.get("title", "No title available"),
128
  "description": snippet.get("description", "No description available"),
 
 
129
  }
130
  return {}
131
 
@@ -135,12 +245,14 @@ def get_video_metadata(video_id):
135
  def clean_text_for_analysis(text):
136
  return " ".join(text.split())
137
 
138
- def generate_summary(content):
139
- return "\n".join([
140
- "**Key Insights**",
141
- "",
142
- content[:400] + "..." if len(content) > 400 else content
143
- ])
 
 
144
 
145
  def analyze_sentiment(text):
146
  sentiment = TextBlob(text).sentiment
@@ -151,40 +263,19 @@ def analyze_sentiment(text):
151
  )
152
  return f"{sentiment_label} ({sentiment.polarity:.2f})"
153
 
154
- def get_recommendations(keywords, max_results=5):
155
- if not keywords:
156
- return "Please provide search keywords"
157
- try:
158
- response = requests.get(
159
- "https://www.googleapis.com/youtube/v3/search",
160
- params={
161
- "part": "snippet",
162
- "q": f"educational {keywords}",
163
- "type": "video",
164
- "maxResults": max_results,
165
- "relevanceLanguage": "en",
166
- "key": YOUTUBE_API_KEY
167
- }
168
- ).json()
169
-
170
- results = []
171
- for item in response.get("items", []):
172
- title = item["snippet"]["title"]
173
- channel = item["snippet"]["channelTitle"]
174
- video_id = item["id"]["videoId"]
175
- results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
176
-
177
- return "\n".join(results) if results else "No recommendations found"
178
- except Exception as e:
179
- return f"Error: {str(e)}"
180
-
181
  def process_youtube_video(url):
182
  try:
183
  video_id = extract_video_id(url)
184
  if not video_id:
185
  return None, "Invalid YouTube URL", "N/A"
186
 
 
 
 
 
187
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
 
 
188
 
189
  try:
190
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -194,27 +285,26 @@ def process_youtube_video(url):
194
  except:
195
  transcript = transcript_list.find_generated_transcript(['en'])
196
 
197
- text = " ".join([t['text'] for t in transcript.fetch()])
198
- if not text.strip():
199
- raise ValueError("Transcript is empty")
200
-
201
- cleaned_text = clean_text_for_analysis(text)
202
- summary = generate_summary(cleaned_text)
203
  sentiment_label = analyze_sentiment(cleaned_text)
204
 
205
  except (TranscriptsDisabled, NoTranscriptFound):
206
- metadata = get_video_metadata(video_id)
207
- summary = generate_summary(metadata.get("description", "No subtitles available"))
208
- sentiment_label = "N/A"
209
 
210
- return thumbnail, summary, sentiment_label
211
 
212
  except Exception as e:
213
- return None, f"Error: {str(e)}", "N/A"
214
-
215
  url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
216
- thumbnail, summary, sentiment = process_youtube_video(url)
 
217
  print(f"Thumbnail: {thumbnail}\n")
 
 
218
  print(f"Summary:\n{summary}\n")
219
  print(f"Sentiment: {sentiment}")
220
 
 
33
  from googleapiclient.discovery import build
34
  from huggingface_hub import login
35
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
36
+ from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled
37
  def install_missing_packages():
38
  required_packages = {
39
  "torch":">=1.11.0",
 
110
  ]
111
 
112
 
113
+ # def extract_video_id(url):
114
+ # # Improved regex to handle various YouTube URL formats
115
+ # match = re.search(r"(?:v=|\/|be\/|embed\/|watch\?v=)([0-9A-Za-z_-]{11})", url)
116
+ # return match.group(1) if match else None
117
+
118
+
119
+ # def get_video_metadata(video_id):
120
+ # try:
121
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
122
+ # request = youtube.videos().list(part="snippet", id=video_id)
123
+ # response = request.execute()
124
+
125
+ # if "items" in response and len(response["items"]) > 0:
126
+ # snippet = response["items"][0]["snippet"]
127
+ # return {
128
+ # "title": snippet.get("title", "No title available"),
129
+ # "description": snippet.get("description", "No description available"),
130
+ # }
131
+ # return {}
132
+
133
+ # except Exception as e:
134
+ # return {"title": "Error fetching metadata", "description": str(e)}
135
+
136
+ # def clean_text_for_analysis(text):
137
+ # return " ".join(text.split())
138
+
139
+ # def generate_summary(content):
140
+ # return "\n".join([
141
+ # "**Key Insights**",
142
+ # "",
143
+ # content[:400] + "..." if len(content) > 400 else content
144
+ # ])
145
+
146
+ # def analyze_sentiment(text):
147
+ # sentiment = TextBlob(text).sentiment
148
+ # sentiment_label = (
149
+ # "Positive" if sentiment.polarity > 0 else
150
+ # "Negative" if sentiment.polarity < 0 else
151
+ # "Neutral"
152
+ # )
153
+ # return f"{sentiment_label} ({sentiment.polarity:.2f})"
154
+
155
+ # def get_recommendations(keywords, max_results=5):
156
+ # if not keywords:
157
+ # return "Please provide search keywords"
158
+ # try:
159
+ # response = requests.get(
160
+ # "https://www.googleapis.com/youtube/v3/search",
161
+ # params={
162
+ # "part": "snippet",
163
+ # "q": f"educational {keywords}",
164
+ # "type": "video",
165
+ # "maxResults": max_results,
166
+ # "relevanceLanguage": "en",
167
+ # "key": YOUTUBE_API_KEY
168
+ # }
169
+ # ).json()
170
+
171
+ # results = []
172
+ # for item in response.get("items", []):
173
+ # title = item["snippet"]["title"]
174
+ # channel = item["snippet"]["channelTitle"]
175
+ # video_id = item["id"]["videoId"]
176
+ # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
177
+
178
+ # return "\n".join(results) if results else "No recommendations found"
179
+ # except Exception as e:
180
+ # return f"Error: {str(e)}"
181
+
182
+ # def process_youtube_video(url):
183
+ # try:
184
+ # video_id = extract_video_id(url)
185
+ # if not video_id:
186
+ # return None, "Invalid YouTube URL", "N/A"
187
+
188
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
189
+
190
+ # try:
191
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
192
+ # transcript = None
193
+ # try:
194
+ # transcript = transcript_list.find_transcript(['en'])
195
+ # except:
196
+ # transcript = transcript_list.find_generated_transcript(['en'])
197
+
198
+ # text = " ".join([t['text'] for t in transcript.fetch()])
199
+ # if not text.strip():
200
+ # raise ValueError("Transcript is empty")
201
+
202
+ # cleaned_text = clean_text_for_analysis(text)
203
+ # summary = generate_summary(cleaned_text)
204
+ # sentiment_label = analyze_sentiment(cleaned_text)
205
+
206
+ # except (TranscriptsDisabled, NoTranscriptFound):
207
+ # metadata = get_video_metadata(video_id)
208
+ # summary = generate_summary(metadata.get("description", "No subtitles available"))
209
+ # sentiment_label = "N/A"
210
+
211
+ # return thumbnail, summary, sentiment_label
212
+
213
+ # except Exception as e:
214
+ # return None, f"Error: {str(e)}", "N/A"
215
+
216
+ # url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
217
+ # thumbnail, summary, sentiment = process_youtube_video(url)
218
+ # print(f"Thumbnail: {thumbnail}\n")
219
+ # print(f"Summary:\n{summary}\n")
220
+ # print(f"Sentiment: {sentiment}")
221
def extract_video_id(url):
    """Pull the 11-character YouTube video id out of *url*, or return None.

    Handles the common URL shapes: watch?v=, youtu.be/, /embed/ and
    bare-path forms.
    """
    pattern = r"(?:v=|\/|be\/|embed\/|watch\?v=)([0-9A-Za-z_-]{11})"
    found = re.search(pattern, url)
    if found:
        return found.group(1)
    return None
225
 
 
226
  def get_video_metadata(video_id):
227
  try:
228
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
229
+ request = youtube.videos().list(part="snippet,contentDetails", id=video_id)
230
  response = request.execute()
231
 
232
  if "items" in response and len(response["items"]) > 0:
 
234
  return {
235
  "title": snippet.get("title", "No title available"),
236
  "description": snippet.get("description", "No description available"),
237
+ "publishedAt": snippet.get("publishedAt", "Unknown"),
238
+ "channelTitle": snippet.get("channelTitle", "Unknown"),
239
  }
240
  return {}
241
 
 
245
def clean_text_for_analysis(text):
    """Collapse every run of whitespace in *text* into a single space."""
    tokens = text.split()
    return " ".join(tokens)
247
 
248
def generate_detailed_summary(transcript):
    """Render a timestamped, markdown-formatted breakdown of a transcript.

    Parameters
    ----------
    transcript : list[dict]
        Transcript segments as returned by ``transcript.fetch()``; each
        segment is expected to expose ``'start'`` (seconds) and ``'text'``.
        (Assumes the dict-style API of youtube_transcript_api — TODO confirm
        against the installed version, newer releases return snippet objects.)

    Returns
    -------
    str
        A header line followed by one bullet per segment; the header alone
        when *transcript* is empty.
    """
    # Accumulate pieces in a list and join once: repeated ``+=`` on a str
    # is quadratic in the number of segments for long transcripts.
    lines = ["**Transcript Breakdown:**\n"]
    for segment in transcript:
        lines.append(f"- **[{segment['start']:.2f}s]:** {segment['text']}\n")
    return "".join(lines)
256
 
257
  def analyze_sentiment(text):
258
  sentiment = TextBlob(text).sentiment
 
263
  )
264
  return f"{sentiment_label} ({sentiment.polarity:.2f})"
265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
def process_youtube_video(url):
    """Fetch thumbnail, metadata, transcript summary and sentiment for a video.

    Parameters
    ----------
    url : str
        Any common YouTube URL form.

    Returns
    -------
    tuple
        ``(thumbnail_url, title, description, summary, sentiment_label)``.
        On failure the first element is None and the error message rides in
        the title slot; the remaining slots are "N/A".
    """
    try:
        video_id = extract_video_id(url)
        if not video_id:
            # Always return a full 5-tuple: the call site unpacks five
            # values, so the original 3-tuple here raised a ValueError on
            # any invalid URL instead of reporting it.
            return None, "Invalid YouTube URL", "N/A", "N/A", "N/A"

        metadata = get_video_metadata(video_id)
        title = metadata.get("title", "No title")
        description = metadata.get("description", "No description available")

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        summary = ""
        sentiment_label = "N/A"

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            transcript = None
            try:
                # Prefer a manually created English transcript.
                transcript = transcript_list.find_transcript(['en'])
            except Exception:
                # Fall back to the auto-generated English transcript.
                transcript = transcript_list.find_generated_transcript(['en'])

            text_segments = transcript.fetch()
            transcript_text = " ".join([segment['text'] for segment in text_segments])
            cleaned_text = clean_text_for_analysis(transcript_text)
            summary = generate_detailed_summary(text_segments)
            sentiment_label = analyze_sentiment(cleaned_text)

        except (TranscriptsDisabled, NoTranscriptFound):
            # Captions switched off or none published: keep metadata output.
            summary = "No transcript available."

        return thumbnail, title, description, summary, sentiment_label

    except Exception as e:
        # Keep the 5-tuple shape so the caller's unpack never fails.
        return None, f"Error: {str(e)}", "N/A", "N/A", "N/A"
301
+
302
# Demo run against a sample video; prints each field on its own section.
url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
thumbnail, title, description, summary, sentiment = process_youtube_video(url)

for report_line in (
    f"Thumbnail: {thumbnail}\n",
    f"Title: {title}\n",
    f"Description:\n{description}\n",
    f"Summary:\n{summary}\n",
    f"Sentiment: {sentiment}",
):
    print(report_line)
310