Sayiqa committed on
Commit
d6fb65a
·
verified ·
1 Parent(s): 9d201df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +523 -349
app.py CHANGED
@@ -1,51 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
- subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
- subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
- subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
- subprocess.check_call(["pip", "install", "pytube"])
6
- subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
- subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
- subprocess.check_call(["pip", "install", "google-generativeai"])
9
- subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
- subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
- subprocess.check_call(["pip", "install", "genai"])
12
- subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
- import transformers
14
- import torch
15
- import os
16
- import youtube_transcript_api
17
- import pytube
18
- import gradio
19
- import PyPDF2
20
- import pathlib
21
- import pandas
22
- import numpy
23
- import textblob
24
- import gradio as gr
25
- from youtube_transcript_api import YouTubeTranscriptApi
26
- import google.generativeai as genai
27
- import requests
28
- from textblob import TextBlob
29
  import re
30
- #from google.cloud import generativeai
31
- from huggingface_hub import login
32
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
 
 
 
33
  def install_missing_packages():
34
  required_packages = {
35
- "torch":">=1.11.0",
36
- "transformers":">=4.34.0",
37
- "youtube_transcript_api" :">=0.6.3" ,
38
- "pytube":None,
39
  "huggingface_hub": ">=0.19.0",
40
  "PyPDF2": ">=3.0.1",
41
- "textblob":">=0.17.1",
42
- "python-dotenv":">=1.0.0",
43
- "genai":None,
44
  "google-generativeai": None,
45
- "google-cloud-aiplatform":"==1.34.0"
46
  }
47
 
48
-
49
  for package, version in required_packages.items():
50
  try:
51
  __import__(package)
@@ -54,162 +473,20 @@ def install_missing_packages():
54
  subprocess.check_call(["pip", "install", package_name])
55
 
56
  install_missing_packages()
57
- # Configuration
58
-
59
- hf_token = os.getenv("HF_TOKEN")
60
- if hf_token:
61
- login(hf_token)
62
- else:
63
- raise ValueError("HF_TOKEN environment variable not set.")
64
-
65
 
66
  # Configuration
67
- USER_CREDENTIALS = {
68
- "admin": "password123",
69
- "teacher": "teach2024",
70
- "student": "learn2024"
71
- }
72
-
73
- import os
74
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
75
-
76
- # Use environment variables
77
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
78
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
79
 
80
  if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
81
  raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
82
 
83
- genai.configure(api_key=GOOGLE_API_KEY)
84
-
85
- # Database
86
- students_data = [
87
- (1, "Alice", "A", "Computer Science"),
88
- (2, "Aliaa", "B", "Mathematics"),
89
- (3, "Charlie", "A", "Machine Learning"),
90
- (4, "Daan", "A", "Physics"),
91
- (5, "Jhon", "C", "Math"),
92
- (6, "Emma", "A+", "Computer Science")
93
- ]
94
-
95
- teachers_data = [
96
- (1, "Dr. Smith", "Math", "MS Mathematics"),
97
- (2, "Ms. Johnson", "Science", "MSc Physics"),
98
- (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
99
- (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
100
- ]
101
-
102
- courses_data = [
103
- (1, "Algebra", "Dr. Smith", "Advanced"),
104
- (2, "Biology", "Ms. Mia", "Intermediate"),
105
- (3, "Machine Learning", "Ms. Jack", "Intermediate"),
106
- (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
107
- (5, "Mathematics", "Ms. Smith", "Intermediate")
108
- ]
109
-
110
- def sanitize_text(text):
111
- """Remove invalid Unicode characters."""
112
- return text.encode("utf-8", "replace").decode("utf-8")
113
-
114
  def extract_video_id(url):
115
- if not url:
116
- return None
117
- patterns = [
118
- r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
119
- ]
120
- for pattern in patterns:
121
- match = re.search(pattern, url)
122
- if match:
123
- return match.group(1)
124
- return None
125
-
126
- from textblob import TextBlob
127
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
128
- import re
129
- from collections import Counter
130
- from googleapiclient.discovery import build
131
- def process_youtube_video(url="", keywords=""):
132
- try:
133
- # Initialize variables
134
- thumbnail = None
135
- summary = "No transcript available"
136
- sentiment_label = "N/A"
137
- recommendations = ""
138
- subtitle_info = "No additional information available"
139
-
140
- if not url.strip():
141
- return None, "Please enter a YouTube URL", "N/A", "", ""
142
-
143
- video_id = extract_video_id(url)
144
- if not video_id:
145
- return None, "Invalid YouTube URL", "N/A", "", ""
146
-
147
- thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
148
-
149
- try:
150
- # Fetch transcript
151
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
152
- transcript = None
153
- try:
154
- transcript = transcript_list.find_transcript(['en'])
155
- except:
156
- transcript = transcript_list.find_generated_transcript(['en'])
157
-
158
- text = " ".join([t['text'] for t in transcript.fetch()])
159
- if not text.strip():
160
- raise ValueError("Transcript is empty")
161
-
162
- # Clean up the text for sentiment analysis
163
- cleaned_text = clean_text_for_analysis(text)
164
-
165
- # Sentiment analysis
166
- sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
167
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
168
-
169
- # Generate summary
170
- model = genai.GenerativeModel("gemini-pro")
171
- summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
172
-
173
- # Extract subtitle information
174
- subtitle_info = extract_subtitle_info(cleaned_text)
175
-
176
- except TranscriptsDisabled:
177
- metadata = get_video_metadata(video_id)
178
- summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
179
- sentiment_label = "N/A"
180
- subtitle_info = "No subtitles available for analysis."
181
- except NoTranscriptFound:
182
- metadata = get_video_metadata(video_id)
183
- summary = metadata.get("description", "⚠️ No English transcript available.")
184
- sentiment_label = "N/A"
185
- subtitle_info = "No subtitles available for analysis."
186
- except Exception as e:
187
- return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
188
-
189
- # Get recommendations
190
- if keywords.strip():
191
- recommendations = get_recommendations(keywords)
192
-
193
- return thumbnail, summary, sentiment_label, subtitle_info, recommendations
194
-
195
- except Exception as e:
196
- return None, f"Error: {str(e)}", "N/A", "", ""
197
-
198
-
199
- def extract_video_id(url):
200
- """
201
- Extracts the video ID from a YouTube URL.
202
- """
203
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
204
  return match.group(1) if match else None
205
 
206
-
207
  def get_video_metadata(video_id):
208
- """
209
- Fetches video metadata such as title and description using the YouTube Data API.
210
- """
211
  try:
212
- YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
213
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
214
  request = youtube.videos().list(part="snippet", id=video_id)
215
  response = request.execute()
@@ -225,48 +502,20 @@ def get_video_metadata(video_id):
225
  except Exception as e:
226
  return {"title": "Error fetching metadata", "description": str(e)}
227
 
 
 
228
 
229
  def extract_subtitle_info(text):
230
- """
231
- Extracts meaningful information from the subtitles.
232
- This could include topics, key insights, or a breakdown of the content.
233
- """
234
  try:
235
- # Split text into sentences for better analysis
236
  sentences = text.split(". ")
237
-
238
- # Example: Extract key topics or keywords
239
  words = text.split()
240
  common_words = Counter(words).most_common(10)
241
  key_topics = ", ".join([word for word, count in common_words])
242
-
243
- # Example: Provide a breakdown of the content
244
  info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
245
-
246
  return info
247
  except Exception as e:
248
  return f"Error extracting subtitle information: {str(e)}"
249
 
250
-
251
- def clean_text_for_analysis(text):
252
- """
253
- Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
254
- """
255
- # Remove extra spaces and line breaks
256
- cleaned_text = " ".join(text.split())
257
- return cleaned_text
258
-
259
-
260
- def get_recommendations(keywords):
261
- """
262
- Fetches related video recommendations based on the provided keywords.
263
- This function can be expanded with a proper API or custom logic.
264
- """
265
- # Placeholder for fetching recommendations based on keywords
266
- return f"Recommendations for: {keywords}" # Dummy return for now
267
-
268
-
269
-
270
  def get_recommendations(keywords, max_results=5):
271
  if not keywords:
272
  return "Please provide search keywords"
@@ -282,161 +531,86 @@ def get_recommendations(keywords, max_results=5):
282
  "key": YOUTUBE_API_KEY
283
  }
284
  ).json()
285
-
286
  results = []
287
  for item in response.get("items", []):
288
  title = item["snippet"]["title"]
289
  channel = item["snippet"]["channelTitle"]
290
  video_id = item["id"]["videoId"]
291
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
292
-
293
  return "\n".join(results) if results else "No recommendations found"
294
  except Exception as e:
295
  return f"Error: {str(e)}"
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  # Gradio Interface
298
- with gr.Blocks(theme=gr.themes.Soft()) as app:
299
- # Login Page
300
- with gr.Group() as login_page:
301
- gr.Markdown("# 🎓 Educational Learning Management System")
302
- username = gr.Textbox(label="Username")
303
- password = gr.Textbox(label="Password", type="password")
304
- login_btn = gr.Button("Login", variant="primary")
305
- login_msg = gr.Markdown()
306
-
307
- # Main Interface
308
- with gr.Group(visible=False) as main_page:
309
- with gr.Row():
310
- with gr.Column(scale=1):
311
- gr.Markdown("### 📋 Navigation")
312
- nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
313
- nav_students = gr.Button("👥 Students")
314
- nav_teachers = gr.Button("👨‍🏫 Teachers")
315
- nav_courses = gr.Button("📚 Courses")
316
- nav_youtube = gr.Button("🎥 YouTube Tool")
317
- logout_btn = gr.Button("🚪 Logout", variant="stop")
318
-
319
- with gr.Column(scale=3):
320
- # Dashboard Content
321
- dashboard_page = gr.Group()
322
- with dashboard_page:
323
- gr.Markdown("## 📊 Dashboard")
324
- gr.Markdown(f"""
325
- ### System Overview
326
- - 👥 Total Students: {len(students_data)}
327
- - 👨‍🏫 Total Teachers: {len(teachers_data)}
328
- - 📚 Total Courses: {len(courses_data)}
329
-
330
- ### Quick Actions
331
- - View student performance
332
- - Access course materials
333
- - Generate learning insights
334
- """)
335
-
336
- # Students Content
337
- students_page = gr.Group(visible=False)
338
- with students_page:
339
- gr.Markdown("## 👥 Students")
340
- gr.DataFrame(
341
- value=students_data,
342
- headers=["ID", "Name", "Grade", "Program"]
343
- )
344
-
345
- # Teachers Content
346
- teachers_page = gr.Group(visible=False)
347
- with teachers_page:
348
- gr.Markdown("## 👨‍🏫 Teachers")
349
- gr.DataFrame(
350
- value=teachers_data,
351
- headers=["ID", "Name", "Subject", "Qualification"]
352
- )
353
-
354
- # Courses Content
355
- courses_page = gr.Group(visible=False)
356
- with courses_page:
357
- gr.Markdown("## 📚 Courses")
358
- gr.DataFrame(
359
- value=courses_data,
360
- headers=["ID", "Name", "Instructor", "Level"]
361
- )
362
-
363
- # YouTube Tool Content
364
- youtube_page = gr.Group(visible=False)
365
- with youtube_page:
366
- gr.Markdown("## Agent for YouTube Content Exploration")
367
- with gr.Row():
368
- with gr.Column(scale=2):
369
- video_url = gr.Textbox(
370
- label="YouTube URL",
371
- placeholder="https://youtube.com/watch?v=..."
372
- )
373
- keywords = gr.Textbox(
374
- label="Keywords for Recommendations",
375
- placeholder="e.g., python programming, machine learning"
376
- )
377
- analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
378
-
379
- with gr.Column(scale=1):
380
- video_thumbnail = gr.Image(label="Video Preview")
381
-
382
- with gr.Row():
383
- with gr.Column():
384
- summary = gr.Textbox(label="📝 Summary", lines=8)
385
- sentiment = gr.Textbox(label="😊 Content Sentiment")
386
- with gr.Column():
387
- recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
388
-
389
- def login_check(user, pwd):
390
- if USER_CREDENTIALS.get(user) == pwd:
391
- return {
392
- login_page: gr.update(visible=False),
393
- main_page: gr.update(visible=True),
394
- login_msg: ""
395
- }
396
- return {
397
- login_page: gr.update(visible=True),
398
- main_page: gr.update(visible=False),
399
- login_msg: "❌ Invalid credentials"
400
- }
401
-
402
- def show_page(page_name):
403
- updates = {
404
- dashboard_page: gr.update(visible=False),
405
- students_page: gr.update(visible=False),
406
- teachers_page: gr.update(visible=False),
407
- courses_page: gr.update(visible=False),
408
- youtube_page: gr.update(visible=False)
409
- }
410
- updates[page_name] = gr.update(visible=True)
411
- return updates
412
-
413
- # Event Handlers
414
- login_btn.click(
415
- login_check,
416
- inputs=[username, password],
417
- outputs=[login_page, main_page, login_msg]
418
- )
419
-
420
- nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
421
- nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
422
- nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
423
- nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
424
- nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
425
-
426
  analyze_btn.click(
427
  process_youtube_video,
428
  inputs=[video_url, keywords],
429
  outputs=[video_thumbnail, summary, sentiment, recommendations]
430
  )
431
-
432
- logout_btn.click(
433
- lambda: {
434
- login_page: gr.update(visible=True),
435
- main_page: gr.update(visible=False)
436
- },
437
- outputs=[login_page, main_page]
438
- )
439
 
440
  if __name__ == "__main__":
441
  app.launch()
442
-
 
1
+ # import subprocess
2
+ # subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
+ # subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
+ # subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
+ # subprocess.check_call(["pip", "install", "pytube"])
6
+ # subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
+ # subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
+ # subprocess.check_call(["pip", "install", "google-generativeai"])
9
+ # subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
+ # subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
+ # subprocess.check_call(["pip", "install", "genai"])
12
+ # subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
+ # import transformers
14
+ # import torch
15
+ # import os
16
+ # import youtube_transcript_api
17
+ # import pytube
18
+ # import gradio
19
+ # import PyPDF2
20
+ # import pathlib
21
+ # import pandas
22
+ # import numpy
23
+ # import textblob
24
+ # import gradio as gr
25
+ # from youtube_transcript_api import YouTubeTranscriptApi
26
+ # import google.generativeai as genai
27
+ # import requests
28
+ # from textblob import TextBlob
29
+ # import re
30
+ # #from google.cloud import generativeai
31
+ # from huggingface_hub import login
32
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
33
+ # def install_missing_packages():
34
+ # required_packages = {
35
+ # "torch":">=1.11.0",
36
+ # "transformers":">=4.34.0",
37
+ # "youtube_transcript_api" :">=0.6.3" ,
38
+ # "pytube":None,
39
+ # "huggingface_hub": ">=0.19.0",
40
+ # "PyPDF2": ">=3.0.1",
41
+ # "textblob":">=0.17.1",
42
+ # "python-dotenv":">=1.0.0",
43
+ # "genai":None,
44
+ # "google-generativeai": None,
45
+ # "google-cloud-aiplatform":"==1.34.0"
46
+ # }
47
+
48
+
49
+ # for package, version in required_packages.items():
50
+ # try:
51
+ # __import__(package)
52
+ # except ImportError:
53
+ # package_name = f"{package}{version}" if version else package
54
+ # subprocess.check_call(["pip", "install", package_name])
55
+
56
+ # install_missing_packages()
57
+ # # Configuration
58
+
59
+ # hf_token = os.getenv("HF_TOKEN")
60
+ # if hf_token:
61
+ # login(hf_token)
62
+ # else:
63
+ # raise ValueError("HF_TOKEN environment variable not set.")
64
+
65
+
66
+ # # Configuration
67
+ # USER_CREDENTIALS = {
68
+ # "admin": "password123",
69
+ # "teacher": "teach2024",
70
+ # "student": "learn2024"
71
+ # }
72
+
73
+ # import os
74
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
75
+
76
+ # # Use environment variables
77
+ # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
78
+ # YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
79
+
80
+ # if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
81
+ # raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
82
+
83
+ # genai.configure(api_key=GOOGLE_API_KEY)
84
+
85
+ # # Database
86
+ # students_data = [
87
+ # (1, "Alice", "A", "Computer Science"),
88
+ # (2, "Aliaa", "B", "Mathematics"),
89
+ # (3, "Charlie", "A", "Machine Learning"),
90
+ # (4, "Daan", "A", "Physics"),
91
+ # (5, "Jhon", "C", "Math"),
92
+ # (6, "Emma", "A+", "Computer Science")
93
+ # ]
94
+
95
+ # teachers_data = [
96
+ # (1, "Dr. Smith", "Math", "MS Mathematics"),
97
+ # (2, "Ms. Johnson", "Science", "MSc Physics"),
98
+ # (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
99
+ # (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
100
+ # ]
101
+
102
+ # courses_data = [
103
+ # (1, "Algebra", "Dr. Smith", "Advanced"),
104
+ # (2, "Biology", "Ms. Mia", "Intermediate"),
105
+ # (3, "Machine Learning", "Ms. Jack", "Intermediate"),
106
+ # (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
107
+ # (5, "Mathematics", "Ms. Smith", "Intermediate")
108
+ # ]
109
+
110
+ # def sanitize_text(text):
111
+ # """Remove invalid Unicode characters."""
112
+ # return text.encode("utf-8", "replace").decode("utf-8")
113
+
114
+ # def extract_video_id(url):
115
+ # if not url:
116
+ # return None
117
+ # patterns = [
118
+ # r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
119
+ # ]
120
+ # for pattern in patterns:
121
+ # match = re.search(pattern, url)
122
+ # if match:
123
+ # return match.group(1)
124
+ # return None
125
+
126
+ # from textblob import TextBlob
127
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
128
+ # import re
129
+ # from collections import Counter
130
+ # from googleapiclient.discovery import build
131
+ # def process_youtube_video(url="", keywords=""):
132
+ # try:
133
+ # # Initialize variables
134
+ # thumbnail = None
135
+ # summary = "No transcript available"
136
+ # sentiment_label = "N/A"
137
+ # recommendations = ""
138
+ # subtitle_info = "No additional information available"
139
+
140
+ # if not url.strip():
141
+ # return None, "Please enter a YouTube URL", "N/A", "", ""
142
+
143
+ # video_id = extract_video_id(url)
144
+ # if not video_id:
145
+ # return None, "Invalid YouTube URL", "N/A", "", ""
146
+
147
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
148
+
149
+ # try:
150
+ # # Fetch transcript
151
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
152
+ # transcript = None
153
+ # try:
154
+ # transcript = transcript_list.find_transcript(['en'])
155
+ # except:
156
+ # transcript = transcript_list.find_generated_transcript(['en'])
157
+
158
+ # text = " ".join([t['text'] for t in transcript.fetch()])
159
+ # if not text.strip():
160
+ # raise ValueError("Transcript is empty")
161
+
162
+ # # Clean up the text for sentiment analysis
163
+ # cleaned_text = clean_text_for_analysis(text)
164
+
165
+ # # Sentiment analysis
166
+ # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
167
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
168
+
169
+ # # Generate summary
170
+ # model = genai.GenerativeModel("gemini-pro")
171
+ # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
172
+
173
+ # # Extract subtitle information
174
+ # subtitle_info = extract_subtitle_info(cleaned_text)
175
+
176
+ # except TranscriptsDisabled:
177
+ # metadata = get_video_metadata(video_id)
178
+ # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
179
+ # sentiment_label = "N/A"
180
+ # subtitle_info = "No subtitles available for analysis."
181
+ # except NoTranscriptFound:
182
+ # metadata = get_video_metadata(video_id)
183
+ # summary = metadata.get("description", "⚠️ No English transcript available.")
184
+ # sentiment_label = "N/A"
185
+ # subtitle_info = "No subtitles available for analysis."
186
+ # except Exception as e:
187
+ # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
188
+
189
+ # # Get recommendations
190
+ # if keywords.strip():
191
+ # recommendations = get_recommendations(keywords)
192
+
193
+ # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
194
+
195
+ # except Exception as e:
196
+ # return None, f"Error: {str(e)}", "N/A", "", ""
197
+
198
+
199
+ # def extract_video_id(url):
200
+ # """
201
+ # Extracts the video ID from a YouTube URL.
202
+ # """
203
+ # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
204
+ # return match.group(1) if match else None
205
+
206
+
207
+ # def get_video_metadata(video_id):
208
+ # """
209
+ # Fetches video metadata such as title and description using the YouTube Data API.
210
+ # """
211
+ # try:
212
+ # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
213
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
214
+ # request = youtube.videos().list(part="snippet", id=video_id)
215
+ # response = request.execute()
216
+
217
+ # if "items" in response and len(response["items"]) > 0:
218
+ # snippet = response["items"][0]["snippet"]
219
+ # return {
220
+ # "title": snippet.get("title", "No title available"),
221
+ # "description": snippet.get("description", "No description available"),
222
+ # }
223
+ # return {}
224
+
225
+ # except Exception as e:
226
+ # return {"title": "Error fetching metadata", "description": str(e)}
227
+
228
+
229
+ # def extract_subtitle_info(text):
230
+ # """
231
+ # Extracts meaningful information from the subtitles.
232
+ # This could include topics, key insights, or a breakdown of the content.
233
+ # """
234
+ # try:
235
+ # # Split text into sentences for better analysis
236
+ # sentences = text.split(". ")
237
+
238
+ # # Example: Extract key topics or keywords
239
+ # words = text.split()
240
+ # common_words = Counter(words).most_common(10)
241
+ # key_topics = ", ".join([word for word, count in common_words])
242
+
243
+ # # Example: Provide a breakdown of the content
244
+ # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
245
+
246
+ # return info
247
+ # except Exception as e:
248
+ # return f"Error extracting subtitle information: {str(e)}"
249
+
250
+
251
+ # def clean_text_for_analysis(text):
252
+ # """
253
+ # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
254
+ # """
255
+ # # Remove extra spaces and line breaks
256
+ # cleaned_text = " ".join(text.split())
257
+ # return cleaned_text
258
+
259
+
260
+ # def get_recommendations(keywords):
261
+ # """
262
+ # Fetches related video recommendations based on the provided keywords.
263
+ # This function can be expanded with a proper API or custom logic.
264
+ # """
265
+ # # Placeholder for fetching recommendations based on keywords
266
+ # return f"Recommendations for: {keywords}" # Dummy return for now
267
+
268
+
269
+
270
+ # def get_recommendations(keywords, max_results=5):
271
+ # if not keywords:
272
+ # return "Please provide search keywords"
273
+ # try:
274
+ # response = requests.get(
275
+ # "https://www.googleapis.com/youtube/v3/search",
276
+ # params={
277
+ # "part": "snippet",
278
+ # "q": f"educational {keywords}",
279
+ # "type": "video",
280
+ # "maxResults": max_results,
281
+ # "relevanceLanguage": "en",
282
+ # "key": YOUTUBE_API_KEY
283
+ # }
284
+ # ).json()
285
+
286
+ # results = []
287
+ # for item in response.get("items", []):
288
+ # title = item["snippet"]["title"]
289
+ # channel = item["snippet"]["channelTitle"]
290
+ # video_id = item["id"]["videoId"]
291
+ # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
292
+
293
+ # return "\n".join(results) if results else "No recommendations found"
294
+ # except Exception as e:
295
+ # return f"Error: {str(e)}"
296
+
297
+ # # Gradio Interface
298
+ # with gr.Blocks(theme=gr.themes.Soft()) as app:
299
+ # # Login Page
300
+ # with gr.Group() as login_page:
301
+ # gr.Markdown("# 🎓 Educational Learning Management System")
302
+ # username = gr.Textbox(label="Username")
303
+ # password = gr.Textbox(label="Password", type="password")
304
+ # login_btn = gr.Button("Login", variant="primary")
305
+ # login_msg = gr.Markdown()
306
+
307
+ # # Main Interface
308
+ # with gr.Group(visible=False) as main_page:
309
+ # with gr.Row():
310
+ # with gr.Column(scale=1):
311
+ # gr.Markdown("### 📋 Navigation")
312
+ # nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
313
+ # nav_students = gr.Button("👥 Students")
314
+ # nav_teachers = gr.Button("👨‍🏫 Teachers")
315
+ # nav_courses = gr.Button("📚 Courses")
316
+ # nav_youtube = gr.Button("🎥 YouTube Tool")
317
+ # logout_btn = gr.Button("🚪 Logout", variant="stop")
318
+
319
+ # with gr.Column(scale=3):
320
+ # # Dashboard Content
321
+ # dashboard_page = gr.Group()
322
+ # with dashboard_page:
323
+ # gr.Markdown("## 📊 Dashboard")
324
+ # gr.Markdown(f"""
325
+ # ### System Overview
326
+ # - 👥 Total Students: {len(students_data)}
327
+ # - 👨‍🏫 Total Teachers: {len(teachers_data)}
328
+ # - 📚 Total Courses: {len(courses_data)}
329
+
330
+ # ### Quick Actions
331
+ # - View student performance
332
+ # - Access course materials
333
+ # - Generate learning insights
334
+ # """)
335
+
336
+ # # Students Content
337
+ # students_page = gr.Group(visible=False)
338
+ # with students_page:
339
+ # gr.Markdown("## 👥 Students")
340
+ # gr.DataFrame(
341
+ # value=students_data,
342
+ # headers=["ID", "Name", "Grade", "Program"]
343
+ # )
344
+
345
+ # # Teachers Content
346
+ # teachers_page = gr.Group(visible=False)
347
+ # with teachers_page:
348
+ # gr.Markdown("## 👨‍🏫 Teachers")
349
+ # gr.DataFrame(
350
+ # value=teachers_data,
351
+ # headers=["ID", "Name", "Subject", "Qualification"]
352
+ # )
353
+
354
+ # # Courses Content
355
+ # courses_page = gr.Group(visible=False)
356
+ # with courses_page:
357
+ # gr.Markdown("## 📚 Courses")
358
+ # gr.DataFrame(
359
+ # value=courses_data,
360
+ # headers=["ID", "Name", "Instructor", "Level"]
361
+ # )
362
+
363
+ # # YouTube Tool Content
364
+ # youtube_page = gr.Group(visible=False)
365
+ # with youtube_page:
366
+ # gr.Markdown("## Agent for YouTube Content Exploration")
367
+ # with gr.Row():
368
+ # with gr.Column(scale=2):
369
+ # video_url = gr.Textbox(
370
+ # label="YouTube URL",
371
+ # placeholder="https://youtube.com/watch?v=..."
372
+ # )
373
+ # keywords = gr.Textbox(
374
+ # label="Keywords for Recommendations",
375
+ # placeholder="e.g., python programming, machine learning"
376
+ # )
377
+ # analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
378
+
379
+ # with gr.Column(scale=1):
380
+ # video_thumbnail = gr.Image(label="Video Preview")
381
+
382
+ # with gr.Row():
383
+ # with gr.Column():
384
+ # summary = gr.Textbox(label="📝 Summary", lines=8)
385
+ # sentiment = gr.Textbox(label="😊 Content Sentiment")
386
+ # with gr.Column():
387
+ # recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
388
+
389
+ # def login_check(user, pwd):
390
+ # if USER_CREDENTIALS.get(user) == pwd:
391
+ # return {
392
+ # login_page: gr.update(visible=False),
393
+ # main_page: gr.update(visible=True),
394
+ # login_msg: ""
395
+ # }
396
+ # return {
397
+ # login_page: gr.update(visible=True),
398
+ # main_page: gr.update(visible=False),
399
+ # login_msg: "❌ Invalid credentials"
400
+ # }
401
+
402
+ # def show_page(page_name):
403
+ # updates = {
404
+ # dashboard_page: gr.update(visible=False),
405
+ # students_page: gr.update(visible=False),
406
+ # teachers_page: gr.update(visible=False),
407
+ # courses_page: gr.update(visible=False),
408
+ # youtube_page: gr.update(visible=False)
409
+ # }
410
+ # updates[page_name] = gr.update(visible=True)
411
+ # return updates
412
+
413
+ # # Event Handlers
414
+ # login_btn.click(
415
+ # login_check,
416
+ # inputs=[username, password],
417
+ # outputs=[login_page, main_page, login_msg]
418
+ # )
419
+
420
+ # nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
421
+ # nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
422
+ # nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
423
+ # nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
424
+ # nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
425
+
426
+ # analyze_btn.click(
427
+ # process_youtube_video,
428
+ # inputs=[video_url, keywords],
429
+ # outputs=[video_thumbnail, summary, sentiment, recommendations]
430
+ # )
431
+
432
+ # logout_btn.click(
433
+ # lambda: {
434
+ # login_page: gr.update(visible=True),
435
+ # main_page: gr.update(visible=False)
436
+ # },
437
+ # outputs=[login_page, main_page]
438
+ # )
439
+
440
+ # if __name__ == "__main__":
441
+ # app.launch()
442
+
443
  import subprocess
444
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  import re
446
+ from collections import Counter
447
+ from textblob import TextBlob
448
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
449
+ from googleapiclient.discovery import build
450
+ import gradio as gr
451
+
452
+ # Install required packages
453
  def install_missing_packages():
454
  required_packages = {
455
+ "torch": ">=1.11.0",
456
+ "transformers": ">=4.34.0",
457
+ "youtube_transcript_api": ">=0.6.3",
458
+ "pytube": None,
459
  "huggingface_hub": ">=0.19.0",
460
  "PyPDF2": ">=3.0.1",
461
+ "textblob": ">=0.17.1",
462
+ "python-dotenv": ">=1.0.0",
463
+ "genai": None,
464
  "google-generativeai": None,
465
+ "google-cloud-aiplatform": "==1.34.0"
466
  }
467
 
 
468
  for package, version in required_packages.items():
469
  try:
470
  __import__(package)
 
473
  subprocess.check_call(["pip", "install", package_name])
474
 
475
  install_missing_packages()
 
 
 
 
 
 
 
 
476
 
477
  # Configuration
 
 
 
 
 
 
 
 
 
 
478
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
479
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
480
 
481
  if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
482
  raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
def extract_video_id(url):
    """Extract the 11-character YouTube video id from *url*.

    Handles both ``watch?v=<id>`` and ``youtu.be/<id>`` style links.
    Returns the id string, or ``None`` when no id is present.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    if found is None:
        return None
    return found.group(1)
487
 
 
488
  def get_video_metadata(video_id):
 
 
 
489
  try:
 
490
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
491
  request = youtube.videos().list(part="snippet", id=video_id)
492
  response = request.execute()
 
502
  except Exception as e:
503
  return {"title": "Error fetching metadata", "description": str(e)}
504
 
505
def clean_text_for_analysis(text):
    """Collapse every run of whitespace in *text* into a single space."""
    tokens = text.split()
    return " ".join(tokens)
507
 
508
def extract_subtitle_info(text):
    """Build a short stats blurb for *text*: the ten most frequent words,
    a naive sentence count (split on '. '), and the total word count."""
    try:
        sentence_list = text.split(". ")
        word_list = text.split()
        # Counter.most_common keeps insertion order on ties (Python 3.7+).
        top_words = [w for w, _ in Counter(word_list).most_common(10)]
        key_topics = ", ".join(top_words)
        return (
            f"Key topics discussed: {key_topics}. "
            f"\nNumber of sentences: {len(sentence_list)}. "
            f"\nTotal words: {len(word_list)}."
        )
    except Exception as exc:
        return f"Error extracting subtitle information: {str(exc)}"
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  def get_recommendations(keywords, max_results=5):
520
  if not keywords:
521
  return "Please provide search keywords"
 
531
  "key": YOUTUBE_API_KEY
532
  }
533
  ).json()
534
+
535
  results = []
536
  for item in response.get("items", []):
537
  title = item["snippet"]["title"]
538
  channel = item["snippet"]["channelTitle"]
539
  video_id = item["id"]["videoId"]
540
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
541
+
542
  return "\n".join(results) if results else "No recommendations found"
543
  except Exception as e:
544
  return f"Error: {str(e)}"
545
 
546
def process_youtube_video(url, keywords):
    """Analyze a YouTube video: thumbnail, transcript summary, sentiment,
    and keyword-based related-video recommendations.

    Parameters:
        url: a YouTube link containing an 11-character video id.
        keywords: search terms for recommendations; blank skips the search.

    Returns:
        A 4-tuple (thumbnail_url_or_None, summary, sentiment_label,
        recommendations) — always four items, matching the four Gradio
        output components wired to this function.
    """
    try:
        video_id = extract_video_id(url)
        if not video_id:
            # BUG FIX: this branch previously returned FIVE values while every
            # other path (and the four Gradio outputs) expects four, which
            # would crash the UI callback on an invalid URL.
            return None, "Invalid YouTube URL", "N/A", ""

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
        summary = "No transcript available"
        sentiment_label = "N/A"
        recommendations = ""

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except NoTranscriptFound:
                # BUG FIX: was a bare `except:` (also swallowed KeyboardInterrupt
                # etc.); only a missing manual transcript should trigger the
                # fall-back to the auto-generated English transcript.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join([t['text'] for t in transcript.fetch()])
            if not text.strip():
                raise ValueError("Transcript is empty")

            cleaned_text = clean_text_for_analysis(text)

            # TextBlob polarity lies in [-1, 1]; its sign picks the label.
            sentiment = TextBlob(cleaned_text).sentiment
            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"

            summary = f"Summary: {cleaned_text[:400]}..."

        except (TranscriptsDisabled, NoTranscriptFound):
            # No captions at all: fall back to the video description.
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "No subtitles available")
            sentiment_label = "N/A"

        if keywords.strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, recommendations

    except Exception as e:
        return None, f"Error: {str(e)}", "N/A", ""
590
+
591
  # Gradio Interface
592
# Gradio Interface: four stacked rows — title, inputs, primary results,
# secondary results — wired to process_youtube_video.
with gr.Blocks() as app:
    # Header row
    with gr.Row():
        gr.Markdown("# YouTube Content Analysis Tool")

    # Input row: video URL, recommendation keywords, and the trigger button.
    with gr.Row():
        video_url = gr.Textbox(label="YouTube URL", placeholder="https://youtube.com/watch?v=...")
        keywords = gr.Textbox(label="Keywords for Recommendations", placeholder="e.g., Python programming")
        analyze_btn = gr.Button("Analyze")

    # Primary results: thumbnail image and transcript summary.
    with gr.Row():
        video_thumbnail = gr.Image(label="Thumbnail")
        summary = gr.Textbox(label="Summary", lines=5)

    # Secondary results: sentiment label and related-video list.
    with gr.Row():
        sentiment = gr.Textbox(label="Sentiment")
        recommendations = gr.Textbox(label="Related Videos", lines=5)

    # Wire the button: the handler returns exactly four values, matching
    # the four output components below.
    analyze_btn.click(
        fn=process_youtube_video,
        inputs=[video_url, keywords],
        outputs=[video_thumbnail, summary, sentiment, recommendations],
    )

if __name__ == "__main__":
    app.launch()