Sayiqa committed on
Commit
571928b
·
verified ·
1 Parent(s): d515772

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +585 -50
app.py CHANGED
@@ -1,3 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
  subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
  subprocess.check_call(["pip", "install", "torch>=1.7.1"])
@@ -61,6 +507,17 @@ if hf_token:
61
  login(hf_token)
62
  else:
63
  raise ValueError("HF_TOKEN environment variable not set.")
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # Configuration
66
  USER_CREDENTIALS = {
@@ -106,33 +563,126 @@ courses_data = [
106
  (5, "Mathematics", "Ms. Smith", "Intermediate")
107
  ]
108
 
109
- # def sanitize_text(text):
110
- # """Remove invalid Unicode characters."""
111
- # return text.encode("utf-8", "replace").decode("utf-8")
112
-
113
- # def extract_video_id(url):
114
- # if not url:
115
- # return None
116
- # patterns = [
117
- # r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
118
- # ]
119
- # for pattern in patterns:
120
- # match = re.search(pattern, url)
121
- # if match:
122
- # return match.group(1)
123
- # return None
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
 
126
 
127
- from textblob import TextBlob
128
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
129
- import re
130
- from collections import Counter
131
- from googleapiclient.discovery import build
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  def process_youtube_video(url="", keywords=""):
134
  try:
135
- #Initialize variables
136
  thumbnail = None
137
  summary = "No transcript available"
138
  sentiment_label = "N/A"
@@ -161,19 +711,16 @@ def process_youtube_video(url="", keywords=""):
161
  if not text.strip():
162
  raise ValueError("Transcript is empty")
163
 
164
- # Clean up the text for sentiment analysis
165
- cleaned_text = clean_text_for_analysis(text)
166
-
167
- # Sentiment analysis
168
- sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
169
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
170
-
171
  # Generate summary
172
  model = genai.GenerativeModel("gemini-pro")
173
- summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
174
 
175
  # Extract subtitle information
176
- subtitle_info = extract_subtitle_info(cleaned_text)
 
 
 
 
177
 
178
  except TranscriptsDisabled:
179
  metadata = get_video_metadata(video_id)
@@ -202,6 +749,7 @@ def extract_video_id(url):
202
  """
203
  Extracts the video ID from a YouTube URL.
204
  """
 
205
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
206
  return match.group(1) if match else None
207
 
@@ -211,7 +759,10 @@ def get_video_metadata(video_id):
211
  Fetches video metadata such as title and description using the YouTube Data API.
212
  """
213
  try:
214
- YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
 
 
 
215
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
216
  request = youtube.videos().list(part="snippet", id=video_id)
217
  response = request.execute()
@@ -238,6 +789,7 @@ def extract_subtitle_info(text):
238
  sentences = text.split(". ")
239
 
240
  # Example: Extract key topics or keywords
 
241
  words = text.split()
242
  common_words = Counter(words).most_common(10)
243
  key_topics = ", ".join([word for word, count in common_words])
@@ -250,22 +802,10 @@ def extract_subtitle_info(text):
250
  return f"Error extracting subtitle information: {str(e)}"
251
 
252
 
253
- def clean_text_for_analysis(text):
254
- """
255
- Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
256
- """
257
- # Remove extra spaces and line breaks
258
- cleaned_text = " ".join(text.split())
259
- return cleaned_text
260
 
261
 
262
- def get_recommendations(keywords):
263
- """
264
- Fetches related video recommendations based on the provided keywords.
265
- This function can be expanded with a proper API or custom logic.
266
- """
267
- # Placeholder for fetching recommendations based on keywords
268
- return f"Recommendations for: {keywords}" # Dummy return for now
269
 
270
 
271
  def get_recommendations(keywords, max_results=5):
@@ -440,8 +980,3 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
440
 
441
  if __name__ == "__main__":
442
  app.launch()
443
-
444
-
445
-
446
- #############################
447
-
 
1
+ # import subprocess
2
+ # subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
+ # subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
+ # subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
+ # subprocess.check_call(["pip", "install", "pytube"])
6
+ # subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
+ # subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
+ # subprocess.check_call(["pip", "install", "google-generativeai"])
9
+ # subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
+ # subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
+ # subprocess.check_call(["pip", "install", "genai"])
12
+ # subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
+ # import transformers
14
+ # import torch
15
+ # import os
16
+ # import youtube_transcript_api
17
+ # import pytube
18
+ # import gradio
19
+ # import PyPDF2
20
+ # import pathlib
21
+ # import pandas
22
+ # import numpy
23
+ # import textblob
24
+ # import gradio as gr
25
+ # from youtube_transcript_api import YouTubeTranscriptApi
26
+ # import google.generativeai as genai
27
+ # import requests
28
+ # from textblob import TextBlob
29
+ # import re
30
+ # #from google.cloud import generativeai
31
+ # from huggingface_hub import login
32
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
33
+ # def install_missing_packages():
34
+ # required_packages = {
35
+ # "torch":">=1.11.0",
36
+ # "transformers":">=4.34.0",
37
+ # "youtube_transcript_api" :">=0.6.3" ,
38
+ # "pytube":None,
39
+ # "huggingface_hub": ">=0.19.0",
40
+ # "PyPDF2": ">=3.0.1",
41
+ # "textblob":">=0.17.1",
42
+ # "python-dotenv":">=1.0.0",
43
+ # "genai":None,
44
+ # "google-generativeai": None,
45
+ # "google-cloud-aiplatform":"==1.34.0"
46
+ # }
47
+
48
+
49
+ # for package, version in required_packages.items():
50
+ # try:
51
+ # __import__(package)
52
+ # except ImportError:
53
+ # package_name = f"{package}{version}" if version else package
54
+ # subprocess.check_call(["pip", "install", package_name])
55
+
56
+ # install_missing_packages()
57
+ # # Configuration
58
+
59
+ # hf_token = os.getenv("HF_TOKEN")
60
+ # if hf_token:
61
+ # login(hf_token)
62
+ # else:
63
+ # raise ValueError("HF_TOKEN environment variable not set.")
64
+
65
+ # # Configuration
66
+ # USER_CREDENTIALS = {
67
+ # "admin": "password123",
68
+ # "teacher": "teach2024",
69
+ # "student": "learn2024"
70
+ # }
71
+
72
+ # import os
73
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
74
+
75
+ # # Use environment variables
76
+ # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
77
+ # YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
78
+
79
+ # if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
80
+ # raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
81
+
82
+ # genai.configure(api_key=GOOGLE_API_KEY)
83
+
84
+ # # Database
85
+ # students_data = [
86
+ # (1, "Alice", "A", "Computer Science"),
87
+ # (2, "Aliaa", "B", "Mathematics"),
88
+ # (3, "Charlie", "A", "Machine Learning"),
89
+ # (4, "Daan", "A", "Physics"),
90
+ # (5, "Jhon", "C", "Math"),
91
+ # (6, "Emma", "A+", "Computer Science")
92
+ # ]
93
+
94
+ # teachers_data = [
95
+ # (1, "Dr. Smith", "Math", "MS Mathematics"),
96
+ # (2, "Ms. Johnson", "Science", "MSc Physics"),
97
+ # (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
98
+ # (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
99
+ # ]
100
+
101
+ # courses_data = [
102
+ # (1, "Algebra", "Dr. Smith", "Advanced"),
103
+ # (2, "Biology", "Ms. Mia", "Intermediate"),
104
+ # (3, "Machine Learning", "Ms. Jack", "Intermediate"),
105
+ # (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
106
+ # (5, "Mathematics", "Ms. Smith", "Intermediate")
107
+ # ]
108
+
109
+ # def sanitize_text(text):
110
+ # """Remove invalid Unicode characters."""
111
+ # return text.encode("utf-8", "replace").decode("utf-8")
112
+
113
+ # def extract_video_id(url):
114
+ # if not url:
115
+ # return None
116
+ # patterns = [
117
+ # r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
118
+ # ]
119
+ # for pattern in patterns:
120
+ # match = re.search(pattern, url)
121
+ # if match:
122
+ # return match.group(1)
123
+ # return None
124
+
125
+
126
+
127
+ # from textblob import TextBlob
128
+ # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
129
+ # import re
130
+ # from collections import Counter
131
+ # from googleapiclient.discovery import build
132
+
133
+ # def process_youtube_video(url="", keywords=""):
134
+ # try:
135
+ # #Initialize variables
136
+ # thumbnail = None
137
+ # summary = "No transcript available"
138
+ # sentiment_label = "N/A"
139
+ # recommendations = ""
140
+ # subtitle_info = "No additional information available"
141
+
142
+ # if not url.strip():
143
+ # return None, "Please enter a YouTube URL", "N/A", "", ""
144
+
145
+ # video_id = extract_video_id(url)
146
+ # if not video_id:
147
+ # return None, "Invalid YouTube URL", "N/A", "", ""
148
+
149
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
150
+
151
+ # try:
152
+ # # Fetch transcript
153
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
154
+ # transcript = None
155
+ # try:
156
+ # transcript = transcript_list.find_transcript(['en'])
157
+ # except:
158
+ # transcript = transcript_list.find_generated_transcript(['en'])
159
+
160
+ # text = " ".join([t['text'] for t in transcript.fetch()])
161
+ # if not text.strip():
162
+ # raise ValueError("Transcript is empty")
163
+
164
+ # # Clean up the text for sentiment analysis
165
+ # cleaned_text = clean_text_for_analysis(text)
166
+
167
+ # # Sentiment analysis
168
+ # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
169
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
170
+
171
+ # # Generate summary
172
+ # model = genai.GenerativeModel("gemini-pro")
173
+ # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
174
+
175
+ # # Extract subtitle information
176
+ # subtitle_info = extract_subtitle_info(cleaned_text)
177
+
178
+ # except TranscriptsDisabled:
179
+ # metadata = get_video_metadata(video_id)
180
+ # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
181
+ # sentiment_label = "N/A"
182
+ # subtitle_info = "No subtitles available for analysis."
183
+ # except NoTranscriptFound:
184
+ # metadata = get_video_metadata(video_id)
185
+ # summary = metadata.get("description", "⚠️ No English transcript available.")
186
+ # sentiment_label = "N/A"
187
+ # subtitle_info = "No subtitles available for analysis."
188
+ # except Exception as e:
189
+ # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
190
+
191
+ # # Get recommendations
192
+ # if keywords.strip():
193
+ # recommendations = get_recommendations(keywords)
194
+
195
+ # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
196
+
197
+ # except Exception as e:
198
+ # return None, f"Error: {str(e)}", "N/A", "", ""
199
+
200
+
201
+ # def extract_video_id(url):
202
+ # """
203
+ # Extracts the video ID from a YouTube URL.
204
+ # """
205
+ # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
206
+ # return match.group(1) if match else None
207
+
208
+
209
+ # def get_video_metadata(video_id):
210
+ # """
211
+ # Fetches video metadata such as title and description using the YouTube Data API.
212
+ # """
213
+ # try:
214
+ # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
215
+ # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
216
+ # request = youtube.videos().list(part="snippet", id=video_id)
217
+ # response = request.execute()
218
+
219
+ # if "items" in response and len(response["items"]) > 0:
220
+ # snippet = response["items"][0]["snippet"]
221
+ # return {
222
+ # "title": snippet.get("title", "No title available"),
223
+ # "description": snippet.get("description", "No description available"),
224
+ # }
225
+ # return {}
226
+
227
+ # except Exception as e:
228
+ # return {"title": "Error fetching metadata", "description": str(e)}
229
+
230
+
231
+ # def extract_subtitle_info(text):
232
+ # """
233
+ # Extracts meaningful information from the subtitles.
234
+ # This could include topics, key insights, or a breakdown of the content.
235
+ # """
236
+ # try:
237
+ # # Split text into sentences for better analysis
238
+ # sentences = text.split(". ")
239
+
240
+ # # Example: Extract key topics or keywords
241
+ # words = text.split()
242
+ # common_words = Counter(words).most_common(10)
243
+ # key_topics = ", ".join([word for word, count in common_words])
244
+
245
+ # # Example: Provide a breakdown of the content
246
+ # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
247
+
248
+ # return info
249
+ # except Exception as e:
250
+ # return f"Error extracting subtitle information: {str(e)}"
251
+
252
+
253
+ # def clean_text_for_analysis(text):
254
+ # """
255
+ # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
256
+ # """
257
+ # # Remove extra spaces and line breaks
258
+ # cleaned_text = " ".join(text.split())
259
+ # return cleaned_text
260
+
261
+
262
+ # def get_recommendations(keywords):
263
+ # """
264
+ # Fetches related video recommendations based on the provided keywords.
265
+ # This function can be expanded with a proper API or custom logic.
266
+ # """
267
+ # # Placeholder for fetching recommendations based on keywords
268
+ # return f"Recommendations for: {keywords}" # Dummy return for now
269
+
270
+
271
+ # def get_recommendations(keywords, max_results=5):
272
+ # if not keywords:
273
+ # return "Please provide search keywords"
274
+ # try:
275
+ # response = requests.get(
276
+ # "https://www.googleapis.com/youtube/v3/search",
277
+ # params={
278
+ # "part": "snippet",
279
+ # "q": f"educational {keywords}",
280
+ # "type": "video",
281
+ # "maxResults": max_results,
282
+ # "relevanceLanguage": "en",
283
+ # "key": YOUTUBE_API_KEY
284
+ # }
285
+ # ).json()
286
+
287
+ # results = []
288
+ # for item in response.get("items", []):
289
+ # title = item["snippet"]["title"]
290
+ # channel = item["snippet"]["channelTitle"]
291
+ # video_id = item["id"]["videoId"]
292
+ # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
293
+
294
+ # return "\n".join(results) if results else "No recommendations found"
295
+ # except Exception as e:
296
+ # return f"Error: {str(e)}"
297
+
298
+ # # Gradio Interface
299
+ # with gr.Blocks(theme=gr.themes.Soft()) as app:
300
+ # # Login Page
301
+ # with gr.Group() as login_page:
302
+ # gr.Markdown("# 🎓 Educational Learning Management System")
303
+ # username = gr.Textbox(label="Username")
304
+ # password = gr.Textbox(label="Password", type="password")
305
+ # login_btn = gr.Button("Login", variant="primary")
306
+ # login_msg = gr.Markdown()
307
+
308
+ # # Main Interface
309
+ # with gr.Group(visible=False) as main_page:
310
+ # with gr.Row():
311
+ # with gr.Column(scale=1):
312
+ # gr.Markdown("### 📋 Navigation")
313
+ # nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
314
+ # nav_students = gr.Button("👥 Students")
315
+ # nav_teachers = gr.Button("👨‍🏫 Teachers")
316
+ # nav_courses = gr.Button("📚 Courses")
317
+ # nav_youtube = gr.Button("🎥 YouTube Tool")
318
+ # logout_btn = gr.Button("🚪 Logout", variant="stop")
319
+
320
+ # with gr.Column(scale=3):
321
+ # # Dashboard Content
322
+ # dashboard_page = gr.Group()
323
+ # with dashboard_page:
324
+ # gr.Markdown("## 📊 Dashboard")
325
+ # gr.Markdown(f"""
326
+ # ### System Overview
327
+ # - 👥 Total Students: {len(students_data)}
328
+ # - 👨‍🏫 Total Teachers: {len(teachers_data)}
329
+ # - 📚 Total Courses: {len(courses_data)}
330
+
331
+ # ### Quick Actions
332
+ # - View student performance
333
+ # - Access course materials
334
+ # - Generate learning insights
335
+ # """)
336
+
337
+ # # Students Content
338
+ # students_page = gr.Group(visible=False)
339
+ # with students_page:
340
+ # gr.Markdown("## 👥 Students")
341
+ # gr.DataFrame(
342
+ # value=students_data,
343
+ # headers=["ID", "Name", "Grade", "Program"]
344
+ # )
345
+
346
+ # # Teachers Content
347
+ # teachers_page = gr.Group(visible=False)
348
+ # with teachers_page:
349
+ # gr.Markdown("## 👨‍🏫 Teachers")
350
+ # gr.DataFrame(
351
+ # value=teachers_data,
352
+ # headers=["ID", "Name", "Subject", "Qualification"]
353
+ # )
354
+
355
+ # # Courses Content
356
+ # courses_page = gr.Group(visible=False)
357
+ # with courses_page:
358
+ # gr.Markdown("## 📚 Courses")
359
+ # gr.DataFrame(
360
+ # value=courses_data,
361
+ # headers=["ID", "Name", "Instructor", "Level"]
362
+ # )
363
+
364
+ # # YouTube Tool Content
365
+ # youtube_page = gr.Group(visible=False)
366
+ # with youtube_page:
367
+ # gr.Markdown("## Agent for YouTube Content Exploration")
368
+ # with gr.Row():
369
+ # with gr.Column(scale=2):
370
+ # video_url = gr.Textbox(
371
+ # label="YouTube URL",
372
+ # placeholder="https://youtube.com/watch?v=..."
373
+ # )
374
+ # keywords = gr.Textbox(
375
+ # label="Keywords for Recommendations",
376
+ # placeholder="e.g., python programming, machine learning"
377
+ # )
378
+ # analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
379
+
380
+ # with gr.Column(scale=1):
381
+ # video_thumbnail = gr.Image(label="Video Preview")
382
+
383
+ # with gr.Row():
384
+ # with gr.Column():
385
+ # summary = gr.Textbox(label="📝 Summary", lines=8)
386
+ # sentiment = gr.Textbox(label="😊 Content Sentiment")
387
+ # with gr.Column():
388
+ # recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
389
+
390
+ # def login_check(user, pwd):
391
+ # if USER_CREDENTIALS.get(user) == pwd:
392
+ # return {
393
+ # login_page: gr.update(visible=False),
394
+ # main_page: gr.update(visible=True),
395
+ # login_msg: ""
396
+ # }
397
+ # return {
398
+ # login_page: gr.update(visible=True),
399
+ # main_page: gr.update(visible=False),
400
+ # login_msg: "❌ Invalid credentials"
401
+ # }
402
+
403
+ # def show_page(page_name):
404
+ # updates = {
405
+ # dashboard_page: gr.update(visible=False),
406
+ # students_page: gr.update(visible=False),
407
+ # teachers_page: gr.update(visible=False),
408
+ # courses_page: gr.update(visible=False),
409
+ # youtube_page: gr.update(visible=False)
410
+ # }
411
+ # updates[page_name] = gr.update(visible=True)
412
+ # return updates
413
+
414
+ # # Event Handlers
415
+ # login_btn.click(
416
+ # login_check,
417
+ # inputs=[username, password],
418
+ # outputs=[login_page, main_page, login_msg]
419
+ # )
420
+
421
+ # nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
422
+ # nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
423
+ # nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
424
+ # nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
425
+ # nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
426
+
427
+ # analyze_btn.click(
428
+ # process_youtube_video,
429
+ # inputs=[video_url, keywords],
430
+ # outputs=[video_thumbnail, summary, sentiment, recommendations]
431
+ # )
432
+
433
+ # logout_btn.click(
434
+ # lambda: {
435
+ # login_page: gr.update(visible=True),
436
+ # main_page: gr.update(visible=False)
437
+ # },
438
+ # outputs=[login_page, main_page]
439
+ # )
440
+
441
+ # if __name__ == "__main__":
442
+ # app.launch()
443
+
444
+
445
+
446
+ #############################
447
  import subprocess
448
  subprocess.check_call(["pip", "install", "transformers==4.34.0"])
449
  subprocess.check_call(["pip", "install", "torch>=1.7.1"])
 
507
  login(hf_token)
508
  else:
509
  raise ValueError("HF_TOKEN environment variable not set.")
510
+
511
+ # GOOGLE_API_KEY = "AIzaSyAURQb9jueh3dBQ4SITgKoR0L2_33en3yU"
512
+ # YOUTUBE_API_KEY = "AIzaSyB7X-RYjZmUuDSMTQsvCfyzURw5bhqOto4"
513
+ # # genai.configure(api_key=GOOGLE_API_KEY)
514
+ # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
515
+ # genai.configure(api_key=GOOGLE_API_KEY)
516
+ # YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
517
+ # print("GOOGLE_API_KEY:", os.getenv("GOOGLE_API_KEY"))
518
+ # print("YOUTUBE_API_KEY:", os.getenv("YOUTUBE_API_KEY"))
519
+
520
+
521
 
522
  # Configuration
523
  USER_CREDENTIALS = {
 
563
  (5, "Mathematics", "Ms. Smith", "Intermediate")
564
  ]
565
 
566
def sanitize_text(text):
    """Return ``text`` with characters that cannot be UTF-8 encoded replaced.

    The string is round-tripped through UTF-8 using the ``"replace"`` error
    handler, so unencodable code points (such as lone surrogates) come back
    as ``"?"`` instead of raising ``UnicodeEncodeError``. Characters are
    replaced, not removed, so the length of the text is preserved.

    Args:
        text: String to sanitize. ``None`` is treated as an empty string.

    Returns:
        A string that can always be encoded as UTF-8.
    """
    if text is None:
        # Nothing to sanitize; avoid an AttributeError on missing input.
        return ""
    return text.encode("utf-8", "replace").decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
569
 
570
def extract_video_id(url):
    """Extract the video ID portion from a YouTube URL.

    Recognizes the ``v=`` query parameter as well as the ``/videos/``,
    ``embed/``, ``youtu.be/``, ``/v/`` and ``/e/`` path forms. The ID is the
    run of characters after the marker, up to the next ``#``, ``&`` or ``?``.

    Args:
        url: The URL to inspect. ``None`` or an empty string yields ``None``.

    Returns:
        The captured video ID, or ``None`` when no marker is followed by at
        least one ID character.
    """
    # Local import keeps this helper independent of module-level import order.
    import re

    if not url:
        return None

    # The dot in "youtu.be" is escaped so it only matches a literal ".".
    # "+" (not "*") ensures an empty ID is reported as "no match" (None)
    # instead of an empty string.
    pattern = (
        r'(?:v=|\/videos\/|embed\/|youtu\.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)'
        r'([^#\&\?]+)'
    )
    match = re.search(pattern, url)
    return match.group(1) if match else None
581
+
582
+
583
+ # def process_youtube_video(url="", keywords=""):
584
+ # try:
585
+ # # Initialize variables
586
+ # thumbnail = None # Default value for thumbnail
587
+ # summary = "No transcript available"
588
+ # sentiment_label = "N/A"
589
+
590
+ # if not url.strip():
591
+ # return None, "Please enter a YouTube URL", "N/A", ""
592
+
593
+ # video_id = extract_video_id(url)
594
+ # if not video_id:
595
+ # return None, "Invalid YouTube URL", "N/A", ""
596
 
597
+ # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
598
 
599
+ # try:
600
+ # # Try multiple transcript options
601
+ # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
602
+
603
+ # try:
604
+ # transcript = transcript_list.find_transcript(['en'])
605
+ # except:
606
+ # try:
607
+ # transcript = transcript_list.find_transcript(['en-US'])
608
+ # except:
609
+ # try:
610
+ # # Try auto-generated
611
+ # transcript = transcript_list.find_generated_transcript(['en'])
612
+ # except:
613
+ # raise NoTranscriptFound()
614
+
615
+ # text = " ".join([t['text'] for t in transcript.fetch()])
616
+
617
+ # # Generate summary
618
+ # model = genai.GenerativeModel("gemini-pro")
619
+ # summary = model.generate_content(f"Summarize this: {text[:4000]}").text
620
+
621
+ # # Analysis
622
+ # sentiment = TextBlob(text[:1000]).sentiment
623
+ # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
624
+
625
+ # except TranscriptsDisabled:
626
+ # # Fallback: Use video metadata if available
627
+ # metadata = get_video_metadata(video_id)
628
+ # summary = metadata.get("description", "⚠️ This video has disabled subtitles. No transcript available.")
629
+ # except NoTranscriptFound:
630
+ # # Fallback: Use video metadata if available
631
+ # metadata = get_video_metadata(video_id)
632
+ # summary = metadata.get("description", "⚠️ No English transcript available. No transcript available.")
633
+ # except Exception as e:
634
+ # return thumbnail, f"⚠️ Error: {str(e)}", "N/A", ""
635
+
636
+ # # Get recommendations
637
+ # if keywords.strip():
638
+ # recommendations = get_recommendations(keywords)
639
+ # else:
640
+ # recommendations = ""
641
+
642
+ # return thumbnail, summary, sentiment_label, recommendations
643
+
644
+ # except Exception as e:
645
+ # return None, f"Error: {str(e)}", "N/A", ""
646
+
647
+
648
+ # def get_video_metadata(video_id):
649
+ # """
650
+ # Fetches video metadata such as title and description using the YouTube Data API.
651
+ # """
652
+ # try:
653
+ # from googleapiclient.discovery import build
654
+
655
+ # # Replace with your YouTube Data API key
656
+ # API_KEY = "AIzaSyB7X-RYjZmUuDSMTQsvCfyzURw5bhqOto4"
657
+ # youtube = build("youtube", "v3", developerKey=API_KEY)
658
+ # request = youtube.videos().list(part="snippet", id=video_id)
659
+ # response = request.execute()
660
+
661
+ # if "items" in response and len(response["items"]) > 0:
662
+ # snippet = response["items"][0]["snippet"]
663
+ # return {
664
+ # "title": snippet.get("title", "No title available"),
665
+ # "description": snippet.get("description", "No description available"),
666
+ # }
667
+ # return {}
668
+
669
+ # except Exception as e:
670
+ # return {"title": "Error fetching metadata", "description": str(e)}
671
+
672
+ # # Get recommendations
673
+ # if keywords.strip():
674
+ # recommendations = get_recommendations(keywords)
675
+ # else:
676
+ # recommendations = ""
677
+
678
+ # return thumbnail, summary, sentiment_label, recommendations
679
+
680
+ # except Exception as e:
681
+ # return None, f"Error: {str(e)}", "N/A", ""
682
 
683
  def process_youtube_video(url="", keywords=""):
684
  try:
685
+ # Initialize variables
686
  thumbnail = None
687
  summary = "No transcript available"
688
  sentiment_label = "N/A"
 
711
  if not text.strip():
712
  raise ValueError("Transcript is empty")
713
 
 
 
 
 
 
 
 
714
  # Generate summary
715
  model = genai.GenerativeModel("gemini-pro")
716
+ summary = model.generate_content(f"Summarize this: {text[:4000]}").text
717
 
718
  # Extract subtitle information
719
+ subtitle_info = extract_subtitle_info(text)
720
+
721
+ # Sentiment analysis
722
+ sentiment = TextBlob(text[:1000]).sentiment
723
+ sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
724
 
725
  except TranscriptsDisabled:
726
  metadata = get_video_metadata(video_id)
 
749
  """
750
  Extracts the video ID from a YouTube URL.
751
  """
752
+ import re
753
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
754
  return match.group(1) if match else None
755
 
 
759
  Fetches video metadata such as title and description using the YouTube Data API.
760
  """
761
  try:
762
+ from googleapiclient.discovery import build
763
+
764
+ # Replace with your YouTube Data API key
765
+ YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
766
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
767
  request = youtube.videos().list(part="snippet", id=video_id)
768
  response = request.execute()
 
789
  sentences = text.split(". ")
790
 
791
  # Example: Extract key topics or keywords
792
+ from collections import Counter
793
  words = text.split()
794
  common_words = Counter(words).most_common(10)
795
  key_topics = ", ".join([word for word, count in common_words])
 
802
  return f"Error extracting subtitle information: {str(e)}"
803
 
804
 
 
 
 
 
 
 
 
805
 
806
 
807
+
808
+
 
 
 
 
 
809
 
810
 
811
  def get_recommendations(keywords, max_results=5):
 
980
 
981
  if __name__ == "__main__":
982
  app.launch()