Sayiqa committed on
Commit
edf026f
·
verified ·
1 Parent(s): b598aa9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +327 -89
app.py CHANGED
@@ -68,116 +68,354 @@ if hf_token:
68
  else:
69
  raise ValueError("HF_TOKEN environment variable not set.")
70
 
71
- # GOOGLE_API_KEY = "AIzaSyAURQb9jueh3dBQ4SITgKoR0L2_33en3yU"
72
- # YOUTUBE_API_KEY = "AIzaSyB7X-RYjZmUuDSMTQsvCfyzURw5bhqOto4"
73
- # genai.configure(api_key=GOOGLE_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
75
- genai.configure(api_key=GOOGLE_API_KEY)
76
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
77
- print("GOOGLE_API_KEY:", os.getenv("GOOGLE_API_KEY"))
78
- print("YOUTUBE_API_KEY:", os.getenv("YOUTUBE_API_KEY"))
79
 
 
 
80
 
81
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
82
- import requests
83
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- PROMPT = """You are a YouTube video summarizer. You will be taking the transcript text
86
- and summarizing the entire video, providing the important points within 250 words.
87
- Please provide the summary of the text given here: """
 
 
 
 
88
 
89
- def extract_transcript_details(youtube_video_url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  try:
91
- # Extract video ID
92
- if "youtu.be/" in youtube_video_url:
93
- video_id = youtube_video_url.split("youtu.be/")[1].split("?")[0]
94
- else:
95
- video_id = youtube_video_url.split("v=")[1].split("&")[0]
 
96
 
97
- # Attempt to fetch transcript
98
- available_transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
99
- transcript_data = None
100
  try:
101
- # Try English transcript
102
- # transcript_data = available_transcripts.find_transcript(['en']).fetch()
103
- transcript = YouTubeTranscriptApi.get_transcript('jGwO_UgTS7I', languages=['en'])
104
- print(transcript)
105
- except NoTranscriptFound:
106
- # Try English (India) transcript
107
- transcript_data = available_transcripts.find_transcript(['en-IN']).fetch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  except TranscriptsDisabled:
109
- return "Error: Transcripts are disabled for this video."
 
 
 
 
110
 
111
- # Combine transcript text
112
- transcript = " ".join([entry["text"] for entry in transcript_data])
113
- return transcript
114
- except VideoUnavailable:
115
- return "Error: The video is unavailable."
116
- except Exception as e:
117
- return f"Error extracting transcript: {str(e)}"
118
 
119
- def generate_gemini_content(transcript_text, prompt=PROMPT):
120
- try:
121
- # Placeholder for AI model
122
- # Replace with actual model call if needed
123
- return f"Summary: {transcript_text[:250]}..." # Example summary
124
- except Exception as e:
125
- return f"Error generating summary: {str(e)}"
126
 
127
- def get_video_thumbnail(youtube_video_url):
 
 
 
 
128
  try:
129
- if "youtu.be/" in youtube_video_url:
130
- video_id = youtube_video_url.split("youtu.be/")[1].split("?")[0]
131
- else:
132
- video_id = youtube_video_url.split("v=")[1].split("&")[0]
133
- thumbnail_url = f"http://img.youtube.com/vi/{video_id}/0.jpg"
134
- return thumbnail_url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  except Exception as e:
136
- return None
137
 
138
- def process_video(youtube_video_url):
139
- thumbnail = get_video_thumbnail(youtube_video_url)
140
- transcript = extract_transcript_details(youtube_video_url)
141
- if "Error" in transcript:
142
- summary = transcript
143
- else:
144
- summary = generate_gemini_content(transcript)
145
- return thumbnail, summary
146
-
147
- def gradio_interface(youtube_url, user_preferences):
148
- thumbnail, summary = process_video(youtube_url)
149
- recommendations = "Feature under development." # Placeholder
150
- return thumbnail, summary, recommendations
151
-
152
- # Create Gradio Blocks Interface
153
- with gr.Blocks() as demo:
154
- gr.Markdown("# YouTube Transcript to Notes and Recommendations")
155
 
156
- with gr.Row():
157
- youtube_url_input = gr.Textbox(
158
- label="Enter YouTube Video Link",
159
- placeholder="https://www.youtube.com/watch?v=VIDEO_ID"
160
- )
161
- user_preferences_input = gr.Textbox(
162
- label="Enter Your Interests/Preferences",
163
- placeholder="e.g., machine learning, cooking recipes"
164
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
- get_notes_button = gr.Button("Get Detailed Notes and Recommendations")
 
 
 
 
 
 
 
 
 
167
 
168
- with gr.Row():
169
- thumbnail_output = gr.Image(label="Video Thumbnail")
 
 
 
 
170
 
171
- with gr.Row():
172
- summary_output = gr.Textbox(label="Detailed Notes", lines=15)
173
- recommendations_output = gr.Textbox(label="Personalized Recommendations", lines=10)
 
 
 
 
 
 
 
 
174
 
175
- get_notes_button.click(
176
- fn=gradio_interface,
177
- inputs=[youtube_url_input, user_preferences_input],
178
- outputs=[thumbnail_output, summary_output, recommendations_output]
 
 
179
  )
180
 
181
- # Launch the Gradio app
182
  if __name__ == "__main__":
183
- demo.launch()
 
68
  else:
69
  raise ValueError("HF_TOKEN environment variable not set.")
70
 
71
+ import subprocess
72
+ subprocess.check_call(["pip", "install", "transformers==4.34.0"])
73
+ subprocess.check_call(["pip", "install", "torch>=1.7.1"])
74
+ subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
75
+ subprocess.check_call(["pip", "install", "pytube"])
76
+ subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
77
+ subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
78
+ subprocess.check_call(["pip", "install", "google-generativeai"])
79
+ subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
80
+ subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
81
+ subprocess.check_call(["pip", "install", "genai"])
82
+ subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
83
+ import transformers
84
+ import torch
85
+ import os
86
+ import youtube_transcript_api
87
+ import pytube
88
+ import gradio
89
+ import PyPDF2
90
+ import pathlib
91
+ import pandas
92
+ import numpy
93
+ import textblob
94
+ import gradio as gr
95
+ from youtube_transcript_api import YouTubeTranscriptApi
96
+ import google.generativeai as genai
97
+ import requests
98
+ from textblob import TextBlob
99
+ import re
100
+ #from google.cloud import generativeai
101
+ from huggingface_hub import login
102
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
103
def install_missing_packages():
    """Install every required distribution whose module cannot be located.

    Each entry maps a pip distribution name to the module name that
    distribution provides plus an optional version specifier. Presence is
    checked by module name, because several distributions (for example
    ``python-dotenv``) are not importable under their distribution name and
    would otherwise be reinstalled on every start.

    Raises:
        subprocess.CalledProcessError: If a ``pip install`` invocation fails.
    """
    import importlib.util
    import sys

    # distribution name -> (importable module name, pip version specifier)
    # NOTE(review): the module names for "genai" and the google-* packages
    # follow their published layouts -- confirm against the deployed versions.
    required_packages = {
        "torch": ("torch", ">=1.11.0"),
        "transformers": ("transformers", ">=4.34.0"),
        "youtube_transcript_api": ("youtube_transcript_api", ">=0.6.3"),
        "pytube": ("pytube", None),
        "huggingface_hub": ("huggingface_hub", ">=0.19.0"),
        "PyPDF2": ("PyPDF2", ">=3.0.1"),
        "textblob": ("textblob", ">=0.17.1"),
        "python-dotenv": ("dotenv", ">=1.0.0"),
        "genai": ("genai", None),
        "google-generativeai": ("google.generativeai", None),
        "google-cloud-aiplatform": ("google.cloud.aiplatform", "==1.34.0"),
    }

    for package, (module_name, version) in required_packages.items():
        try:
            # find_spec imports parent packages of a dotted name and raises
            # when a parent is missing, so treat that as "not installed".
            installed = importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            installed = False
        if installed:
            continue

        requirement = f"{package}{version}" if version else package
        # Run pip through the current interpreter so the package lands in the
        # environment this process is actually using.
        subprocess.check_call([sys.executable, "-m", "pip", "install", requirement])
125
+
126
+ install_missing_packages()
127
+ # Configuration
128
+
129
+ hf_token = os.getenv("HF_TOKEN")
130
+ if hf_token:
131
+ login(hf_token)
132
+ else:
133
+ raise ValueError("HF_TOKEN environment variable not set.")
134
+
135
+
136
+ # Configuration
137
+ USER_CREDENTIALS = {
138
+ "admin": "password123",
139
+ "teacher": "teach2024",
140
+ "student": "learn2024"
141
+ }
142
+
143
+ import os
144
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
145
+
146
+ # Use environment variables
147
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
148
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
 
 
149
 
150
+ if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
151
+ raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
152
 
153
+ genai.configure(api_key=GOOGLE_API_KEY)
154
+
155
+ # Database
156
+ students_data = [
157
+ (1, "Alice", "A", "Computer Science"),
158
+ (2, "Aliaa", "B", "Mathematics"),
159
+ (3, "Charlie", "A", "Machine Learning"),
160
+ (4, "Daan", "A", "Physics"),
161
+ (5, "Jhon", "C", "Math"),
162
+ (6, "Emma", "A+", "Computer Science")
163
+ ]
164
+
165
+ teachers_data = [
166
+ (1, "Dr. Smith", "Math", "MS Mathematics"),
167
+ (2, "Ms. Johnson", "Science", "MSc Physics"),
168
+ (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
169
+ (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
170
+ ]
171
 
172
+ courses_data = [
173
+ (1, "Algebra", "Dr. Smith", "Advanced"),
174
+ (2, "Biology", "Ms. Mia", "Intermediate"),
175
+ (3, "Machine Learning", "Ms. Jack", "Intermediate"),
176
+ (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
177
+ (5, "Mathematics", "Ms. Smith", "Intermediate")
178
+ ]
179
 
180
def sanitize_text(text):
    """Return *text* with characters that cannot be UTF-8 encoded replaced.

    Characters such as lone surrogates, which have no UTF-8 encoding, are
    replaced with ``?`` (they are substituted, not removed). ``None`` is
    treated as empty text and yields ``""``.
    """
    if text is None:
        return ""
    return text.encode("utf-8", "replace").decode("utf-8")
183
+
184
def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognises ``watch?v=``, ``youtu.be/``, ``/embed/``, ``/videos/``,
    ``/v/``, ``/e/``, ``/shorts/`` and ``/live/`` style links. Surrounding
    whitespace is ignored and the ID stops at the first character that is not
    a letter, digit, ``-`` or ``_``.

    Args:
        url: The URL text to inspect; may be empty or ``None``.

    Returns:
        The video ID string, or ``None`` when *url* is empty or no ID is found.
    """
    if not url:
        return None

    # "v=" also covers "watch?v=" and "/watch?v=". The capture is limited to
    # the URL-safe ID alphabet so separators and whitespace never leak into
    # the result, and "+" ensures an empty ID is never returned.
    pattern = (
        r'(?:v=|\/videos\/|embed\/|shorts\/|live\/|youtu\.be\/|\/v\/|\/e\/)'
        r'([A-Za-z0-9_-]+)'
    )
    match = re.search(pattern, url.strip())
    return match.group(1) if match else None
195
+
196
def process_youtube_video(url="", keywords=""):
    """Summarise a YouTube video's English transcript and label its sentiment.

    Args:
        url: Link to the video, in any form ``extract_video_id`` accepts.
        keywords: Optional search terms; when non-blank they are passed to
            ``get_recommendations``.

    Returns:
        A 4-tuple ``(thumbnail, summary, sentiment_label, recommendations)``.
        ``thumbnail`` is an image URL or ``None``. On failure ``summary``
        carries a user-facing message, ``sentiment_label`` is ``"N/A"`` and
        ``recommendations`` is ``""``. Errors are reported through the tuple
        rather than raised.
    """
    try:
        if not url or not url.strip():
            return None, "Please enter a YouTube URL", "N/A", ""

        video_id = extract_video_id(url.strip())
        if not video_id:
            return None, "Invalid YouTube URL", "N/A", ""

        # Built before any network call so the transcript error paths below
        # can still return a preview image.
        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            try:
                # Language codes are tried in the order given.
                transcript = transcript_list.find_transcript(['en', 'en-US'])
            except NoTranscriptFound:
                # Last resort: an auto-generated English transcript. If this
                # fails too, the library's own NoTranscriptFound propagates.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join(t['text'] for t in transcript.fetch())

            # Generate summary (only the first 4000 characters are sent).
            model = genai.GenerativeModel("gemini-pro")
            summary = model.generate_content(f"Summarize this: {text[:4000]}").text

            # Sentiment is estimated from the first 1000 characters.
            polarity = TextBlob(text[:1000]).sentiment.polarity
            if polarity > 0:
                mood = "Positive"
            elif polarity < 0:
                mood = "Negative"
            else:
                mood = "Neutral"
            sentiment_label = f"{mood} ({polarity:.2f})"

        except TranscriptsDisabled:
            return thumbnail, "⚠️ This video has disabled subtitles. Please try another video.", "N/A", ""
        except NoTranscriptFound:
            return thumbnail, "⚠️ No English transcript available. Please try another video.", "N/A", ""
        except Exception as e:
            return None, f"⚠️ Error: {str(e)}", "N/A", ""

        # Get recommendations
        if keywords and keywords.strip():
            recommendations = get_recommendations(keywords)
        else:
            recommendations = ""

        return thumbnail, summary, sentiment_label, recommendations

    except Exception as e:
        # UI boundary: surface anything unexpected as text instead of raising.
        return None, f"Error: {str(e)}", "N/A", ""
250
def get_recommendations(keywords, max_results=5):
    """Search the YouTube Data API for educational videos matching *keywords*.

    Args:
        keywords: Search terms; the query sent is ``"educational <keywords>"``.
        max_results: Maximum number of videos to request.

    Returns:
        A newline-separated listing with title, channel and link per video;
        ``"No recommendations found"`` when the search returns no videos;
        ``"Please provide search keywords"`` for empty or blank keywords; or
        ``"Error: ..."`` when the request fails or the API reports an error.
    """
    if not keywords or not keywords.strip():
        return "Please provide search keywords"
    try:
        payload = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params={
                "part": "snippet",
                "q": f"educational {keywords}",
                "type": "video",
                "maxResults": max_results,
                "relevanceLanguage": "en",
                "key": YOUTUBE_API_KEY
            },
            # Without a timeout a stalled connection would block the UI
            # callback indefinitely.
            timeout=10,
        ).json()

        # An error payload has no "items"; report it instead of pretending
        # the search simply found nothing.
        api_error = payload.get("error")
        if api_error:
            if isinstance(api_error, dict):
                message = api_error.get("message", "unknown API error")
            else:
                message = api_error
            return f"Error: {message}"

        results = []
        for item in payload.get("items", []):
            video_id = item.get("id", {}).get("videoId")
            if not video_id:
                # Skip entries that carry no video ID.
                continue
            snippet = item.get("snippet", {})
            title = snippet.get("title", "")
            channel = snippet.get("channelTitle", "")
            results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")

        return "\n".join(results) if results else "No recommendations found"
    except Exception as e:
        return f"Error: {str(e)}"
276
 
277
+ # Gradio Interface
278
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
279
+ # Login Page
280
+ with gr.Group() as login_page:
281
+ gr.Markdown("# πŸŽ“ Educational Learning Management System")
282
+ username = gr.Textbox(label="Username")
283
+ password = gr.Textbox(label="Password", type="password")
284
+ login_btn = gr.Button("Login", variant="primary")
285
+ login_msg = gr.Markdown()
 
 
 
 
 
 
 
 
286
 
287
+ # Main Interface
288
+ with gr.Group(visible=False) as main_page:
289
+ with gr.Row():
290
+ with gr.Column(scale=1):
291
+ gr.Markdown("### πŸ“‹ Navigation")
292
+ nav_dashboard = gr.Button("πŸ“Š Dashboard", variant="primary")
293
+ nav_students = gr.Button("πŸ‘₯ Students")
294
+ nav_teachers = gr.Button("πŸ‘¨β€πŸ« Teachers")
295
+ nav_courses = gr.Button("πŸ“š Courses")
296
+ nav_youtube = gr.Button("πŸŽ₯ YouTube Tool")
297
+ logout_btn = gr.Button("πŸšͺ Logout", variant="stop")
298
+
299
+ with gr.Column(scale=3):
300
+ # Dashboard Content
301
+ dashboard_page = gr.Group()
302
+ with dashboard_page:
303
+ gr.Markdown("## πŸ“Š Dashboard")
304
+ gr.Markdown(f"""
305
+ ### System Overview
306
+ - πŸ‘₯ Total Students: {len(students_data)}
307
+ - πŸ‘¨β€πŸ« Total Teachers: {len(teachers_data)}
308
+ - πŸ“š Total Courses: {len(courses_data)}
309
+
310
+ ### Quick Actions
311
+ - View student performance
312
+ - Access course materials
313
+ - Generate learning insights
314
+ """)
315
+
316
+ # Students Content
317
+ students_page = gr.Group(visible=False)
318
+ with students_page:
319
+ gr.Markdown("## πŸ‘₯ Students")
320
+ gr.DataFrame(
321
+ value=students_data,
322
+ headers=["ID", "Name", "Grade", "Program"]
323
+ )
324
+
325
+ # Teachers Content
326
+ teachers_page = gr.Group(visible=False)
327
+ with teachers_page:
328
+ gr.Markdown("## πŸ‘¨β€πŸ« Teachers")
329
+ gr.DataFrame(
330
+ value=teachers_data,
331
+ headers=["ID", "Name", "Subject", "Qualification"]
332
+ )
333
+
334
+ # Courses Content
335
+ courses_page = gr.Group(visible=False)
336
+ with courses_page:
337
+ gr.Markdown("## πŸ“š Courses")
338
+ gr.DataFrame(
339
+ value=courses_data,
340
+ headers=["ID", "Name", "Instructor", "Level"]
341
+ )
342
+
343
+ # YouTube Tool Content
344
+ youtube_page = gr.Group(visible=False)
345
+ with youtube_page:
346
+ gr.Markdown("## Agent for YouTube Content Exploration")
347
+ with gr.Row():
348
+ with gr.Column(scale=2):
349
+ video_url = gr.Textbox(
350
+ label="YouTube URL",
351
+ placeholder="https://youtube.com/watch?v=..."
352
+ )
353
+ keywords = gr.Textbox(
354
+ label="Keywords for Recommendations",
355
+ placeholder="e.g., python programming, machine learning"
356
+ )
357
+ analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
358
+
359
+ with gr.Column(scale=1):
360
+ video_thumbnail = gr.Image(label="Video Preview")
361
+
362
+ with gr.Row():
363
+ with gr.Column():
364
+ summary = gr.Textbox(label="πŸ“ Summary", lines=8)
365
+ sentiment = gr.Textbox(label="😊 Content Sentiment")
366
+ with gr.Column():
367
+ recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
368
+
369
+ def login_check(user, pwd):
370
+ if USER_CREDENTIALS.get(user) == pwd:
371
+ return {
372
+ login_page: gr.update(visible=False),
373
+ main_page: gr.update(visible=True),
374
+ login_msg: ""
375
+ }
376
+ return {
377
+ login_page: gr.update(visible=True),
378
+ main_page: gr.update(visible=False),
379
+ login_msg: "❌ Invalid credentials"
380
+ }
381
 
382
+ def show_page(page_name):
383
+ updates = {
384
+ dashboard_page: gr.update(visible=False),
385
+ students_page: gr.update(visible=False),
386
+ teachers_page: gr.update(visible=False),
387
+ courses_page: gr.update(visible=False),
388
+ youtube_page: gr.update(visible=False)
389
+ }
390
+ updates[page_name] = gr.update(visible=True)
391
+ return updates
392
 
393
+ # Event Handlers
394
+ login_btn.click(
395
+ login_check,
396
+ inputs=[username, password],
397
+ outputs=[login_page, main_page, login_msg]
398
+ )
399
 
400
+ nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
401
+ nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
402
+ nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
403
+ nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
404
+ nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
405
+
406
+ analyze_btn.click(
407
+ process_youtube_video,
408
+ inputs=[video_url, keywords],
409
+ outputs=[video_thumbnail, summary, sentiment, recommendations]
410
+ )
411
 
412
+ logout_btn.click(
413
+ lambda: {
414
+ login_page: gr.update(visible=True),
415
+ main_page: gr.update(visible=False)
416
+ },
417
+ outputs=[login_page, main_page]
418
  )
419
 
 
420
  if __name__ == "__main__":
421
+ app.launch()