Sayiqa commited on
Commit
1258843
·
verified ·
1 Parent(s): 7d41522

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -777
app.py CHANGED
@@ -1,449 +1,3 @@
1
- # import subprocess
2
- # subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
- # subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
- # subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
- # subprocess.check_call(["pip", "install", "pytube"])
6
- # subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
- # subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
- # subprocess.check_call(["pip", "install", "google-generativeai"])
9
- # subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
- # subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
- # subprocess.check_call(["pip", "install", "genai"])
12
- # subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
- # import transformers
14
- # import torch
15
- # import os
16
- # import youtube_transcript_api
17
- # import pytube
18
- # import gradio
19
- # import PyPDF2
20
- # import pathlib
21
- # import pandas
22
- # import numpy
23
- # import textblob
24
- # import gradio as gr
25
- # from youtube_transcript_api import YouTubeTranscriptApi
26
- # import google.generativeai as genai
27
- # import requests
28
- # from textblob import TextBlob
29
- # import re
30
- # #from google.cloud import generativeai
31
- # from huggingface_hub import login
32
- # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
33
- # def install_missing_packages():
34
- # required_packages = {
35
- # "torch":">=1.11.0",
36
- # "transformers":">=4.34.0",
37
- # "youtube_transcript_api" :">=0.6.3" ,
38
- # "pytube":None,
39
- # "huggingface_hub": ">=0.19.0",
40
- # "PyPDF2": ">=3.0.1",
41
- # "textblob":">=0.17.1",
42
- # "python-dotenv":">=1.0.0",
43
- # "genai":None,
44
- # "google-generativeai": None,
45
- # "google-cloud-aiplatform":"==1.34.0"
46
- # }
47
-
48
-
49
- # for package, version in required_packages.items():
50
- # try:
51
- # __import__(package)
52
- # except ImportError:
53
- # package_name = f"{package}{version}" if version else package
54
- # subprocess.check_call(["pip", "install", package_name])
55
-
56
- # install_missing_packages()
57
- # # Configuration
58
-
59
- # hf_token = os.getenv("HF_TOKEN")
60
- # if hf_token:
61
- # login(hf_token)
62
- # else:
63
- # raise ValueError("HF_TOKEN environment variable not set.")
64
-
65
- # # Configuration
66
- # USER_CREDENTIALS = {
67
- # "admin": "password123",
68
- # "teacher": "teach2024",
69
- # "student": "learn2024"
70
- # }
71
-
72
- # import os
73
- # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
74
-
75
- # # Use environment variables
76
- # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
77
- # YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
78
-
79
- # if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
80
- # raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
81
-
82
- # genai.configure(api_key=GOOGLE_API_KEY)
83
-
84
- # # Database
85
- # students_data = [
86
- # (1, "Alice", "A", "Computer Science"),
87
- # (2, "Aliaa", "B", "Mathematics"),
88
- # (3, "Charlie", "A", "Machine Learning"),
89
- # (4, "Daan", "A", "Physics"),
90
- # (5, "Jhon", "C", "Math"),
91
- # (6, "Emma", "A+", "Computer Science")
92
- # ]
93
-
94
- # teachers_data = [
95
- # (1, "Dr. Smith", "Math", "MS Mathematics"),
96
- # (2, "Ms. Johnson", "Science", "MSc Physics"),
97
- # (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
98
- # (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
99
- # ]
100
-
101
- # courses_data = [
102
- # (1, "Algebra", "Dr. Smith", "Advanced"),
103
- # (2, "Biology", "Ms. Mia", "Intermediate"),
104
- # (3, "Machine Learning", "Ms. Jack", "Intermediate"),
105
- # (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
106
- # (5, "Mathematics", "Ms. Smith", "Intermediate")
107
- # ]
108
-
109
- # def sanitize_text(text):
110
- # """Remove invalid Unicode characters."""
111
- # return text.encode("utf-8", "replace").decode("utf-8")
112
-
113
- # def extract_video_id(url):
114
- # if not url:
115
- # return None
116
- # patterns = [
117
- # r'(?:v=|\/videos\/|embed\/|youtu.be\/|\/v\/|\/e\/|watch\?v=|\/watch\?v=)([^#\&\?]*)'
118
- # ]
119
- # for pattern in patterns:
120
- # match = re.search(pattern, url)
121
- # if match:
122
- # return match.group(1)
123
- # return None
124
-
125
-
126
-
127
- # from textblob import TextBlob
128
- # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
129
- # import re
130
- # from collections import Counter
131
- # from googleapiclient.discovery import build
132
-
133
- # def process_youtube_video(url="", keywords=""):
134
- # try:
135
- # #Initialize variables
136
- # thumbnail = None
137
- # summary = "No transcript available"
138
- # sentiment_label = "N/A"
139
- # recommendations = ""
140
- # subtitle_info = "No additional information available"
141
-
142
- # if not url.strip():
143
- # return None, "Please enter a YouTube URL", "N/A", "", ""
144
-
145
- # video_id = extract_video_id(url)
146
- # if not video_id:
147
- # return None, "Invalid YouTube URL", "N/A", "", ""
148
-
149
- # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
150
-
151
- # try:
152
- # # Fetch transcript
153
- # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
154
- # transcript = None
155
- # try:
156
- # transcript = transcript_list.find_transcript(['en'])
157
- # except:
158
- # transcript = transcript_list.find_generated_transcript(['en'])
159
-
160
- # text = " ".join([t['text'] for t in transcript.fetch()])
161
- # if not text.strip():
162
- # raise ValueError("Transcript is empty")
163
-
164
- # # Clean up the text for sentiment analysis
165
- # cleaned_text = clean_text_for_analysis(text)
166
-
167
- # # Sentiment analysis
168
- # sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
169
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
170
-
171
- # # Generate summary
172
- # model = genai.GenerativeModel("gemini-pro")
173
- # summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
174
-
175
- # # Extract subtitle information
176
- # subtitle_info = extract_subtitle_info(cleaned_text)
177
-
178
- # except TranscriptsDisabled:
179
- # metadata = get_video_metadata(video_id)
180
- # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
181
- # sentiment_label = "N/A"
182
- # subtitle_info = "No subtitles available for analysis."
183
- # except NoTranscriptFound:
184
- # metadata = get_video_metadata(video_id)
185
- # summary = metadata.get("description", "⚠️ No English transcript available.")
186
- # sentiment_label = "N/A"
187
- # subtitle_info = "No subtitles available for analysis."
188
- # except Exception as e:
189
- # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
190
-
191
- # # Get recommendations
192
- # if keywords.strip():
193
- # recommendations = get_recommendations(keywords)
194
-
195
- # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
196
-
197
- # except Exception as e:
198
- # return None, f"Error: {str(e)}", "N/A", "", ""
199
-
200
-
201
- # def extract_video_id(url):
202
- # """
203
- # Extracts the video ID from a YouTube URL.
204
- # """
205
- # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
206
- # return match.group(1) if match else None
207
-
208
-
209
- # def get_video_metadata(video_id):
210
- # """
211
- # Fetches video metadata such as title and description using the YouTube Data API.
212
- # """
213
- # try:
214
- # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
215
- # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
216
- # request = youtube.videos().list(part="snippet", id=video_id)
217
- # response = request.execute()
218
-
219
- # if "items" in response and len(response["items"]) > 0:
220
- # snippet = response["items"][0]["snippet"]
221
- # return {
222
- # "title": snippet.get("title", "No title available"),
223
- # "description": snippet.get("description", "No description available"),
224
- # }
225
- # return {}
226
-
227
- # except Exception as e:
228
- # return {"title": "Error fetching metadata", "description": str(e)}
229
-
230
-
231
- # def extract_subtitle_info(text):
232
- # """
233
- # Extracts meaningful information from the subtitles.
234
- # This could include topics, key insights, or a breakdown of the content.
235
- # """
236
- # try:
237
- # # Split text into sentences for better analysis
238
- # sentences = text.split(". ")
239
-
240
- # # Example: Extract key topics or keywords
241
- # words = text.split()
242
- # common_words = Counter(words).most_common(10)
243
- # key_topics = ", ".join([word for word, count in common_words])
244
-
245
- # # Example: Provide a breakdown of the content
246
- # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
247
-
248
- # return info
249
- # except Exception as e:
250
- # return f"Error extracting subtitle information: {str(e)}"
251
-
252
-
253
- # def clean_text_for_analysis(text):
254
- # """
255
- # Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
256
- # """
257
- # # Remove extra spaces and line breaks
258
- # cleaned_text = " ".join(text.split())
259
- # return cleaned_text
260
-
261
-
262
- # def get_recommendations(keywords):
263
- # """
264
- # Fetches related video recommendations based on the provided keywords.
265
- # This function can be expanded with a proper API or custom logic.
266
- # """
267
- # # Placeholder for fetching recommendations based on keywords
268
- # return f"Recommendations for: {keywords}" # Dummy return for now
269
-
270
-
271
- # def get_recommendations(keywords, max_results=5):
272
- # if not keywords:
273
- # return "Please provide search keywords"
274
- # try:
275
- # response = requests.get(
276
- # "https://www.googleapis.com/youtube/v3/search",
277
- # params={
278
- # "part": "snippet",
279
- # "q": f"educational {keywords}",
280
- # "type": "video",
281
- # "maxResults": max_results,
282
- # "relevanceLanguage": "en",
283
- # "key": YOUTUBE_API_KEY
284
- # }
285
- # ).json()
286
-
287
- # results = []
288
- # for item in response.get("items", []):
289
- # title = item["snippet"]["title"]
290
- # channel = item["snippet"]["channelTitle"]
291
- # video_id = item["id"]["videoId"]
292
- # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
293
-
294
- # return "\n".join(results) if results else "No recommendations found"
295
- # except Exception as e:
296
- # return f"Error: {str(e)}"
297
-
298
- # # Gradio Interface
299
- # with gr.Blocks(theme=gr.themes.Soft()) as app:
300
- # # Login Page
301
- # with gr.Group() as login_page:
302
- # gr.Markdown("# 🎓 Educational Learning Management System")
303
- # username = gr.Textbox(label="Username")
304
- # password = gr.Textbox(label="Password", type="password")
305
- # login_btn = gr.Button("Login", variant="primary")
306
- # login_msg = gr.Markdown()
307
-
308
- # # Main Interface
309
- # with gr.Group(visible=False) as main_page:
310
- # with gr.Row():
311
- # with gr.Column(scale=1):
312
- # gr.Markdown("### 📋 Navigation")
313
- # nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
314
- # nav_students = gr.Button("👥 Students")
315
- # nav_teachers = gr.Button("👨‍🏫 Teachers")
316
- # nav_courses = gr.Button("📚 Courses")
317
- # nav_youtube = gr.Button("🎥 YouTube Tool")
318
- # logout_btn = gr.Button("🚪 Logout", variant="stop")
319
-
320
- # with gr.Column(scale=3):
321
- # # Dashboard Content
322
- # dashboard_page = gr.Group()
323
- # with dashboard_page:
324
- # gr.Markdown("## 📊 Dashboard")
325
- # gr.Markdown(f"""
326
- # ### System Overview
327
- # - 👥 Total Students: {len(students_data)}
328
- # - 👨‍🏫 Total Teachers: {len(teachers_data)}
329
- # - 📚 Total Courses: {len(courses_data)}
330
-
331
- # ### Quick Actions
332
- # - View student performance
333
- # - Access course materials
334
- # - Generate learning insights
335
- # """)
336
-
337
- # # Students Content
338
- # students_page = gr.Group(visible=False)
339
- # with students_page:
340
- # gr.Markdown("## 👥 Students")
341
- # gr.DataFrame(
342
- # value=students_data,
343
- # headers=["ID", "Name", "Grade", "Program"]
344
- # )
345
-
346
- # # Teachers Content
347
- # teachers_page = gr.Group(visible=False)
348
- # with teachers_page:
349
- # gr.Markdown("## 👨‍🏫 Teachers")
350
- # gr.DataFrame(
351
- # value=teachers_data,
352
- # headers=["ID", "Name", "Subject", "Qualification"]
353
- # )
354
-
355
- # # Courses Content
356
- # courses_page = gr.Group(visible=False)
357
- # with courses_page:
358
- # gr.Markdown("## 📚 Courses")
359
- # gr.DataFrame(
360
- # value=courses_data,
361
- # headers=["ID", "Name", "Instructor", "Level"]
362
- # )
363
-
364
- # # YouTube Tool Content
365
- # youtube_page = gr.Group(visible=False)
366
- # with youtube_page:
367
- # gr.Markdown("## Agent for YouTube Content Exploration")
368
- # with gr.Row():
369
- # with gr.Column(scale=2):
370
- # video_url = gr.Textbox(
371
- # label="YouTube URL",
372
- # placeholder="https://youtube.com/watch?v=..."
373
- # )
374
- # keywords = gr.Textbox(
375
- # label="Keywords for Recommendations",
376
- # placeholder="e.g., python programming, machine learning"
377
- # )
378
- # analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
379
-
380
- # with gr.Column(scale=1):
381
- # video_thumbnail = gr.Image(label="Video Preview")
382
-
383
- # with gr.Row():
384
- # with gr.Column():
385
- # summary = gr.Textbox(label="📝 Summary", lines=8)
386
- # sentiment = gr.Textbox(label="😊 Content Sentiment")
387
- # with gr.Column():
388
- # recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
389
-
390
- # def login_check(user, pwd):
391
- # if USER_CREDENTIALS.get(user) == pwd:
392
- # return {
393
- # login_page: gr.update(visible=False),
394
- # main_page: gr.update(visible=True),
395
- # login_msg: ""
396
- # }
397
- # return {
398
- # login_page: gr.update(visible=True),
399
- # main_page: gr.update(visible=False),
400
- # login_msg: "❌ Invalid credentials"
401
- # }
402
-
403
- # def show_page(page_name):
404
- # updates = {
405
- # dashboard_page: gr.update(visible=False),
406
- # students_page: gr.update(visible=False),
407
- # teachers_page: gr.update(visible=False),
408
- # courses_page: gr.update(visible=False),
409
- # youtube_page: gr.update(visible=False)
410
- # }
411
- # updates[page_name] = gr.update(visible=True)
412
- # return updates
413
-
414
- # # Event Handlers
415
- # login_btn.click(
416
- # login_check,
417
- # inputs=[username, password],
418
- # outputs=[login_page, main_page, login_msg]
419
- # )
420
-
421
- # nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
422
- # nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
423
- # nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
424
- # nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
425
- # nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
426
-
427
- # analyze_btn.click(
428
- # process_youtube_video,
429
- # inputs=[video_url, keywords],
430
- # outputs=[video_thumbnail, summary, sentiment, recommendations]
431
- # )
432
-
433
- # logout_btn.click(
434
- # lambda: {
435
- # login_page: gr.update(visible=True),
436
- # main_page: gr.update(visible=False)
437
- # },
438
- # outputs=[login_page, main_page]
439
- # )
440
-
441
- # if __name__ == "__main__":
442
- # app.launch()
443
-
444
-
445
-
446
- #############################
447
  import subprocess
448
  subprocess.check_call(["pip", "install", "transformers==4.34.0"])
449
  subprocess.check_call(["pip", "install", "torch>=1.7.1"])
@@ -456,7 +10,7 @@ subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
456
  subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
457
  subprocess.check_call(["pip", "install", "genai"])
458
  subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
459
- subprocess.check_call(["pip", "install", "google-api-python-client"])
460
  import transformers
461
  import torch
462
  import os
@@ -471,10 +25,12 @@ import textblob
471
  import gradio as gr
472
  from youtube_transcript_api import YouTubeTranscriptApi
473
  import google.generativeai as genai
 
474
  import requests
475
  from textblob import TextBlob
476
  import re
477
  #from google.cloud import generativeai
 
478
  from huggingface_hub import login
479
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
480
  def install_missing_packages():
@@ -489,7 +45,8 @@ def install_missing_packages():
489
  "python-dotenv":">=1.0.0",
490
  "genai":None,
491
  "google-generativeai": None,
492
- "google-cloud-aiplatform":"==1.34.0"
 
493
  }
494
 
495
 
@@ -570,316 +127,19 @@ def extract_video_id(url):
570
  return match.group(1)
571
  return None
572
 
573
-
574
- # def process_youtube_video(url="", keywords=""):
575
- # try:
576
- # # Initialize variables
577
- # thumbnail = None # Default value for thumbnail
578
- # summary = "No transcript available"
579
- # sentiment_label = "N/A"
580
-
581
- # if not url.strip():
582
- # return None, "Please enter a YouTube URL", "N/A", ""
583
-
584
- # video_id = extract_video_id(url)
585
- # if not video_id:
586
- # return None, "Invalid YouTube URL", "N/A", ""
587
-
588
- # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
589
-
590
- # try:
591
- # # Try multiple transcript options
592
- # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
593
-
594
- # try:
595
- # transcript = transcript_list.find_transcript(['en'])
596
- # except:
597
- # try:
598
- # transcript = transcript_list.find_transcript(['en-US'])
599
- # except:
600
- # try:
601
- # # Try auto-generated
602
- # transcript = transcript_list.find_generated_transcript(['en'])
603
- # except:
604
- # raise NoTranscriptFound()
605
-
606
- # text = " ".join([t['text'] for t in transcript.fetch()])
607
-
608
- # # Generate summary
609
- # model = genai.GenerativeModel("gemini-pro")
610
- # summary = model.generate_content(f"Summarize this: {text[:4000]}").text
611
-
612
- # # Analysis
613
- # sentiment = TextBlob(text[:1000]).sentiment
614
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
615
-
616
- # except TranscriptsDisabled:
617
- # # Fallback: Use video metadata if available
618
- # metadata = get_video_metadata(video_id)
619
- # summary = metadata.get("description", "⚠️ This video has disabled subtitles. No transcript available.")
620
- # except NoTranscriptFound:
621
- # # Fallback: Use video metadata if available
622
- # metadata = get_video_metadata(video_id)
623
- # summary = metadata.get("description", "⚠️ No English transcript available. No transcript available.")
624
- # except Exception as e:
625
- # return thumbnail, f"⚠️ Error: {str(e)}", "N/A", ""
626
-
627
- # # Get recommendations
628
- # if keywords.strip():
629
- # recommendations = get_recommendations(keywords)
630
- # else:
631
- # recommendations = ""
632
-
633
- # return thumbnail, summary, sentiment_label, recommendations
634
-
635
- # except Exception as e:
636
- # return None, f"Error: {str(e)}", "N/A", ""
637
-
638
-
639
- # def get_video_metadata(video_id):
640
- # """
641
- # Fetches video metadata such as title and description using the YouTube Data API.
642
- # """
643
- # try:
644
- # from googleapiclient.discovery import build
645
-
646
- # # Replace with your YouTube Data API key
647
- # API_KEY = "AIzaSyB7X-RYjZmUuDSMTQsvCfyzURw5bhqOto4"
648
- # youtube = build("youtube", "v3", developerKey=API_KEY)
649
- # request = youtube.videos().list(part="snippet", id=video_id)
650
- # response = request.execute()
651
-
652
- # if "items" in response and len(response["items"]) > 0:
653
- # snippet = response["items"][0]["snippet"]
654
- # return {
655
- # "title": snippet.get("title", "No title available"),
656
- # "description": snippet.get("description", "No description available"),
657
- # }
658
- # return {}
659
-
660
- # except Exception as e:
661
- # return {"title": "Error fetching metadata", "description": str(e)}
662
-
663
- # # Get recommendations
664
- # if keywords.strip():
665
- # recommendations = get_recommendations(keywords)
666
- # else:
667
- # recommendations = ""
668
-
669
- # return thumbnail, summary, sentiment_label, recommendations
670
-
671
- # except Exception as e:
672
- # return None, f"Error: {str(e)}", "N/A", ""
673
-
674
  from textblob import TextBlob
675
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
676
  import re
677
  from collections import Counter
678
  from googleapiclient.discovery import build
679
- # def process_youtube_video(url="", keywords=""):
680
- # try:
681
- # # Initialize variables
682
- # thumbnail = None
683
- # summary = "No transcript available"
684
- # sentiment_label = "N/A"
685
- # recommendations = ""
686
- # subtitle_info = "No additional information available"
687
-
688
- # if not url.strip():
689
- # return None, "Please enter a YouTube URL", "N/A", "", ""
690
-
691
- # video_id = extract_video_id(url)
692
- # if not video_id:
693
- # return None, "Invalid YouTube URL", "N/A", "", ""
694
-
695
- # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
696
-
697
- # try:
698
- # # Fetch transcript
699
- # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
700
- # transcript = None
701
- # try:
702
- # transcript = transcript_list.find_transcript(['en'])
703
- # except:
704
- # transcript = transcript_list.find_generated_transcript(['en'])
705
-
706
- # text = " ".join([t['text'] for t in transcript.fetch()])
707
- # if not text.strip():
708
- # raise ValueError("Transcript is empty")
709
-
710
- # # Generate summary
711
- # model = genai.GenerativeModel("gemini-pro")
712
- # summary = model.generate_content(f"Summarize this: {text[:4000]}").text
713
-
714
- # # Extract subtitle information
715
- # subtitle_info = extract_subtitle_info(text)
716
-
717
- # # Sentiment analysis
718
- # sentiment = TextBlob(text[:1000]).sentiment
719
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
720
-
721
- # except TranscriptsDisabled:
722
- # metadata = get_video_metadata(video_id)
723
- # summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
724
- # sentiment_label = "N/A"
725
- # subtitle_info = "No subtitles available for analysis."
726
- # except NoTranscriptFound:
727
- # metadata = get_video_metadata(video_id)
728
- # summary = metadata.get("description", "⚠️ No English transcript available.")
729
- # sentiment_label = "N/A"
730
- # subtitle_info = "No subtitles available for analysis."
731
- # except Exception as e:
732
- # return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
733
-
734
- # # Get recommendations
735
- # if keywords.strip():
736
- # recommendations = get_recommendations(keywords)
737
-
738
- # return thumbnail, summary, sentiment_label, subtitle_info, recommendations
739
-
740
- # except Exception as e:
741
- # return None, f"Error: {str(e)}", "N/A", "", ""
742
-
743
-
744
- # def extract_video_id(url):
745
- # """
746
- # Extracts the video ID from a YouTube URL.
747
- # """
748
- # import re
749
- # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
750
- # return match.group(1) if match else None
751
-
752
-
753
- # def get_video_metadata(video_id):
754
- # """
755
- # Fetches video metadata such as title and description using the YouTube Data API.
756
- # """
757
- # try:
758
- # from googleapiclient.discovery import build
759
-
760
- # # Replace with your YouTube Data API key
761
- # YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98"
762
- # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
763
- # request = youtube.videos().list(part="snippet", id=video_id)
764
- # response = request.execute()
765
-
766
- # if "items" in response and len(response["items"]) > 0:
767
- # snippet = response["items"][0]["snippet"]
768
- # return {
769
- # "title": snippet.get("title", "No title available"),
770
- # "description": snippet.get("description", "No description available"),
771
- # }
772
- # return {}
773
-
774
- # except Exception as e:
775
- # return {"title": "Error fetching metadata", "description": str(e)}
776
-
777
-
778
- # def extract_subtitle_info(text):
779
- # """
780
- # Extracts meaningful information from the subtitles.
781
- # This could include topics, key insights, or a breakdown of the content.
782
- # """
783
- # try:
784
- # # Split text into sentences for better analysis
785
- # sentences = text.split(". ")
786
-
787
- # # Example: Extract key topics or keywords
788
- # from collections import Counter
789
- # words = text.split()
790
- # common_words = Counter(words).most_common(10)
791
- # key_topics = ", ".join([word for word, count in common_words])
792
-
793
- # # Example: Provide a breakdown of the content
794
- # info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
795
-
796
- # return info
797
- # except Exception as e:
798
- # return f"Error extracting subtitle information: {str(e)}"
799
-
800
- ##########
801
- def process_youtube_video(url="", keywords=""):
802
- try:
803
- # Initialize variables
804
- thumbnail = None
805
- summary = "No transcript available"
806
- sentiment_label = "N/A"
807
- recommendations = ""
808
- subtitle_info = "No additional information available"
809
-
810
- if not url.strip():
811
- return None, "Please enter a YouTube URL", "N/A", "", ""
812
-
813
- video_id = extract_video_id(url)
814
- if not video_id:
815
- return None, "Invalid YouTube URL", "N/A", "", ""
816
-
817
- thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
818
-
819
- try:
820
- # Fetch transcript
821
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
822
- transcript = None
823
- try:
824
- transcript = transcript_list.find_transcript(['en'])
825
- except:
826
- transcript = transcript_list.find_generated_transcript(['en'])
827
-
828
- text = " ".join([t['text'] for t in transcript.fetch()])
829
- if not text.strip():
830
- raise ValueError("Transcript is empty")
831
-
832
- # Clean up the text for sentiment analysis
833
- cleaned_text = clean_text_for_analysis(text)
834
-
835
- # Sentiment analysis
836
- sentiment = TextBlob(cleaned_text).sentiment # Use cleaned text for sentiment analysis
837
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
838
-
839
- # Generate summary
840
- model = genai.GenerativeModel("gemini-pro")
841
- summary = model.generate_content(f"Summarize this: {cleaned_text[:4000]}").text
842
-
843
- # Extract subtitle information
844
- subtitle_info = extract_subtitle_info(cleaned_text)
845
-
846
- except TranscriptsDisabled:
847
- metadata = get_video_metadata(video_id)
848
- summary = metadata.get("description", "⚠️ This video has disabled subtitles.")
849
- sentiment_label = "N/A"
850
- subtitle_info = "No subtitles available for analysis."
851
- except NoTranscriptFound:
852
- metadata = get_video_metadata(video_id)
853
- summary = metadata.get("description", "⚠️ No English transcript available.")
854
- sentiment_label = "N/A"
855
- subtitle_info = "No subtitles available for analysis."
856
- except Exception as e:
857
- return thumbnail, f"⚠️ Error processing transcript: {str(e)}", "N/A", "", ""
858
-
859
- # Get recommendations
860
- if keywords.strip():
861
- recommendations = get_recommendations(keywords)
862
-
863
- return thumbnail, summary, sentiment_label, subtitle_info, recommendations
864
-
865
- except Exception as e:
866
- return None, f"Error: {str(e)}", "N/A", "", ""
867
 
868
 
869
  def extract_video_id(url):
870
- """
871
- Extracts the video ID from a YouTube URL.
872
- """
873
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
874
  return match.group(1) if match else None
875
 
876
-
877
  def get_video_metadata(video_id):
878
- """
879
- Fetches video metadata such as title and description using the YouTube Data API.
880
- """
881
  try:
882
- YOUTUBE_API_KEY = "AIzaSyD_SDF4lC3vpHVAMnBOcN2ZCTz7dRjUc98" # Replace with your YouTube Data API key
883
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
884
  request = youtube.videos().list(part="snippet", id=video_id)
885
  response = request.execute()
@@ -895,48 +155,20 @@ def get_video_metadata(video_id):
895
  except Exception as e:
896
  return {"title": "Error fetching metadata", "description": str(e)}
897
 
 
 
898
 
899
  def extract_subtitle_info(text):
900
- """
901
- Extracts meaningful information from the subtitles.
902
- This could include topics, key insights, or a breakdown of the content.
903
- """
904
  try:
905
- # Split text into sentences for better analysis
906
  sentences = text.split(". ")
907
-
908
- # Example: Extract key topics or keywords
909
  words = text.split()
910
  common_words = Counter(words).most_common(10)
911
  key_topics = ", ".join([word for word, count in common_words])
912
-
913
- # Example: Provide a breakdown of the content
914
  info = f"Key topics discussed: {key_topics}. \nNumber of sentences: {len(sentences)}. \nTotal words: {len(words)}."
915
-
916
  return info
917
  except Exception as e:
918
  return f"Error extracting subtitle information: {str(e)}"
919
 
920
-
921
- def clean_text_for_analysis(text):
922
- """
923
- Cleans the transcript text by removing extra spaces, line breaks, and non-text elements.
924
- """
925
- # Remove extra spaces and line breaks
926
- cleaned_text = " ".join(text.split())
927
- return cleaned_text
928
-
929
-
930
- def get_recommendations(keywords):
931
- """
932
- Fetches related video recommendations based on the provided keywords.
933
- This function can be expanded with a proper API or custom logic.
934
- """
935
- # Placeholder for fetching recommendations based on keywords
936
- return f"Recommendations for: {keywords}" # Dummy return for now
937
-
938
-
939
-
940
  def get_recommendations(keywords, max_results=5):
941
  if not keywords:
942
  return "Please provide search keywords"
@@ -952,18 +184,91 @@ def get_recommendations(keywords, max_results=5):
952
  "key": YOUTUBE_API_KEY
953
  }
954
  ).json()
955
-
956
  results = []
957
  for item in response.get("items", []):
958
  title = item["snippet"]["title"]
959
  channel = item["snippet"]["channelTitle"]
960
  video_id = item["id"]["videoId"]
961
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
962
-
963
  return "\n".join(results) if results else "No recommendations found"
964
  except Exception as e:
965
  return f"Error: {str(e)}"
966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  # Gradio Interface
968
  with gr.Blocks(theme=gr.themes.Soft()) as app:
969
  # Login Page
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
  subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
  subprocess.check_call(["pip", "install", "torch>=1.7.1"])
 
10
  subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
  subprocess.check_call(["pip", "install", "genai"])
12
  subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
+ subprocess.check_call(["pip", "install", "google-api-python-client>=2.0.0"])
14
  import transformers
15
  import torch
16
  import os
 
25
  import gradio as gr
26
  from youtube_transcript_api import YouTubeTranscriptApi
27
  import google.generativeai as genai
28
+ from googleapiclient.discovery import build
29
  import requests
30
  from textblob import TextBlob
31
  import re
32
  #from google.cloud import generativeai
33
+ from googleapiclient.discovery import build
34
  from huggingface_hub import login
35
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
36
  def install_missing_packages():
 
45
  "python-dotenv":">=1.0.0",
46
  "genai":None,
47
  "google-generativeai": None,
48
+ "google-cloud-aiplatform":"==1.34.0",
49
+ "google-api-python-client": ">=2.0.0"
50
  }
51
 
52
 
 
127
  return match.group(1)
128
  return None
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  from textblob import TextBlob
131
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
132
  import re
133
  from collections import Counter
134
  from googleapiclient.discovery import build
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
 
137
def extract_video_id(url):
    """Pull the 11-character YouTube video id out of *url*.

    Looks for an id-shaped token immediately following "v=" or a "/"
    separator; returns None when nothing matches.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    if found is None:
        return None
    return found.group(1)
140
 
 
141
  def get_video_metadata(video_id):
 
 
 
142
  try:
 
143
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
144
  request = youtube.videos().list(part="snippet", id=video_id)
145
  response = request.execute()
 
155
  except Exception as e:
156
  return {"title": "Error fetching metadata", "description": str(e)}
157
 
158
def clean_text_for_analysis(text):
    """Collapse all whitespace runs in *text* to single spaces and trim the ends."""
    return " ".join(piece for piece in text.split())
160
 
161
def extract_subtitle_info(text):
    """Produce a quick statistical breakdown of a transcript.

    Reports the ten most common tokens (raw frequency, no stop-word
    filtering), the sentence count (". " split), and the word count.
    On any failure, the exception text is returned instead of raising.
    """
    try:
        parts = text.split(". ")
        all_words = text.split()
        frequent = [word for word, _ in Counter(all_words).most_common(10)]
        key_topics = ", ".join(frequent)
        report_lines = [
            f"Key topics discussed: {key_topics}. ",
            f"Number of sentences: {len(parts)}. ",
            f"Total words: {len(all_words)}.",
        ]
        return "\n".join(report_lines)
    except Exception as e:
        return f"Error extracting subtitle information: {str(e)}"
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def get_recommendations(keywords, max_results=5):
173
  if not keywords:
174
  return "Please provide search keywords"
 
184
  "key": YOUTUBE_API_KEY
185
  }
186
  ).json()
187
+
188
  results = []
189
  for item in response.get("items", []):
190
  title = item["snippet"]["title"]
191
  channel = item["snippet"]["channelTitle"]
192
  video_id = item["id"]["videoId"]
193
  results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
194
+
195
  return "\n".join(results) if results else "No recommendations found"
196
  except Exception as e:
197
  return f"Error: {str(e)}"
198
 
199
def process_youtube_video(url, keywords):
    """Fetch thumbnail, summary, sentiment and recommendations for a video.

    Parameters
    ----------
    url : str
        YouTube video URL in any form containing the 11-character video id.
    keywords : str
        Search terms for related-video recommendations; blank skips lookup.

    Returns
    -------
    tuple
        (thumbnail_url_or_None, summary, sentiment_label, recommendations).
        Always a 4-tuple so output bindings unpack consistently.
    """
    try:
        summary = "No transcript available"
        sentiment_label = "N/A"
        recommendations = ""

        video_id = extract_video_id(url)
        if not video_id:
            # BUG FIX: this branch previously returned a 5-tuple while every
            # other path returns 4 values, breaking callers that unpack.
            return None, "Invalid YouTube URL", "N/A", ""

        thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            try:
                transcript = transcript_list.find_transcript(['en'])
            except Exception:
                # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
                # are not swallowed; fall back to an auto-generated transcript.
                transcript = transcript_list.find_generated_transcript(['en'])

            text = " ".join([t['text'] for t in transcript.fetch()])
            if not text.strip():
                raise ValueError("Transcript is empty")

            cleaned_text = clean_text_for_analysis(text)

            # TextBlob polarity lies in [-1, 1]; its sign picks the label.
            sentiment = TextBlob(cleaned_text).sentiment
            sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"

            summary = f"Summary: {cleaned_text[:400]}..."

        except (TranscriptsDisabled, NoTranscriptFound):
            # No subtitles available: fall back to the video's description.
            metadata = get_video_metadata(video_id)
            summary = metadata.get("description", "No subtitles available")
            sentiment_label = "N/A"

        if keywords.strip():
            recommendations = get_recommendations(keywords)

        return thumbnail, summary, sentiment_label, recommendations

    except Exception as e:
        return None, f"Error: {str(e)}", "N/A", ""
243
+
244
+
245
+ # def get_recommendations(keywords, max_results=5):
246
+ # if not keywords:
247
+ # return "Please provide search keywords"
248
+ # try:
249
+ # response = requests.get(
250
+ # "https://www.googleapis.com/youtube/v3/search",
251
+ # params={
252
+ # "part": "snippet",
253
+ # "q": f"educational {keywords}",
254
+ # "type": "video",
255
+ # "maxResults": max_results,
256
+ # "relevanceLanguage": "en",
257
+ # "key": YOUTUBE_API_KEY
258
+ # }
259
+ # ).json()
260
+
261
+ # results = []
262
+ # for item in response.get("items", []):
263
+ # title = item["snippet"]["title"]
264
+ # channel = item["snippet"]["channelTitle"]
265
+ # video_id = item["id"]["videoId"]
266
+ # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
267
+
268
+ # return "\n".join(results) if results else "No recommendations found"
269
+ # except Exception as e:
270
+ # return f"Error: {str(e)}"
271
+
272
  # Gradio Interface
273
  with gr.Blocks(theme=gr.themes.Soft()) as app:
274
  # Login Page