Sayiqa7 committed on
Commit
7d1dcdc
·
verified ·
1 Parent(s): 7a199b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -399
app.py CHANGED
@@ -1,359 +1,3 @@
1
- # import subprocess
2
- # subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
- # subprocess.check_call(["pip", "install", "torch>=1.7.1"])
4
- # subprocess.check_call(["pip", "install", "youtube_transcript_api>=0.6.3"])
5
- # subprocess.check_call(["pip", "install", "pytube"])
6
- # subprocess.check_call(["pip", "install", "huggingface_hub>=0.19.0"])
7
- # subprocess.check_call(["pip", "install", "PyPDF2>=3.0.1"])
8
- # subprocess.check_call(["pip", "install", "google-generativeai"])
9
- # subprocess.check_call(["pip", "install", "textblob>=0.17.1"])
10
- # subprocess.check_call(["pip", "install", "python-dotenv>=1.0.0"])
11
- # subprocess.check_call(["pip", "install", "genai"])
12
- # subprocess.check_call(["pip", "install", "google-cloud-aiplatform==1.34.0"])
13
- # subprocess.check_call(["pip", "install", "google-api-python-client>=2.0.0"])
14
- # import transformers
15
- # import torch
16
- # import os
17
- # import youtube_transcript_api
18
- # import pytube
19
- # import gradio
20
- # import PyPDF2
21
- # import pathlib
22
- # import pandas
23
- # import numpy
24
- # import textblob
25
- # import gradio as gr
26
- # from youtube_transcript_api import YouTubeTranscriptApi
27
- # import google.generativeai as genai
28
- # from googleapiclient.discovery import build
29
- # import requests
30
- # from textblob import TextBlob
31
- # import re
32
- # #from google.cloud import generativeai
33
- # from googleapiclient.discovery import build
34
- # from huggingface_hub import login
35
- # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
36
- # def install_missing_packages():
37
- # required_packages = {
38
- # "torch":">=1.11.0",
39
- # "transformers":">=4.34.0",
40
- # "youtube_transcript_api" :">=0.6.3" ,
41
- # "pytube":None,
42
- # "huggingface_hub": ">=0.19.0",
43
- # "PyPDF2": ">=3.0.1",
44
- # "textblob":">=0.17.1",
45
- # "python-dotenv":">=1.0.0",
46
- # "genai":None,
47
- # "google-generativeai": None,
48
- # "google-cloud-aiplatform":"==1.34.0",
49
- # "google-api-python-client": ">=2.0.0"
50
- # }
51
-
52
-
53
- # for package, version in required_packages.items():
54
- # try:
55
- # __import__(package)
56
- # except ImportError:
57
- # package_name = f"{package}{version}" if version else package
58
- # subprocess.check_call(["pip", "install", package_name])
59
-
60
- # install_missing_packages()
61
- # # Configuration
62
-
63
- # hf_token = os.getenv("HF_TOKEN")
64
- # if hf_token:
65
- # login(hf_token)
66
- # else:
67
- # raise ValueError("HF_TOKEN environment variable not set.")
68
-
69
-
70
- # YOUTUBE_API_KEY = "<YOUR_YOUTUBE_API_KEY>" # Replace with your YouTube API Key
71
-
72
- # USER_CREDENTIALS = {"admin": "password"} # Example user credentials
73
-
74
- # import os
75
- # from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
76
-
77
- # # Use environment variables
78
- # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
79
- # YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
80
-
81
- # if not GOOGLE_API_KEY or not YOUTUBE_API_KEY:
82
- # raise ValueError("Please set GOOGLE_API_KEY and YOUTUBE_API_KEY environment variables")
83
-
84
- # genai.configure(api_key=GOOGLE_API_KEY)
85
-
86
- # # Database
87
- # students_data = [
88
- # (1, "Alice", "A", "Computer Science"),
89
- # (2, "Aliaa", "B", "Mathematics"),
90
- # (3, "Charlie", "A", "Machine Learning"),
91
- # (4, "Daan", "A", "Physics"),
92
- # (5, "Jhon", "C", "Math"),
93
- # (6, "Emma", "A+", "Computer Science")
94
- # ]
95
-
96
- # teachers_data = [
97
- # (1, "Dr. Smith", "Math", "MS Mathematics"),
98
- # (2, "Ms. Johnson", "Science", "MSc Physics"),
99
- # (3, "Ms. Jack", "Artificial Intelligence Engineer", "MSc AI"),
100
- # (4, "Ms. Evelyn", "Computer Science", "MSc Computer Science"),
101
- # ]
102
-
103
- # courses_data = [
104
- # (1, "Algebra", "Dr. Smith", "Advanced"),
105
- # (2, "Biology", "Ms. Mia", "Intermediate"),
106
- # (3, "Machine Learning", "Ms. Jack", "Intermediate"),
107
- # (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
108
- # (5, "Mathematics", "Ms. Smith", "Intermediate")
109
- # ]
110
-
111
- # def extract_video_id(url):
112
- # match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
113
- # return match.group(1) if match else None
114
-
115
- # def get_video_metadata(video_id):
116
- # try:
117
- # youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
118
- # request = youtube.videos().list(part="snippet", id=video_id)
119
- # response = request.execute()
120
-
121
- # if "items" in response and len(response["items"]) > 0:
122
- # snippet = response["items"][0]["snippet"]
123
- # return {
124
- # "title": snippet.get("title", "No title available"),
125
- # "description": snippet.get("description", "No description available"),
126
- # }
127
- # return {}
128
-
129
- # except Exception as e:
130
- # return {"title": "Error fetching metadata", "description": str(e)}
131
-
132
- # def clean_text_for_analysis(text):
133
- # return " ".join(text.split())
134
-
135
- # def get_recommendations(keywords, max_results=5):
136
- # if not keywords:
137
- # return "Please provide search keywords"
138
- # try:
139
- # response = requests.get(
140
- # "https://www.googleapis.com/youtube/v3/search",
141
- # params={
142
- # "part": "snippet",
143
- # "q": f"educational {keywords}",
144
- # "type": "video",
145
- # "maxResults": max_results,
146
- # "relevanceLanguage": "en",
147
- # "key": YOUTUBE_API_KEY
148
- # }
149
- # ).json()
150
-
151
- # results = []
152
- # for item in response.get("items", []):
153
- # title = item["snippet"]["title"]
154
- # channel = item["snippet"]["channelTitle"]
155
- # video_id = item["id"]["videoId"]
156
- # results.append(f"📺 {title}\n👤 {channel}\n🔗 https://youtube.com/watch?v={video_id}\n")
157
-
158
- # return "\n".join(results) if results else "No recommendations found"
159
- # except Exception as e:
160
- # return f"Error: {str(e)}"
161
-
162
- # def process_youtube_video(url):
163
- # try:
164
- # thumbnail = None
165
- # summary = "No transcript available"
166
- # sentiment_label = "N/A"
167
-
168
- # video_id = extract_video_id(url)
169
- # if not video_id:
170
- # return None, "Invalid YouTube URL", "N/A"
171
-
172
- # thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
173
-
174
- # try:
175
- # transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
176
- # transcript = None
177
- # try:
178
- # transcript = transcript_list.find_transcript(['en'])
179
- # except:
180
- # transcript = transcript_list.find_generated_transcript(['en'])
181
-
182
- # text = " ".join([t['text'] for t in transcript.fetch()])
183
- # if not text.strip():
184
- # raise ValueError("Transcript is empty")
185
-
186
- # cleaned_text = clean_text_for_analysis(text)
187
-
188
- # sentiment = TextBlob(cleaned_text).sentiment
189
- # sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
190
-
191
- # summary = f"Summary: {cleaned_text[:400]}..."
192
-
193
- # except (TranscriptsDisabled, NoTranscriptFound):
194
- # metadata = get_video_metadata(video_id)
195
- # summary = metadata.get("description", "No subtitles available")
196
- # sentiment_label = "N/A"
197
-
198
- # return thumbnail, summary, sentiment_label
199
-
200
- # except Exception as e:
201
- # return None, f"Error: {str(e)}", "N/A"
202
-
203
- # # Gradio Interface
204
- # with gr.Blocks(theme=gr.themes.Soft()) as app:
205
- # # Login Page
206
- # with gr.Group() as login_page:
207
- # gr.Markdown("# 🎓 Educational Learning Management System")
208
- # username = gr.Textbox(label="Username")
209
- # password = gr.Textbox(label="Password", type="password")
210
- # login_btn = gr.Button("Login", variant="primary")
211
- # login_msg = gr.Markdown()
212
-
213
- # # Main Interface
214
- # with gr.Group(visible=False) as main_page:
215
- # with gr.Row():
216
- # with gr.Column(scale=1):
217
- # gr.Markdown("### 📋 Navigation")
218
- # nav_dashboard = gr.Button("📊 Dashboard", variant="primary")
219
- # nav_students = gr.Button("👥 Students")
220
- # nav_teachers = gr.Button("👨‍🏫 Teachers")
221
- # nav_courses = gr.Button("📚 Courses")
222
- # nav_youtube = gr.Button("🎥 YouTube Tool")
223
- # logout_btn = gr.Button("🚪 Logout", variant="stop")
224
-
225
- # with gr.Column(scale=3):
226
- # # Dashboard Content
227
- # dashboard_page = gr.Group()
228
- # with dashboard_page:
229
- # gr.Markdown("## 📊 Dashboard")
230
- # gr.Markdown(f"""
231
- # ### System Overview
232
- # - 👥 Total Students: {len(students_data)}
233
- # - 👨‍🏫 Total Teachers: {len(teachers_data)}
234
- # - 📚 Total Courses: {len(courses_data)}
235
- # ### Quick Actions
236
- # - View student performance
237
- # - Access course materials
238
- # - Generate learning insights
239
- # """)
240
-
241
- # # Students Content
242
- # students_page = gr.Group(visible=False)
243
- # with students_page:
244
- # gr.Markdown("## 👥 Students")
245
- # gr.DataFrame(
246
- # value=students_data,
247
- # headers=["ID", "Name", "Grade", "Program"]
248
- # )
249
-
250
- # # Teachers Content
251
- # teachers_page = gr.Group(visible=False)
252
- # with teachers_page:
253
- # gr.Markdown("## 👨‍🏫 Teachers")
254
- # gr.DataFrame(
255
- # value=teachers_data,
256
- # headers=["ID", "Name", "Subject", "Qualification"]
257
- # )
258
-
259
- # # Courses Content
260
- # courses_page = gr.Group(visible=False)
261
- # with courses_page:
262
- # gr.Markdown("## 📚 Courses")
263
- # gr.DataFrame(
264
- # value=courses_data,
265
- # headers=["ID", "Name", "Instructor", "Level"]
266
- # )
267
-
268
- # # YouTube Tool Content
269
- # youtube_page = gr.Group(visible=False)
270
- # with youtube_page:
271
- # gr.Markdown("## Agent for YouTube Content Exploration")
272
- # with gr.Row():
273
- # with gr.Column(scale=2):
274
- # video_url = gr.Textbox(
275
- # label="YouTube URL",
276
- # placeholder="https://youtube.com/watch?v=..."
277
- # )
278
- # keywords = gr.Textbox(
279
- # label="Keywords for Recommendations",
280
- # placeholder="e.g., python programming, machine learning"
281
- # )
282
- # analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
283
- # recommend_btn = gr.Button("🔎 Get Recommendations", variant="primary")
284
-
285
- # with gr.Column(scale=1):
286
- # video_thumbnail = gr.Image(label="Video Preview")
287
-
288
- # with gr.Row():
289
- # with gr.Column():
290
- # summary = gr.Textbox(label="📝 Summary", lines=8)
291
- # sentiment = gr.Textbox(label="😊 Content Sentiment")
292
- # with gr.Column():
293
- # recommendations = gr.Textbox(label="🎯 Related Videos", lines=10)
294
-
295
- # def login_check(user, pwd):
296
- # if USER_CREDENTIALS.get(user) == pwd:
297
- # return {
298
- # login_page: gr.update(visible=False),
299
- # main_page: gr.update(visible=True),
300
- # login_msg: ""
301
- # }
302
- # return {
303
- # login_page: gr.update(visible=True),
304
- # main_page: gr.update(visible=False),
305
- # login_msg: "❌ Invalid credentials"
306
- # }
307
-
308
- # def show_page(page_name):
309
- # updates = {
310
- # dashboard_page: gr.update(visible=False),
311
- # students_page: gr.update(visible=False),
312
- # teachers_page: gr.update(visible=False),
313
- # courses_page: gr.update(visible=False),
314
- # youtube_page: gr.update(visible=False)
315
- # }
316
- # updates[page_name] = gr.update(visible=True)
317
- # return updates
318
-
319
- # # Event Handlers
320
- # login_btn.click(
321
- # login_check,
322
- # inputs=[username, password],
323
- # outputs=[login_page, main_page, login_msg]
324
- # )
325
-
326
- # nav_dashboard.click(lambda: show_page(dashboard_page), outputs=list(show_page(dashboard_page).keys()))
327
- # nav_students.click(lambda: show_page(students_page), outputs=list(show_page(students_page).keys()))
328
- # nav_teachers.click(lambda: show_page(teachers_page), outputs=list(show_page(teachers_page).keys()))
329
- # nav_courses.click(lambda: show_page(courses_page), outputs=list(show_page(courses_page).keys()))
330
- # nav_youtube.click(lambda: show_page(youtube_page), outputs=list(show_page(youtube_page).keys()))
331
-
332
- # analyze_btn.click(
333
- # process_youtube_video,
334
- # inputs=[video_url],
335
- # outputs=[video_thumbnail, summary, sentiment]
336
- # )
337
-
338
- # recommend_btn.click(
339
- # get_recommendations,
340
- # inputs=[keywords],
341
- # outputs=[recommendations]
342
- # )
343
-
344
- # logout_btn.click(
345
- # lambda: {
346
- # login_page: gr.update(visible=True),
347
- # main_page: gr.update(visible=False)
348
- # },
349
- # outputs=[login_page, main_page]
350
- # )
351
-
352
- # if __name__ == "__main__":
353
- # app.launch()
354
-
355
-
356
- ##############################
357
  import subprocess
358
  subprocess.check_call(["pip", "install", "transformers==4.34.0"])
359
  subprocess.check_call(["pip", "install", "torch>=1.7.1"])
@@ -463,14 +107,11 @@ courses_data = [
463
  (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
464
  (5, "Mathematics", "Ms. Smith", "Intermediate")
465
  ]
466
- from transformers import pipeline
467
  def extract_video_id(url):
468
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
469
  return match.group(1) if match else None
470
 
471
- def clean_text(text):
472
- return " ".join(text.split())
473
-
474
  def get_video_metadata(video_id):
475
  try:
476
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
@@ -484,23 +125,28 @@ def get_video_metadata(video_id):
484
  "description": snippet.get("description", "No description available"),
485
  }
486
  return {}
 
487
  except Exception as e:
488
  return {"title": "Error fetching metadata", "description": str(e)}
489
 
490
- def segment_transcript(transcript_text):
491
- """Segment transcript into sections like intro, body, and conclusion."""
492
- lines = transcript_text.split(". ")
493
- intro = ". ".join(lines[:3]) # First 3 lines for intro
494
- body = ". ".join(lines[3:-2]) # Middle lines for body
495
- conclusion = ". ".join(lines[-2:]) # Last 2 lines for conclusion
496
- return {"intro": intro, "body": body, "conclusion": conclusion}
497
-
498
- def summarize_text(text, summarizer):
499
- """Summarize text using the provided summarization model."""
500
- max_chunk_size = 512
501
- chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
502
- summaries = summarizer(chunks, max_length=150, min_length=40, do_sample=False)
503
- return " ".join(summary["summary_text"] for summary in summaries)
 
 
 
 
504
 
505
  def process_youtube_video(url):
506
  try:
@@ -510,40 +156,28 @@ def process_youtube_video(url):
510
 
511
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
512
 
513
- # Load summarization model
514
- summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
515
-
516
  try:
517
- # Fetch transcript
518
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
519
- transcript = transcript_list.find_transcript(['en']).fetch()
520
- transcript_text = " ".join([t['text'] for t in transcript])
521
- cleaned_text = clean_text(transcript_text)
522
-
523
- # Segment transcript into sections
524
- segments = segment_transcript(cleaned_text)
525
-
526
- # Summarize each section
527
- intro_summary = summarize_text(segments["intro"], summarizer)
528
- body_summary = summarize_text(segments["body"], summarizer)
529
- conclusion_summary = summarize_text(segments["conclusion"], summarizer)
530
 
531
- # Sentiment analysis
532
- sentiment = TextBlob(cleaned_text).sentiment
533
- sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
534
 
535
- detailed_summary = (
536
- f"### Introduction\n{intro_summary}\n\n"
537
- f"### Main Body\n{body_summary}\n\n"
538
- f"### Conclusion\n{conclusion_summary}"
539
- )
540
 
541
  except (TranscriptsDisabled, NoTranscriptFound):
542
  metadata = get_video_metadata(video_id)
543
- detailed_summary = metadata.get("description", "No subtitles available")
544
  sentiment_label = "N/A"
545
 
546
- return thumbnail, detailed_summary, sentiment_label
547
 
548
  except Exception as e:
549
  return None, f"Error: {str(e)}", "N/A"
@@ -700,3 +334,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
700
  if __name__ == "__main__":
701
  app.launch()
702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
  subprocess.check_call(["pip", "install", "transformers==4.34.0"])
3
  subprocess.check_call(["pip", "install", "torch>=1.7.1"])
 
107
  (4, "Computer Science", "Ms. Evelyn", "Intermediate"),
108
  (5, "Mathematics", "Ms. Smith", "Intermediate")
109
  ]
110
+
111
  def extract_video_id(url):
112
  match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
113
  return match.group(1) if match else None
114
 
 
 
 
115
  def get_video_metadata(video_id):
116
  try:
117
  youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
 
125
  "description": snippet.get("description", "No description available"),
126
  }
127
  return {}
128
+
129
  except Exception as e:
130
  return {"title": "Error fetching metadata", "description": str(e)}
131
 
132
def clean_text_for_analysis(text):
    """Normalize the whitespace in *text* before it is analyzed.

    The text is split on every run of whitespace (spaces, tabs, newlines)
    and the pieces are rejoined with single spaces, which also removes any
    leading or trailing whitespace.

    Args:
        text: The raw string to normalize.

    Returns:
        The whitespace-normalized string; an empty string when *text*
        contains no non-whitespace characters.
    """
    return " ".join(text.split())
134
+
135
def generate_summary(content, max_chars=400):
    """Build a short Markdown summary block from *content*.

    Args:
        content: Text to summarize, such as a cleaned transcript or a video
            description. ``None`` is treated as an empty string.
        max_chars: Maximum number of characters of *content* to keep before
            truncating. Defaults to 400.

    Returns:
        A string made of a "**Key Insights**" heading, a blank line, and the
        content. Content longer than *max_chars* is cut to that length and
        suffixed with "...".
    """
    # Guard against a missing description so len() below cannot raise.
    content = content or ""

    # Explicit branch instead of a conditional expression inside the list
    # literal, where the precedence of `a + b if c else d` is easy to misread.
    if len(content) > max_chars:
        excerpt = content[:max_chars] + "..."
    else:
        excerpt = content

    return "\n".join(["**Key Insights**", "", excerpt])
141
+
142
def analyze_sentiment(text):
    """Label the sentiment polarity of *text* as reported by TextBlob.

    The polarity is rounded to two decimals *before* it is classified so the
    label always agrees with the number shown next to it. Classifying the raw
    value could yield output such as "Positive (0.00)" for a very small
    polarity.

    Args:
        text: The text to analyze.

    Returns:
        A string of the form "<label> (<polarity>)", where the label is
        "Positive", "Negative" or "Neutral" and the polarity is printed with
        two decimals, e.g. "Positive (0.35)".
    """
    polarity = round(TextBlob(text).sentiment.polarity, 2)

    if polarity > 0:
        label = "Positive"
    elif polarity < 0:
        label = "Negative"
    else:
        label = "Neutral"
        # Rounding a tiny negative value gives -0.0; normalize it so the
        # output never reads "-0.00".
        polarity = 0.0

    return f"{label} ({polarity:.2f})"
150
 
151
  def process_youtube_video(url):
152
  try:
 
156
 
157
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
158
 
 
 
 
159
  try:
 
160
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
161
+ transcript = None
162
+ try:
163
+ transcript = transcript_list.find_transcript(['en'])
164
+ except:
165
+ transcript = transcript_list.find_generated_transcript(['en'])
 
 
 
 
 
 
166
 
167
+ text = " ".join([t['text'] for t in transcript.fetch()])
168
+ if not text.strip():
169
+ raise ValueError("Transcript is empty")
170
 
171
+ cleaned_text = clean_text_for_analysis(text)
172
+ summary = generate_summary(cleaned_text)
173
+ sentiment_label = analyze_sentiment(cleaned_text)
 
 
174
 
175
  except (TranscriptsDisabled, NoTranscriptFound):
176
  metadata = get_video_metadata(video_id)
177
+ summary = generate_summary(metadata.get("description", "No subtitles available"))
178
  sentiment_label = "N/A"
179
 
180
+ return thumbnail, summary, sentiment_label
181
 
182
  except Exception as e:
183
  return None, f"Error: {str(e)}", "N/A"
 
334
  if __name__ == "__main__":
335
  app.launch()
336
 
337
+
338
+ ##############################
339
+
340
+