ilsa15 committed on
Commit
8592e17
·
verified ·
1 Parent(s): 122e63b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -113
app.py CHANGED
@@ -1,95 +1,23 @@
1
 
2
 
3
- import nest_asyncio
4
- from youtube_transcript_api import YouTubeTranscriptApi
5
- import streamlit as st
6
- import os
7
- from groq import Groq
8
-
9
- nest_asyncio.apply()
10
-
11
- # --- CONFIGURATION ---
12
- YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
13
- channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
14
-
15
- # Initialize Groq client once
16
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
-
18
- # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
19
- def get_latest_video_ids(channel_id, max_results=5):
20
- import requests
21
- url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
22
- response = requests.get(url)
23
- videos = response.json().get('items', [])
24
- return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
25
-
26
- # --- FUNCTION: Get video transcripts ---
27
- def get_video_transcripts(video_ids):
28
- all_transcripts = []
29
- for vid in video_ids:
30
- try:
31
- transcript = YouTubeTranscriptApi.get_transcript(vid)
32
- text = " ".join([t['text'] for t in transcript])
33
- all_transcripts.append(text)
34
- except:
35
- continue
36
- return all_transcripts
37
-
38
- # --- FUNCTION: Ask Groq API using official client ---
39
- def ask_groq(context, question):
40
- messages = [
41
- {"role": "system", "content": "You are a helpful assistant."},
42
- {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
43
- ]
44
- chat_completion = groq_client.chat.completions.create(
45
- model="llama-3.3-70b-versatile", # Or the model you have access to
46
- messages=messages,
47
- )
48
- return chat_completion.choices[0].message.content.strip()
49
-
50
- # --- STREAMLIT APP ---
51
- def main():
52
- st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
53
- st.title("๐ŸŽ“ EduBot for @icodeguru0")
54
- st.markdown("Ask anything based on the channelโ€™s recent videos.")
55
-
56
- question = st.text_input("๐Ÿ’ฌ Ask your question here:")
57
- if question:
58
- with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
59
- video_ids = get_latest_video_ids(channel_id)
60
- transcripts = get_video_transcripts(video_ids)
61
- full_context = "\n\n".join(transcripts)
62
- with st.spinner("๐Ÿง  Thinking..."):
63
- answer = ask_groq(full_context, question)
64
- st.success(answer)
65
-
66
- st.markdown("---")
67
- st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
68
-
69
- if __name__ == "__main__":
70
- main()
71
-
72
-
73
  # import nest_asyncio
74
  # from youtube_transcript_api import YouTubeTranscriptApi
75
  # import streamlit as st
76
  # import os
77
  # from groq import Groq
78
- # import requests
79
- # from bs4 import BeautifulSoup
80
 
81
  # nest_asyncio.apply()
82
 
83
  # # --- CONFIGURATION ---
84
  # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
85
  # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
86
- # BASE_URL = "https://icode.guru"
87
 
88
  # # Initialize Groq client once
89
  # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
90
 
91
  # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
92
  # def get_latest_video_ids(channel_id, max_results=5):
 
93
  # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
94
  # response = requests.get(url)
95
  # videos = response.json().get('items', [])
@@ -107,38 +35,11 @@ if __name__ == "__main__":
107
  # continue
108
  # return all_transcripts
109
 
110
- # # --- NEW FUNCTION: Scrape icode.guru ---
111
- # def scrape_icodeguru(base_url="https://icode.guru", max_pages=5):
112
- # visited = set()
113
- # blocks = []
114
-
115
- # def crawl(url):
116
- # if url in visited or len(visited) >= max_pages:
117
- # return
118
- # visited.add(url)
119
- # try:
120
- # res = requests.get(url, timeout=10)
121
- # soup = BeautifulSoup(res.content, "html.parser")
122
- # page_text = soup.get_text(separator=" ", strip=True)
123
- # if len(page_text) > 100:
124
- # blocks.append(f"[Source]({url}):\n{page_text[:2000]}")
125
- # for link in soup.find_all("a", href=True):
126
- # href = link['href']
127
- # if href.startswith("/"):
128
- # href = base_url + href
129
- # if href.startswith(base_url):
130
- # crawl(href)
131
- # except:
132
- # pass
133
-
134
- # crawl(base_url)
135
- # return blocks
136
-
137
  # # --- FUNCTION: Ask Groq API using official client ---
138
  # def ask_groq(context, question):
139
  # messages = [
140
- # {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."},
141
- # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"}
142
  # ]
143
  # chat_completion = groq_client.chat.completions.create(
144
  # model="llama-3.3-70b-versatile", # Or the model you have access to
@@ -148,29 +49,254 @@ if __name__ == "__main__":
148
 
149
  # # --- STREAMLIT APP ---
150
  # def main():
151
- # st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide")
152
  # st.title("๐ŸŽ“ EduBot for @icodeguru0")
153
- # st.markdown("Ask anything based on the channelโ€™s recent videos and website content from [icode.guru](https://icode.guru).")
154
 
155
  # question = st.text_input("๐Ÿ’ฌ Ask your question here:")
156
  # if question:
157
  # with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
158
  # video_ids = get_latest_video_ids(channel_id)
159
  # transcripts = get_video_transcripts(video_ids)
160
- # yt_context = "\n\n".join(transcripts)
161
-
162
- # with st.spinner("๐ŸŒ Scraping icode.guru..."):
163
- # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
164
- # site_context = "\n\n".join(site_blocks)
165
-
166
- # full_context = yt_context + "\n\n" + site_context
167
-
168
  # with st.spinner("๐Ÿง  Thinking..."):
169
  # answer = ask_groq(full_context, question)
170
  # st.success(answer)
171
 
172
  # st.markdown("---")
173
- # st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0")
174
 
175
  # if __name__ == "__main__":
176
  # main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  # import nest_asyncio
4
  # from youtube_transcript_api import YouTubeTranscriptApi
5
  # import streamlit as st
6
  # import os
7
  # from groq import Groq
 
 
8
 
9
  # nest_asyncio.apply()
10
 
11
  # # --- CONFIGURATION ---
12
  # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
13
  # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
 
14
 
15
  # # Initialize Groq client once
16
  # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
 
18
  # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
19
  # def get_latest_video_ids(channel_id, max_results=5):
20
+ # import requests
21
  # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
22
  # response = requests.get(url)
23
  # videos = response.json().get('items', [])
 
35
  # continue
36
  # return all_transcripts
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # # --- FUNCTION: Ask Groq API using official client ---
39
  # def ask_groq(context, question):
40
  # messages = [
41
+ # {"role": "system", "content": "You are a helpful assistant."},
42
+ # {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
43
  # ]
44
  # chat_completion = groq_client.chat.completions.create(
45
  # model="llama-3.3-70b-versatile", # Or the model you have access to
 
49
 
50
  # # --- STREAMLIT APP ---
51
  # def main():
52
+ # st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
53
  # st.title("๐ŸŽ“ EduBot for @icodeguru0")
54
+ # st.markdown("Ask anything based on the channelโ€™s recent videos.")
55
 
56
  # question = st.text_input("๐Ÿ’ฌ Ask your question here:")
57
  # if question:
58
  # with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
59
  # video_ids = get_latest_video_ids(channel_id)
60
  # transcripts = get_video_transcripts(video_ids)
61
+ # full_context = "\n\n".join(transcripts)
 
 
 
 
 
 
 
62
  # with st.spinner("๐Ÿง  Thinking..."):
63
  # answer = ask_groq(full_context, question)
64
  # st.success(answer)
65
 
66
  # st.markdown("---")
67
+ # st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
68
 
69
  # if __name__ == "__main__":
70
  # main()
71
+
72
+
73
+ # # import nest_asyncio
74
+ # # from youtube_transcript_api import YouTubeTranscriptApi
75
+ # # import streamlit as st
76
+ # # import os
77
+ # # from groq import Groq
78
+ # # import requests
79
+ # # from bs4 import BeautifulSoup
80
+
81
+ # # nest_asyncio.apply()
82
+
83
+ # # # --- CONFIGURATION ---
84
+ # # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
85
+ # # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
86
+ # # BASE_URL = "https://icode.guru"
87
+
88
+ # # # Initialize Groq client once
89
+ # # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
90
+
91
+ # # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
92
+ # # def get_latest_video_ids(channel_id, max_results=5):
93
+ # # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
94
+ # # response = requests.get(url)
95
+ # # videos = response.json().get('items', [])
96
+ # # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
97
+
98
+ # # # --- FUNCTION: Get video transcripts ---
99
+ # # def get_video_transcripts(video_ids):
100
+ # # all_transcripts = []
101
+ # # for vid in video_ids:
102
+ # # try:
103
+ # # transcript = YouTubeTranscriptApi.get_transcript(vid)
104
+ # # text = " ".join([t['text'] for t in transcript])
105
+ # # all_transcripts.append(text)
106
+ # # except:
107
+ # # continue
108
+ # # return all_transcripts
109
+
110
+ # # # --- NEW FUNCTION: Scrape icode.guru ---
111
+ # # def scrape_icodeguru(base_url="https://icode.guru", max_pages=5):
112
+ # # visited = set()
113
+ # # blocks = []
114
+
115
+ # # def crawl(url):
116
+ # # if url in visited or len(visited) >= max_pages:
117
+ # # return
118
+ # # visited.add(url)
119
+ # # try:
120
+ # # res = requests.get(url, timeout=10)
121
+ # # soup = BeautifulSoup(res.content, "html.parser")
122
+ # # page_text = soup.get_text(separator=" ", strip=True)
123
+ # # if len(page_text) > 100:
124
+ # # blocks.append(f"[Source]({url}):\n{page_text[:2000]}")
125
+ # # for link in soup.find_all("a", href=True):
126
+ # # href = link['href']
127
+ # # if href.startswith("/"):
128
+ # # href = base_url + href
129
+ # # if href.startswith(base_url):
130
+ # # crawl(href)
131
+ # # except:
132
+ # # pass
133
+
134
+ # # crawl(base_url)
135
+ # # return blocks
136
+
137
+ # # # --- FUNCTION: Ask Groq API using official client ---
138
+ # # def ask_groq(context, question):
139
+ # # messages = [
140
+ # # {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."},
141
+ # # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"}
142
+ # # ]
143
+ # # chat_completion = groq_client.chat.completions.create(
144
+ # # model="llama-3.3-70b-versatile", # Or the model you have access to
145
+ # # messages=messages,
146
+ # # )
147
+ # # return chat_completion.choices[0].message.content.strip()
148
+
149
+ # # # --- STREAMLIT APP ---
150
+ # # def main():
151
+ # # st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide")
152
+ # # st.title("๐ŸŽ“ EduBot for @icodeguru0")
153
+ # # st.markdown("Ask anything based on the channelโ€™s recent videos and website content from [icode.guru](https://icode.guru).")
154
+
155
+ # # question = st.text_input("๐Ÿ’ฌ Ask your question here:")
156
+ # # if question:
157
+ # # with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
158
+ # # video_ids = get_latest_video_ids(channel_id)
159
+ # # transcripts = get_video_transcripts(video_ids)
160
+ # # yt_context = "\n\n".join(transcripts)
161
+
162
+ # # with st.spinner("๐ŸŒ Scraping icode.guru..."):
163
+ # # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
164
+ # # site_context = "\n\n".join(site_blocks)
165
+
166
+ # # full_context = yt_context + "\n\n" + site_context
167
+
168
+ # # with st.spinner("๐Ÿง  Thinking..."):
169
+ # # answer = ask_groq(full_context, question)
170
+ # # st.success(answer)
171
+
172
+ # # st.markdown("---")
173
+ # # st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0")
174
+
175
+ # # if __name__ == "__main__":
176
+ # # main()
177
+
178
+
179
+
180
+ import nest_asyncio
181
+ import streamlit as st
182
+ import os
183
+ import requests
184
+ from youtube_transcript_api import YouTubeTranscriptApi
185
+ from groq import Groq
186
+ from bs4 import BeautifulSoup
187
+
188
+ nest_asyncio.apply()
189
+
190
+ # --- CONFIGURATION ---
191
+ YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
192
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
193
+ channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
194
+ BASE_URL = "https://icode.guru"
195
+
196
+ groq_client = Groq(api_key=GROQ_API_KEY)
197
+
198
+ # --- Fetch recent video IDs from YouTube channel ---
199
def get_latest_video_ids(channel_id, max_results=5):
    """Return ``(video_id, title)`` pairs for a channel's most recent uploads.

    Queries the YouTube Data API v3 ``search`` endpoint, newest first, using
    the module-level ``YOUTUBE_API_KEY``.

    Args:
        channel_id: ID of the YouTube channel to search.
        max_results: Maximum number of search results to request.

    Returns:
        A list of ``(video_id, title)`` tuples, one per returned item whose
        kind is ``youtube#video``. The list is empty when the response
        carries no ``items`` key (for example an API error payload).
    """
    # Let requests build the query string so every value is URL-encoded.
    params = {
        "key": YOUTUBE_API_KEY,
        "channelId": channel_id,
        "part": "snippet,id",
        "order": "date",
        "maxResults": max_results,
    }
    # A timeout keeps a stalled API call from blocking the app indefinitely.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/search",
        params=params,
        timeout=10,
    )
    items = response.json().get("items", [])

    latest = []
    for item in items:
        item_id = item.get("id", {})
        # Search results can also be channels or playlists; keep videos only,
        # and tolerate items that are missing the expected keys.
        if item_id.get("kind") != "youtube#video" or "videoId" not in item_id:
            continue
        title = item.get("snippet", {}).get("title", "")
        latest.append((item_id["videoId"], title))
    return latest
204
+
205
+ # --- Get video transcripts ---
206
def get_video_transcripts(video_info):
    """Fetch transcripts for the given videos, skipping any that fail.

    Args:
        video_info: Iterable of ``(video_id, title)`` pairs.

    Returns:
        A list of dicts with the keys ``video_id``, ``title``, ``link`` and
        ``transcript`` (the transcript segments joined by single spaces).
        Videos whose transcript cannot be fetched are left out, so the list
        may be shorter than the input.
    """
    import logging

    logger = logging.getLogger(__name__)
    results = []
    for vid, title in video_info:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(vid)
            text = " ".join(t['text'] for t in transcript)
        except Exception as exc:
            # Best effort: one video without a usable transcript must not
            # abort the whole batch, but record why it was skipped.
            logger.warning("Skipping transcript for video %s: %s", vid, exc)
            continue
        results.append({
            "video_id": vid,
            "title": title,
            "link": f"https://www.youtube.com/watch?v={vid}",
            "transcript": text,
        })
    return results
222
+
223
+ # --- Scrape icode.guru ---
224
def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
    """Crawl pages under ``base_url`` and return a text excerpt for each.

    Pages are visited depth-first, following ``<a href>`` links in document
    order, starting from ``base_url``.

    Args:
        base_url: Root URL of the site; only links at or below it are followed.
        max_pages: Maximum number of pages to request. Failed requests count
            towards this limit.

    Returns:
        A list of strings of the form ``"[url](url):\\n<text>"``, where the
        text is the first 1500 characters of the page's visible text. Pages
        with 100 characters of text or fewer, and pages that fail to load or
        parse, are omitted.
    """
    import logging
    from urllib.parse import urldefrag, urljoin

    logger = logging.getLogger(__name__)
    root = base_url.rstrip("/")
    visited = set()
    blocks = []
    # Explicit stack instead of recursion: same depth-first order, but no
    # risk of hitting the recursion limit on deeply linked sites.
    pending = [base_url]

    while pending and len(visited) < max_pages:
        url = pending.pop()
        # Treat "/page" and "/page/" as one page so duplicates do not use up
        # the page budget.
        page_key = url.rstrip("/")
        if page_key in visited:
            continue
        visited.add(page_key)

        try:
            res = requests.get(url, timeout=10)
            soup = BeautifulSoup(res.content, "html.parser")
            page_text = soup.get_text(separator=" ", strip=True)
            anchors = soup.find_all("a", href=True)
        except Exception as exc:
            # Best effort: a page that fails to load or parse is skipped.
            logger.warning("Skipping %s: %s", url, exc)
            continue

        if len(page_text) > 100:
            blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")

        links = []
        for anchor in anchors:
            # Resolve relative links against the current page and drop any
            # "#fragment", which points into a page already being fetched.
            href, _fragment = urldefrag(urljoin(url, anchor['href']))
            # Compare against "root/" rather than a bare prefix so that a
            # look-alike host such as "<root>.example.com" is not followed.
            if href == root or href.startswith(root + "/"):
                links.append(href)
        # Push in reverse so the first link on the page is visited first.
        pending.extend(reversed(links))

    return blocks
249
+
250
+ # --- Ask Groq ---
251
def ask_groq(context, question, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat model a question grounded in the supplied context.

    Args:
        context: Reference text (video transcripts and site excerpts) placed
            ahead of the question in the user message.
        question: The user's question.
        model: Groq model id to use for the completion.

    Returns:
        The model's answer with surrounding whitespace removed, or an empty
        string when the response carries no text content.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"},
    ]
    chat_completion = groq_client.chat.completions.create(
        model=model,
        messages=messages,
    )
    # The message content is optional in the response, so guard before
    # calling .strip() on it.
    content = chat_completion.choices[0].message.content
    return (content or "").strip()
261
+
262
+ # --- STREAMLIT APP ---
263
def main():
    """Render the EduBot page and answer a question from channel and site content.

    When the user submits a non-blank question, this fetches transcripts of
    the channel's latest videos and excerpts scraped from icode.guru, sends
    both to Groq as context, and shows the answer. Links to videos whose
    transcript contains the question text are listed below the answer.
    """
    st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
    st.title("🎓 EduBot for @icodeguru0")
    st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")

    # Strip so a whitespace-only entry does not trigger the whole pipeline.
    question = (st.text_input("💬 Ask your question:") or "").strip()
    if question:
        with st.spinner("📺 Fetching YouTube videos..."):
            video_info = get_latest_video_ids(channel_id, max_results=5)
            transcripts = get_video_transcripts(video_info)

        # Each transcript is capped at 1500 characters to bound prompt size.
        yt_context = "\n\n".join(
            f"[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
            for vid in transcripts
        )

        # NOTE(review): this only matches when the entire question appears
        # verbatim in a transcript, which is rare for natural-language
        # questions; consider keyword matching if links seldom show up.
        needle = question.lower()
        relevant_links = [
            vid['link'] for vid in transcripts
            if needle in vid['transcript'].lower()
        ]

        with st.spinner("🌐 Scraping icode.guru..."):
            site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
            site_context = "\n\n".join(site_blocks)

        # Skip empty parts so the prompt does not start or end with blank lines.
        full_context = "\n\n".join(
            part for part in (yt_context, site_context) if part
        )

        with st.spinner("🧠 Thinking..."):
            answer = ask_groq(full_context, question)

        st.success(answer)

        if relevant_links:
            st.markdown("### 🔗 Related YouTube Links")
            for link in relevant_links:
                st.markdown(f"- [Watch Video]({link})")

    st.markdown("---")
    st.caption("Powered by YouTube, iCodeGuru, and Groq")
299
+
300
+ if __name__ == "__main__":
301
+ main()
302
+