ilsa15 committed on
Commit
ca748ce
·
verified ·
1 Parent(s): 4fd9d2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -115
app.py CHANGED
@@ -176,139 +176,289 @@
176
  # # main()
177
 
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- import nest_asyncio
181
- import streamlit as st
182
  import os
 
 
183
  import requests
184
- from youtube_transcript_api import YouTubeTranscriptApi
185
- from groq import Groq
186
  from bs4 import BeautifulSoup
 
 
 
 
187
 
188
- nest_asyncio.apply()
189
-
190
- # --- CONFIGURATION ---
191
- YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
192
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
193
- channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
194
- BASE_URL = "https://icode.guru"
195
-
196
  groq_client = Groq(api_key=GROQ_API_KEY)
 
197
 
198
- # --- Fetch recent video IDs from YouTube channel ---
199
- def get_latest_video_ids(channel_id, max_results=5):
200
- url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
201
- response = requests.get(url)
202
- videos = response.json().get('items', [])
203
-
204
- valid_videos = []
205
- for v in videos:
206
- if v['id']['kind'] == 'youtube#video':
207
- title = v['snippet']['title']
208
- channel_title = v['snippet']['channelTitle']
209
- video_id = v['id']['videoId']
210
- if "icodeguru" in channel_title.lower(): # โœ… Extra validation
211
- valid_videos.append((video_id, title))
212
- return valid_videos
213
-
214
-
215
- # --- Get video transcripts ---
216
- def get_video_transcripts(video_info):
217
  results = []
218
- for vid, title in video_info:
219
- try:
220
- transcript = YouTubeTranscriptApi.get_transcript(vid)
221
- text = " ".join([t['text'] for t in transcript])
222
- video_link = f"https://www.youtube.com/watch?v={vid}"
223
- results.append({
224
- "video_id": vid,
225
- "title": title,
226
- "link": video_link,
227
- "transcript": text
228
- })
229
- except Exception as e:
230
- continue
231
  return results
232
 
233
- # --- Scrape icode.guru ---
234
- def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
235
- visited = set()
236
- blocks = []
237
-
238
- def crawl(url):
239
- if url in visited or len(visited) >= max_pages:
240
- return
241
- visited.add(url)
242
- try:
243
- res = requests.get(url, timeout=10)
244
- soup = BeautifulSoup(res.content, "html.parser")
245
- page_text = soup.get_text(separator=" ", strip=True)
246
- if len(page_text) > 100:
247
- blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
248
- for link in soup.find_all("a", href=True):
249
- href = link['href']
250
- if href.startswith("/"):
251
- href = base_url + href
252
- if href.startswith(base_url):
253
- crawl(href)
254
- except:
255
- pass
256
-
257
- crawl(base_url)
258
- return blocks
259
-
260
- # --- Ask Groq ---
261
- def ask_groq(context, question):
262
- messages = [
263
- {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
264
- {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
265
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  chat_completion = groq_client.chat.completions.create(
267
- model="llama-3.3-70b-versatile",
268
- messages=messages,
269
  )
270
- return chat_completion.choices[0].message.content.strip()
271
-
272
- #--- STREAMLIT APP ---
273
- def main():
274
- st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
275
- st.title("๐ŸŽ“ EduBot for @icodeguru0")
276
- st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
277
-
278
- question = st.text_input("๐Ÿ’ฌ Ask your question:")
279
- if question:
280
- with st.spinner("๐Ÿ“บ Fetching YouTube videos..."):
281
- video_info = get_latest_video_ids(channel_id, max_results=5)
282
- transcripts = get_video_transcripts(video_info)
283
-
284
- yt_context = ""
285
- relevant_links = []
286
- for vid in transcripts:
287
- yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
288
- if question.lower() in vid['transcript'].lower():
289
- relevant_links.append(vid['link'])
290
 
291
- with st.spinner("๐ŸŒ Scraping icode.guru..."):
292
- site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
293
- site_context = "\n\n".join(site_blocks)
294
 
295
- full_context = yt_context + "\n\n" + site_context
 
296
 
297
- with st.spinner("๐Ÿง  Thinking..."):
298
- answer = ask_groq(full_context, question)
299
 
300
- st.success(answer)
301
 
302
- if relevant_links:
303
- st.markdown("### ๐Ÿ”— Related YouTube Links")
304
- for link in relevant_links:
305
- st.markdown(f"- [Watch Video]({link})")
306
-
307
- st.markdown("---")
308
- st.caption("Powered by YouTube, iCodeGuru, and Groq")
 
 
309
 
310
  if __name__ == "__main__":
311
  main()
312
 
313
-
314
-
 
176
  # # main()
177
 
178
 
179
+ #(youtube+web)
180
+
181
+
182
+ # import nest_asyncio
183
+ # import streamlit as st
184
+ # import os
185
+ # import requests
186
+ # from youtube_transcript_api import YouTubeTranscriptApi
187
+ # from groq import Groq
188
+ # from bs4 import BeautifulSoup
189
+
190
+ # nest_asyncio.apply()
191
+
192
+ # # --- CONFIGURATION ---
193
+ # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
194
+ # GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
195
+ # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
196
+ # BASE_URL = "https://icode.guru"
197
+
198
+ # groq_client = Groq(api_key=GROQ_API_KEY)
199
+
200
+ # # --- Fetch recent video IDs from YouTube channel ---
201
+ # def get_latest_video_ids(channel_id, max_results=5):
202
+ # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
203
+ # response = requests.get(url)
204
+ # videos = response.json().get('items', [])
205
+
206
+ # valid_videos = []
207
+ # for v in videos:
208
+ # if v['id']['kind'] == 'youtube#video':
209
+ # title = v['snippet']['title']
210
+ # channel_title = v['snippet']['channelTitle']
211
+ # video_id = v['id']['videoId']
212
+ # if "icodeguru" in channel_title.lower(): # โœ… Extra validation
213
+ # valid_videos.append((video_id, title))
214
+ # return valid_videos
215
+
216
+
217
+ # # --- Get video transcripts ---
218
+ # def get_video_transcripts(video_info):
219
+ # results = []
220
+ # for vid, title in video_info:
221
+ # try:
222
+ # transcript = YouTubeTranscriptApi.get_transcript(vid)
223
+ # text = " ".join([t['text'] for t in transcript])
224
+ # video_link = f"https://www.youtube.com/watch?v={vid}"
225
+ # results.append({
226
+ # "video_id": vid,
227
+ # "title": title,
228
+ # "link": video_link,
229
+ # "transcript": text
230
+ # })
231
+ # except Exception as e:
232
+ # continue
233
+ # return results
234
+
235
+ # # --- Scrape icode.guru ---
236
+ # def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
237
+ # visited = set()
238
+ # blocks = []
239
+
240
+ # def crawl(url):
241
+ # if url in visited or len(visited) >= max_pages:
242
+ # return
243
+ # visited.add(url)
244
+ # try:
245
+ # res = requests.get(url, timeout=10)
246
+ # soup = BeautifulSoup(res.content, "html.parser")
247
+ # page_text = soup.get_text(separator=" ", strip=True)
248
+ # if len(page_text) > 100:
249
+ # blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
250
+ # for link in soup.find_all("a", href=True):
251
+ # href = link['href']
252
+ # if href.startswith("/"):
253
+ # href = base_url + href
254
+ # if href.startswith(base_url):
255
+ # crawl(href)
256
+ # except:
257
+ # pass
258
+
259
+ # crawl(base_url)
260
+ # return blocks
261
+
262
+ # # --- Ask Groq ---
263
+ # def ask_groq(context, question):
264
+ # messages = [
265
+ # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
266
+ # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
267
+ # ]
268
+ # chat_completion = groq_client.chat.completions.create(
269
+ # model="llama-3.3-70b-versatile",
270
+ # messages=messages,
271
+ # )
272
+ # return chat_completion.choices[0].message.content.strip()
273
+
274
+ # #--- STREAMLIT APP ---
275
+ # def main():
276
+ # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
277
+ # st.title("๐ŸŽ“ EduBot for @icodeguru0")
278
+ # st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
279
+
280
+ # question = st.text_input("๐Ÿ’ฌ Ask your question:")
281
+ # if question:
282
+ # with st.spinner("๐Ÿ“บ Fetching YouTube videos..."):
283
+ # video_info = get_latest_video_ids(channel_id, max_results=5)
284
+ # transcripts = get_video_transcripts(video_info)
285
+
286
+ # yt_context = ""
287
+ # relevant_links = []
288
+ # for vid in transcripts:
289
+ # yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
290
+ # if question.lower() in vid['transcript'].lower():
291
+ # relevant_links.append(vid['link'])
292
+
293
+ # with st.spinner("๐ŸŒ Scraping icode.guru..."):
294
+ # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
295
+ # site_context = "\n\n".join(site_blocks)
296
+
297
+ # full_context = yt_context + "\n\n" + site_context
298
+
299
+ # with st.spinner("๐Ÿง  Thinking..."):
300
+ # answer = ask_groq(full_context, question)
301
+
302
+ # st.success(answer)
303
+
304
+ # if relevant_links:
305
+ # st.markdown("### ๐Ÿ”— Related YouTube Links")
306
+ # for link in relevant_links:
307
+ # st.markdown(f"- [Watch Video]({link})")
308
+
309
+ # st.markdown("---")
310
+ # st.caption("Powered by YouTube, iCodeGuru, and Groq")
311
+
312
+ # if __name__ == "__main__":
313
+ # main()
314
+
315
+
316
+
317
 
 
 
318
  import os
319
+ import gradio as gr
320
+ import json
321
  import requests
 
 
322
  from bs4 import BeautifulSoup
323
+ from sentence_transformers import SentenceTransformer
324
+ import numpy as np
325
+ from sklearn.metrics.pairwise import cosine_similarity
326
+ from groq import Groq
327
 
328
+ # --- CONFIG ---
329
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
 
 
 
 
 
330
  groq_client = Groq(api_key=GROQ_API_KEY)
331
+ embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
332
 
333
+ # --- UTILS ---
334
def embed(text):
    """Encode one string with the module-level ``embed_model``.

    The text is wrapped in a one-element batch for ``encode`` and the
    first (and only) row of the result is returned.
    """
    batch = embed_model.encode([text])
    return batch[0]
336
+
337
def find_best_match(query, documents, embeddings, top_k=3, threshold=0.5):
    """Return the documents most similar to *query*, best match first.

    Args:
        query: Text to search for; it is embedded with ``embed``.
        documents: Sequence of items, parallel to *embeddings*.
        embeddings: One embedding vector per entry in *documents*.
        top_k: Maximum number of results to return.
        threshold: Minimum cosine similarity a document needs to be kept.

    Returns:
        A list of ``(document, similarity)`` tuples in descending order of
        similarity. The list is empty when there are no documents or when
        nothing reaches *threshold*.
    """
    # cosine_similarity cannot score against an empty matrix, so answer
    # early instead of letting it raise.
    if len(documents) == 0:
        return []

    query_vec = embed(query).reshape(1, -1)
    sims = cosine_similarity(query_vec, embeddings)[0]
    ranked = np.argsort(sims)[::-1][:top_k]
    return [
        (documents[idx], float(sims[idx]))
        for idx in ranked
        if sims[idx] >= threshold
    ]
346
 
347
def _read_upload_text(file):
    """Return the UTF-8 text of an upload given as a file object or a path."""
    if hasattr(file, "read"):
        raw = file.read()
        return raw.decode("utf-8") if isinstance(raw, bytes) else raw
    # NOTE(review): newer Gradio versions appear to hand uploads over as
    # temp-file paths rather than open file objects; confirm against the
    # deployed version.
    with open(getattr(file, "name", file), encoding="utf-8") as handle:
        return handle.read()


def search_uploaded_files(query, uploaded_files):
    """Search uploaded ``.txt``/``.json`` files for passages matching *query*.

    JSON files are split into one passage per top-level list item (a
    non-list document is a single passage); any other file is one passage.

    Args:
        query: The user's question.
        uploaded_files: Iterable of uploads (file objects or paths), or
            ``None``.

    Returns:
        A list of display strings, one per match, each naming the file the
        passage came from. Empty when there is nothing to search.

    Raises:
        ValueError: If a file is not valid UTF-8 or a ``.json`` file does
            not parse (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
    """
    texts = []
    sources = []

    for file in uploaded_files or []:
        name = str(getattr(file, "name", file)).lower()
        content = _read_upload_text(file)
        if name.endswith(".json"):
            data = json.loads(content)
            items = data if isinstance(data, list) else [data]
            for item in items:
                texts.append(json.dumps(item) if isinstance(item, dict) else str(item))
                sources.append(name)
        else:
            texts.append(content)
            sources.append(name)

    if not texts:
        return []

    embeddings = [embed(t) for t in texts]
    # Rank (source, text) pairs rather than bare texts: the position of a
    # hit in the ranked results is not its position in ``sources``, so the
    # source has to travel with the passage to be attributed correctly.
    labelled = list(zip(sources, texts))
    matches = find_best_match(query, labelled, embeddings)
    return [f"📄 From {source}:\n{text}" for (source, text), _score in matches]
370
+
371
+ # --- FALLBACK SOURCES ---
372
+
373
def scrape_icodeguru(query, max_results=3):
    """Find links on the icode.guru home page whose text mentions *query*.

    Args:
        query: Text to look for (case-insensitive) in each link's label.
        max_results: Stop after this many matching links.

    Returns:
        A list of ``"<label>\\n🔗 <absolute url>"`` strings. Empty when the
        query is blank or nothing matches. On any fetch/parse failure a
        single-element list holding an error message is returned instead
        of raising.
    """
    from urllib.parse import urljoin

    base_url = "https://icode.guru"
    needle = query.strip().lower()
    # A blank needle is a substring of every label and would return the
    # first few links on the page regardless of relevance.
    if not needle:
        return []

    try:
        resp = requests.get(base_url, timeout=10)
        resp.raise_for_status()  # don't mine an error page for links
        soup = BeautifulSoup(resp.text, "html.parser")

        matching = []
        for a in soup.find_all("a", href=True):
            label = a.text.strip()
            if needle not in label.lower():
                continue
            # urljoin resolves "/path", "path" and "#frag" correctly and
            # leaves absolute URLs alone; plain concatenation does not.
            full_url = urljoin(base_url + "/", a["href"])
            if not full_url.startswith(("http://", "https://")):
                continue  # skip mailto:, tel:, javascript: and similar
            matching.append(f"{label}\n🔗 {full_url}")
            if len(matching) >= max_results:
                break
        return matching
    except Exception as e:
        return [f"🌐 Error fetching website: {e}"]
390
+
391
def search_youtube_icodeguru(query, max_results=3):
    """Search the iCodeGuru YouTube channel for videos matching *query*.

    Reads the API key from the ``YOUTUBE_API_KEY`` environment variable.

    Args:
        query: Free-text search terms.
        max_results: Maximum number of videos to request.

    Returns:
        A list of ``"▶️ <title>\\n🔗 <watch url>"`` strings. When the key is
        missing, the API response has no ``items``, or the request fails,
        a single-element list holding a warning/error message is returned
        instead of raising.
    """
    api_key = os.getenv("YOUTUBE_API_KEY")
    if not api_key:
        return ["⚠️ YouTube API key not set."]

    # Passing params lets requests URL-encode the query; interpolating it
    # into the URL breaks on spaces, '&', '#', etc.
    params = {
        "part": "snippet",
        "channelId": "UCsv3kmQ5k9z7CdFtnqp1U_A",
        "q": query,
        "type": "video",
        "maxResults": max_results,
        "key": api_key,
    }
    try:
        r = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params=params,
            timeout=10,
        )
        data = r.json()

        if "items" not in data:
            return ["⚠️ YouTube API error or quota exceeded."]

        results = []
        for item in data["items"]:
            video_id = item.get("id", {}).get("videoId")
            if not video_id:
                continue  # skip a malformed entry instead of losing every result
            title = item["snippet"]["title"]
            video_url = f"https://www.youtube.com/watch?v={video_id}"
            results.append(f"▶️ {title}\n🔗 {video_url}")

        return results
    except Exception as e:
        return [f"📺 Error searching YouTube: {e}"]
414
+
415
+ # --- CHAT HANDLER ---
416
+
417
def chat_handler(message, history, files=None):
    """Answer *message* from uploaded files, else from YouTube/website search.

    Uploaded files are searched first. If they yield no context, the
    YouTube and website helpers supply it and the reply is tagged as a
    fallback answer. The context and question are then sent to the Groq
    chat model.

    Args:
        message: The user's question.
        history: Previous chat turns supplied by the UI; not used here.
        files: Optional uploaded knowledge files.

    Returns:
        The model's reply as a string, or a readable error message when
        the uploads cannot be read or the model call fails.
    """
    try:
        file_results = search_uploaded_files(message, files) if files else []
    except (OSError, ValueError) as e:
        # ValueError covers both bad UTF-8 and invalid JSON in an upload.
        return f"⚠️ Could not read the uploaded files: {e}"
    context_text = "\n\n".join(file_results)

    fallback_used = not context_text.strip()
    if fallback_used:
        yt_results = search_youtube_icodeguru(message)
        web_results = scrape_icodeguru(message)
        context_text = "\n\n".join(yt_results + web_results)

    prompt = (
        "You are SmartiBot, a helpful assistant trained only on iCodeGuru content.\n"
        "Answer the following question **strictly** based on this context:\n"
        "\n"
        "Context:\n"
        f"{context_text}\n"
        "\n"
        f"Question: {message}\n"
        "Answer:"
    )

    # Report model failures as a chat message, matching how the search
    # helpers in this file report their own errors.
    try:
        chat_completion = groq_client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
        )
        reply = (chat_completion.choices[0].message.content or "").strip()
    except Exception as e:
        return f"⚠️ Error contacting the language model: {e}"

    if fallback_used:
        reply += "\n\n📡 Fetched from iCodeGuru YouTube or Website."

    return reply
 
449
 
450
+ # --- GRADIO APP ---
451
 
452
+ def main():
453
+ gr.ChatInterface(
454
+ fn=chat_handler,
455
+ title="๐Ÿค– SmartiBot for iCodeGuru",
456
+ description="Ask any tech question โ€” get answers only from iCodeGuru's YouTube, Website, or uploaded files!",
457
+ additional_inputs=[
458
+ gr.File(file_types=[".txt", ".json"], label="Upload Knowledge Files", file_count="multiple")
459
+ ],
460
+ ).launch()
461
 
462
  if __name__ == "__main__":
463
  main()
464