ilsa15 committed on
Commit
4d9037a
·
verified ·
1 Parent(s): 6a159ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -18
app.py CHANGED
@@ -311,6 +311,10 @@
311
 
312
  # if __name__ == "__main__":
313
  # main()
 
 
 
 
314
  import nest_asyncio
315
  import streamlit as st
316
  import os
@@ -321,6 +325,7 @@ from bs4 import BeautifulSoup
321
  from sentence_transformers import SentenceTransformer
322
  import chromadb
323
  from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 
324
  import json
325
 
326
  nest_asyncio.apply()
@@ -332,21 +337,36 @@ channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
332
  BASE_URL = "https://icode.guru"
333
 
334
  groq_client = Groq(api_key=GROQ_API_KEY)
335
- embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
 
336
 
337
  chroma_client = chromadb.Client()
338
- collection = chroma_client.get_or_create_collection(
339
- "icodeguru_knowledge", embedding_function=embedding_function
340
- )
341
-
342
- # --- Search stored vector DB ---
343
- def search_vector_data(query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  results = collection.query(query_texts=[query], n_results=3)
345
  if results and results["documents"]:
346
- return "\n\n".join(results["documents"][0])
347
  return None
348
 
349
- # --- Fetch recent videos from YouTube channel ---
350
  def get_latest_video_ids(channel_id, max_results=5):
351
  url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
352
  response = requests.get(url)
@@ -425,19 +445,22 @@ def main():
425
  st.title("🎓 EduBot for @icodeguru0")
426
  st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
427
 
 
428
  user_question = st.text_input("💬 Ask your question:")
429
 
430
  if user_question:
431
- # 1. Try vector DB first
432
- vector_context = search_vector_data(user_question)
 
 
433
  if vector_context:
434
- with st.spinner("🧠 Answering from stored vector knowledge..."):
435
  answer = ask_groq(vector_context, user_question)
436
  st.success(answer)
437
  else:
438
- # 2. Fallback to real-time data
439
- with st.spinner("📺 Fetching latest iCodeGuru YouTube videos..."):
440
- video_info = get_latest_video_ids(channel_id)
441
  transcripts = get_video_transcripts(video_info)
442
 
443
  yt_context = ""
@@ -447,8 +470,8 @@ def main():
447
  if user_question.lower() in vid['transcript'].lower():
448
  relevant_links.append(vid['link'])
449
 
450
- with st.spinner("🌐 Scraping iCodeGuru website..."):
451
- site_blocks = scrape_icodeguru()
452
  site_context = "\n\n".join(site_blocks)
453
 
454
  full_context = yt_context + "\n\n" + site_context
@@ -467,4 +490,3 @@ def main():
467
 
468
  if __name__ == "__main__":
469
  main()
470
-
 
311
 
312
  # if __name__ == "__main__":
313
  # main()
314
+
315
+
316
+
317
+
318
  import nest_asyncio
319
  import streamlit as st
320
  import os
 
325
  from sentence_transformers import SentenceTransformer
326
  import chromadb
327
  from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
328
+
329
  import json
330
 
331
  nest_asyncio.apply()
 
337
  BASE_URL = "https://icode.guru"
338
 
339
  groq_client = Groq(api_key=GROQ_API_KEY)
340
+ embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
341
+ embedding_function = SentenceTransformerEmbeddingFunction(embed_model)
342
 
343
  chroma_client = chromadb.Client()
344
+ collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
345
+
346
+ # --- Upload + load files as vector DB ---
347
+ def load_uploaded_vectors(uploaded_files):
348
+ data = []
349
+ for file in uploaded_files:
350
+ if file.name.endswith(".txt"):
351
+ text = file.read().decode()
352
+ data.append({"id": file.name, "content": text})
353
+ elif file.name.endswith(".json"):
354
+ content = json.load(file)
355
+ for i, chunk in enumerate(content):
356
+ data.append({"id": f"{file.name}-{i}", "content": chunk})
357
+ return data
358
+
359
+ def search_vector_data(query, data):
360
+ if not data:
361
+ return None
362
+ collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function)
363
+ collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data])
364
  results = collection.query(query_texts=[query], n_results=3)
365
  if results and results["documents"]:
366
+ return "\n\n".join([doc for doc in results["documents"][0]])
367
  return None
368
 
369
+ # --- Fetch recent video IDs from YouTube channel ---
370
  def get_latest_video_ids(channel_id, max_results=5):
371
  url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
372
  response = requests.get(url)
 
445
  st.title("🎓 EduBot for @icodeguru0")
446
  st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
447
 
448
+ uploaded_files = st.file_uploader("📁 Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
449
  user_question = st.text_input("💬 Ask your question:")
450
 
451
  if user_question:
452
+ vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
453
+
454
+ # Try vector DB first
455
+ vector_context = search_vector_data(user_question, vector_data)
456
  if vector_context:
457
+ with st.spinner("🧠 Answering from uploaded knowledge..."):
458
  answer = ask_groq(vector_context, user_question)
459
  st.success(answer)
460
  else:
461
+ # Fallback to real-time data
462
+ with st.spinner("📺 Fetching YouTube videos..."):
463
+ video_info = get_latest_video_ids(channel_id, max_results=5)
464
  transcripts = get_video_transcripts(video_info)
465
 
466
  yt_context = ""
 
470
  if user_question.lower() in vid['transcript'].lower():
471
  relevant_links.append(vid['link'])
472
 
473
+ with st.spinner("🌐 Scraping icode.guru..."):
474
+ site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
475
  site_context = "\n\n".join(site_blocks)
476
 
477
  full_context = yt_context + "\n\n" + site_context
 
490
 
491
  if __name__ == "__main__":
492
  main()