ilsa15 committed on
Commit
242921d
·
verified ·
1 Parent(s): bb7c680

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -27
app.py CHANGED
@@ -312,6 +312,185 @@
312
  # if __name__ == "__main__":
313
  # main()
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
 
317
 
@@ -326,8 +505,6 @@ from sentence_transformers import SentenceTransformer
326
  import chromadb
327
  from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
328
 
329
- import json
330
-
331
  nest_asyncio.apply()
332
 
333
  # --- CONFIGURATION ---
@@ -337,31 +514,13 @@ channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
337
  BASE_URL = "https://icode.guru"
338
 
339
  groq_client = Groq(api_key=GROQ_API_KEY)
340
- from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
341
-
342
  embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
343
 
344
  chroma_client = chromadb.Client()
345
  collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
346
 
347
- # --- Upload + load files as vector DB ---
348
- def load_uploaded_vectors(uploaded_files):
349
- data = []
350
- for file in uploaded_files:
351
- if file.name.endswith(".txt"):
352
- text = file.read().decode()
353
- data.append({"id": file.name, "content": text})
354
- elif file.name.endswith(".json"):
355
- content = json.load(file)
356
- for i, chunk in enumerate(content):
357
- data.append({"id": f"{file.name}-{i}", "content": chunk})
358
- return data
359
-
360
- def search_vector_data(query, data):
361
- if not data:
362
- return None
363
- collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function)
364
- collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data])
365
  results = collection.query(query_texts=[query], n_results=3)
366
  if results and results["documents"]:
367
  return "\n\n".join([doc for doc in results["documents"][0]])
@@ -446,16 +605,13 @@ def main():
446
  st.title("🎓 EduBot for @icodeguru0")
447
  st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
448
 
449
- uploaded_files = st.file_uploader("📁 Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
450
  user_question = st.text_input("💬 Ask your question:")
451
 
452
  if user_question:
453
- vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
454
-
455
  # Try vector DB first
456
- vector_context = search_vector_data(user_question, vector_data)
457
  if vector_context:
458
- with st.spinner("🧠 Answering from uploaded knowledge..."):
459
  answer = ask_groq(vector_context, user_question)
460
  st.success(answer)
461
  else:
@@ -491,3 +647,4 @@ def main():
491
 
492
  if __name__ == "__main__":
493
  main()
 
 
312
  # if __name__ == "__main__":
313
  # main()
314
 
315
+ #vectordb
316
+
317
+
318
+ # import nest_asyncio
319
+ # import streamlit as st
320
+ # import os
321
+ # import requests
322
+ # from youtube_transcript_api import YouTubeTranscriptApi
323
+ # from groq import Groq
324
+ # from bs4 import BeautifulSoup
325
+ # from sentence_transformers import SentenceTransformer
326
+ # import chromadb
327
+ # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
328
+
329
+ # import json
330
+
331
+ # nest_asyncio.apply()
332
+
333
+ # # --- CONFIGURATION ---
334
+ # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
335
+ # GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
336
+ # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
337
+ # BASE_URL = "https://icode.guru"
338
+
339
+ # groq_client = Groq(api_key=GROQ_API_KEY)
340
+ # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
341
+
342
+ # embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
343
+
344
+ # chroma_client = chromadb.Client()
345
+ # collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
346
+
347
+ # # --- Upload + load files as vector DB ---
348
+ # def load_uploaded_vectors(uploaded_files):
349
+ # data = []
350
+ # for file in uploaded_files:
351
+ # if file.name.endswith(".txt"):
352
+ # text = file.read().decode()
353
+ # data.append({"id": file.name, "content": text})
354
+ # elif file.name.endswith(".json"):
355
+ # content = json.load(file)
356
+ # for i, chunk in enumerate(content):
357
+ # data.append({"id": f"{file.name}-{i}", "content": chunk})
358
+ # return data
359
+
360
+ # def search_vector_data(query, data):
361
+ # if not data:
362
+ # return None
363
+ # collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function)
364
+ # collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data])
365
+ # results = collection.query(query_texts=[query], n_results=3)
366
+ # if results and results["documents"]:
367
+ # return "\n\n".join([doc for doc in results["documents"][0]])
368
+ # return None
369
+
370
+ # # --- Fetch recent video IDs from YouTube channel ---
371
+ # def get_latest_video_ids(channel_id, max_results=5):
372
+ # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
373
+ # response = requests.get(url)
374
+ # videos = response.json().get('items', [])
375
+
376
+ # valid_videos = []
377
+ # for v in videos:
378
+ # if v['id']['kind'] == 'youtube#video':
379
+ # title = v['snippet']['title']
380
+ # channel_title = v['snippet']['channelTitle']
381
+ # video_id = v['id']['videoId']
382
+ # if "icodeguru" in channel_title.lower():
383
+ # valid_videos.append((video_id, title))
384
+ # return valid_videos
385
+
386
+ # # --- Get video transcripts ---
387
+ # def get_video_transcripts(video_info):
388
+ # results = []
389
+ # for vid, title in video_info:
390
+ # try:
391
+ # transcript = YouTubeTranscriptApi.get_transcript(vid)
392
+ # text = " ".join([t['text'] for t in transcript])
393
+ # video_link = f"https://www.youtube.com/watch?v={vid}"
394
+ # results.append({
395
+ # "video_id": vid,
396
+ # "title": title,
397
+ # "link": video_link,
398
+ # "transcript": text
399
+ # })
400
+ # except:
401
+ # continue
402
+ # return results
403
+
404
+ # # --- Scrape icode.guru ---
405
+ # def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
406
+ # visited = set()
407
+ # blocks = []
408
+
409
+ # def crawl(url):
410
+ # if url in visited or len(visited) >= max_pages:
411
+ # return
412
+ # visited.add(url)
413
+ # try:
414
+ # res = requests.get(url, timeout=10)
415
+ # soup = BeautifulSoup(res.content, "html.parser")
416
+ # page_text = soup.get_text(separator=" ", strip=True)
417
+ # if len(page_text) > 100:
418
+ # blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
419
+ # for link in soup.find_all("a", href=True):
420
+ # href = link['href']
421
+ # if href.startswith("/"):
422
+ # href = base_url + href
423
+ # if href.startswith(base_url):
424
+ # crawl(href)
425
+ # except:
426
+ # pass
427
+
428
+ # crawl(base_url)
429
+ # return blocks
430
+
431
+ # # --- Ask Groq ---
432
+ # def ask_groq(context, question):
433
+ # messages = [
434
+ # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
435
+ # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
436
+ # ]
437
+ # chat_completion = groq_client.chat.completions.create(
438
+ # model="llama3-8b-8192",
439
+ # messages=messages,
440
+ # )
441
+ # return chat_completion.choices[0].message.content.strip()
442
+
443
+ # #--- STREAMLIT APP ---
444
+ # def main():
445
+ # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
446
+ # st.title("🎓 EduBot for @icodeguru0")
447
+ # st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
448
+
449
+ # uploaded_files = st.file_uploader("📁 Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
450
+ # user_question = st.text_input("💬 Ask your question:")
451
+
452
+ # if user_question:
453
+ # vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
454
+
455
+ # # Try vector DB first
456
+ # vector_context = search_vector_data(user_question, vector_data)
457
+ # if vector_context:
458
+ # with st.spinner("🧠 Answering from uploaded knowledge..."):
459
+ # answer = ask_groq(vector_context, user_question)
460
+ # st.success(answer)
461
+ # else:
462
+ # # Fallback to real-time data
463
+ # with st.spinner("📺 Fetching YouTube videos..."):
464
+ # video_info = get_latest_video_ids(channel_id, max_results=5)
465
+ # transcripts = get_video_transcripts(video_info)
466
+
467
+ # yt_context = ""
468
+ # relevant_links = []
469
+ # for vid in transcripts:
470
+ # yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
471
+ # if user_question.lower() in vid['transcript'].lower():
472
+ # relevant_links.append(vid['link'])
473
+
474
+ # with st.spinner("🌐 Scraping icode.guru..."):
475
+ # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
476
+ # site_context = "\n\n".join(site_blocks)
477
+
478
+ # full_context = yt_context + "\n\n" + site_context
479
+
480
+ # with st.spinner("🧠 Thinking..."):
481
+ # answer = ask_groq(full_context, user_question)
482
+ # st.success(answer)
483
+
484
+ # if relevant_links:
485
+ # st.markdown("### 🔗 Related YouTube Links")
486
+ # for link in relevant_links:
487
+ # st.markdown(f"- [Watch Video]({link})")
488
+
489
+ # st.markdown("---")
490
+ # st.caption("Powered by YouTube, iCodeGuru, and Groq")
491
+
492
+ # if __name__ == "__main__":
493
+ # main()
494
 
495
 
496
 
 
505
  import chromadb
506
  from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
507
 
 
 
508
  nest_asyncio.apply()
509
 
510
  # --- CONFIGURATION ---
 
514
  BASE_URL = "https://icode.guru"
515
 
516
  groq_client = Groq(api_key=GROQ_API_KEY)
 
 
517
  embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
518
 
519
  chroma_client = chromadb.Client()
520
  collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
521
 
522
+ # --- Search persistent vector DB ---
523
+ def search_vector_data(query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  results = collection.query(query_texts=[query], n_results=3)
525
  if results and results["documents"]:
526
  return "\n\n".join([doc for doc in results["documents"][0]])
 
605
  st.title("🎓 EduBot for @icodeguru0")
606
  st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
607
 
 
608
  user_question = st.text_input("💬 Ask your question:")
609
 
610
  if user_question:
 
 
611
  # Try vector DB first
612
+ vector_context = search_vector_data(user_question)
613
  if vector_context:
614
+ with st.spinner("🧠 Answering from knowledge base..."):
615
  answer = ask_groq(vector_context, user_question)
616
  st.success(answer)
617
  else:
 
647
 
648
  if __name__ == "__main__":
649
  main()
650
+