ilsa15 committed on
Commit
3ab93d5
·
verified ·
1 Parent(s): ecffc5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -181
app.py CHANGED
@@ -494,13 +494,166 @@
494
 
495
 
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  import nest_asyncio
498
  import streamlit as st
499
  import os
500
- import requests
501
- from youtube_transcript_api import YouTubeTranscriptApi
502
  from groq import Groq
503
- from bs4 import BeautifulSoup
504
  from sentence_transformers import SentenceTransformer
505
  import chromadb
506
  from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
@@ -508,15 +661,10 @@ from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunct
508
  nest_asyncio.apply()
509
 
510
  # --- CONFIGURATION ---
511
- YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
512
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
513
- channel_id="UCsv3kmQ5k1eIRG2R9mWN-QA" #channelId
514
-
515
- BASE_URL = "https://icode.guru"
516
-
517
  groq_client = Groq(api_key=GROQ_API_KEY)
518
- embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
519
 
 
520
  chroma_client = chromadb.Client()
521
  collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
522
 
@@ -527,72 +675,11 @@ def search_vector_data(query):
527
  return "\n\n".join([doc for doc in results["documents"][0]])
528
  return None
529
 
530
- # --- Fetch recent video IDs from YouTube channel ---
531
- def get_latest_video_ids(channel_id, max_results=5):
532
- url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
533
- response = requests.get(url)
534
- videos = response.json().get('items', [])
535
-
536
- valid_videos = []
537
- for v in videos:
538
- if v['id']['kind'] == 'youtube#video':
539
- title = v['snippet']['title']
540
- channel_title = v['snippet']['channelTitle']
541
- video_id = v['id']['videoId']
542
- if "icodeguru" in channel_title.lower():
543
- valid_videos.append((video_id, title))
544
- return valid_videos
545
-
546
- # --- Get video transcripts ---
547
- def get_video_transcripts(video_info):
548
- results = []
549
- for vid, title in video_info:
550
- try:
551
- transcript = YouTubeTranscriptApi.get_transcript(vid)
552
- text = " ".join([t['text'] for t in transcript])
553
- video_link = f"https://www.youtube.com/watch?v={vid}"
554
- results.append({
555
- "video_id": vid,
556
- "title": title,
557
- "link": video_link,
558
- "transcript": text
559
- })
560
- except:
561
- continue
562
- return results
563
-
564
- # --- Scrape icode.guru ---
565
- def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
566
- visited = set()
567
- blocks = []
568
-
569
- def crawl(url):
570
- if url in visited or len(visited) >= max_pages:
571
- return
572
- visited.add(url)
573
- try:
574
- res = requests.get(url, timeout=10)
575
- soup = BeautifulSoup(res.content, "html.parser")
576
- page_text = soup.get_text(separator=" ", strip=True)
577
- if len(page_text) > 100:
578
- blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
579
- for link in soup.find_all("a", href=True):
580
- href = link['href']
581
- if href.startswith("/"):
582
- href = base_url + href
583
- if href.startswith(base_url):
584
- crawl(href)
585
- except:
586
- pass
587
-
588
- crawl(base_url)
589
- return blocks
590
-
591
  # --- Ask Groq ---
592
  def ask_groq(context, question):
593
  messages = [
594
- {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
595
- {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
596
  ]
597
  chat_completion = groq_client.chat.completions.create(
598
  model="llama3-8b-8192",
@@ -600,127 +687,28 @@ def ask_groq(context, question):
600
  )
601
  return chat_completion.choices[0].message.content.strip()
602
 
603
- #--- STREAMLIT APP ---
604
  def main():
605
  st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
606
  st.title("🎓 EduBot for @icodeguru0")
607
- st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
608
 
609
  user_question = st.text_input("💬 Ask your question:")
610
 
611
  if user_question:
612
- # Try vector DB first
613
- vector_context = search_vector_data(user_question)
614
- if vector_context:
615
- with st.spinner("🧠 Answering from knowledge base..."):
616
- answer = ask_groq(vector_context, user_question)
 
617
  st.success(answer)
618
  else:
619
- # Fallback to real-time data
620
- with st.spinner("📺 Fetching YouTube videos..."):
621
- video_info = get_latest_video_ids(channel_id, max_results=5)
622
- transcripts = get_video_transcripts(video_info)
623
-
624
- yt_context = ""
625
- relevant_links = []
626
- for vid in transcripts:
627
- yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
628
- if user_question.lower() in vid['transcript'].lower():
629
- relevant_links.append(vid['link'])
630
-
631
- with st.spinner("🌐 Scraping icode.guru..."):
632
- site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
633
- site_context = "\n\n".join(site_blocks)
634
-
635
- full_context = yt_context + "\n\n" + site_context
636
-
637
- with st.spinner("🧠 Thinking..."):
638
- answer = ask_groq(full_context, user_question)
639
- st.success(answer)
640
-
641
- if relevant_links:
642
- st.markdown("### 🔗 Related YouTube Links")
643
- for link in relevant_links:
644
- st.markdown(f"- [Watch Video]({link})")
645
 
646
  st.markdown("---")
647
- st.caption("Powered by YouTube, iCodeGuru, and Groq")
648
 
649
  if __name__ == "__main__":
650
  main()
651
 
652
-
653
-
654
-
655
-
656
- # import os
657
- # import streamlit as st
658
- # import nest_asyncio
659
- # import chromadb
660
- # from groq import Groq
661
- # from sentence_transformers import SentenceTransformer
662
- # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
663
-
664
- # # --- Setup ---
665
- # nest_asyncio.apply()
666
- # GROQ_API_KEY = os.getenv("GROQ_API_KEY") or "your-groq-api-key-here"
667
- # groq_client = Groq(api_key=GROQ_API_KEY)
668
- # embed_model = "all-MiniLM-L6-v2"
669
- # embedding_function = SentenceTransformerEmbeddingFunction(embed_model)
670
- # chroma_client = chromadb.Client()
671
- # collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
672
-
673
- # # --- Load Uploaded Files into ChromaDB ---
674
- # def process_uploaded_files(uploaded_files):
675
- # for file in uploaded_files:
676
- # content = file.read().decode("utf-8", errors="ignore")
677
- # doc_id = f"{file.name}_{abs(hash(content))}"
678
- # try:
679
- # collection.add(documents=[content], metadatas=[{"source": file.name}], ids=[doc_id])
680
- # except chromadb.errors.IDAlreadyExistsError:
681
- # pass
682
- # st.success("✅ Files processed and stored in vector DB.")
683
-
684
- # # --- Search Vector DB ---
685
- # def search_context(query):
686
- # results = collection.query(query_texts=[query], n_results=3)
687
- # if results and results["documents"]:
688
- # return "\n\n".join(results["documents"][0])
689
- # return None
690
-
691
- # # --- Ask Groq LLaMA-3 with Retrieved Context ---
692
- # def ask_groq(context, question):
693
- # messages = [
694
- # {"role": "system", "content": "You are a helpful assistant. Only answer using the provided context."},
695
- # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
696
- # ]
697
- # response = groq_client.chat.completions.create(model="llama3-8b-8192", messages=messages)
698
- # return response.choices[0].message.content.strip()
699
-
700
- # # --- Streamlit UI ---
701
- # def main():
702
- # st.set_page_config(page_title="📚 EduBot for iCodeGuru", layout="wide")
703
- # st.title("🤖 EduBot for iCodeGuru")
704
- # st.markdown("Ask anything based on uploaded knowledge files (text, notes, JSON, etc).")
705
-
706
- # uploaded_files = st.file_uploader("📂 Upload Knowledge Files", type=["txt", "json", "md"], accept_multiple_files=True)
707
- # if uploaded_files:
708
- # process_uploaded_files(uploaded_files)
709
-
710
- # user_question = st.text_input("💬 Ask your question:")
711
- # if user_question:
712
- # with st.spinner("🔍 Searching knowledge base..."):
713
- # context = search_context(user_question)
714
-
715
- # if context:
716
- # with st.spinner("🤖 Generating answer..."):
717
- # answer = ask_groq(context, user_question)
718
- # st.success(answer)
719
- # else:
720
- # st.warning("⚠️ No relevant answer found in uploaded files.")
721
-
722
- # st.markdown("---")
723
- # st.caption("🔗 Powered by ChromaDB, Groq API, and Sentence Transformers")
724
-
725
- # if __name__ == "__main__":
726
- # main()
 
494
 
495
 
496
 
497
+ # import nest_asyncio
498
+ # import streamlit as st
499
+ # import os
500
+ # import requests
501
+ # from youtube_transcript_api import YouTubeTranscriptApi
502
+ # from groq import Groq
503
+ # from bs4 import BeautifulSoup
504
+ # from sentence_transformers import SentenceTransformer
505
+ # import chromadb
506
+ # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
507
+
508
+ # nest_asyncio.apply()
509
+
510
+ # # --- CONFIGURATION ---
511
+ # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
512
+ # GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
513
+ # channel_id="UCsv3kmQ5k1eIRG2R9mWN-QA" #channelId
514
+
515
+ # BASE_URL = "https://icode.guru"
516
+
517
+ # groq_client = Groq(api_key=GROQ_API_KEY)
518
+ # embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
519
+
520
+ # chroma_client = chromadb.Client()
521
+ # collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
522
+
523
+ # # --- Search persistent vector DB ---
524
+ # def search_vector_data(query):
525
+ # results = collection.query(query_texts=[query], n_results=3)
526
+ # if results and results["documents"]:
527
+ # return "\n\n".join([doc for doc in results["documents"][0]])
528
+ # return None
529
+
530
+ # # --- Fetch recent video IDs from YouTube channel ---
531
+ # def get_latest_video_ids(channel_id, max_results=5):
532
+ # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
533
+ # response = requests.get(url)
534
+ # videos = response.json().get('items', [])
535
+
536
+ # valid_videos = []
537
+ # for v in videos:
538
+ # if v['id']['kind'] == 'youtube#video':
539
+ # title = v['snippet']['title']
540
+ # channel_title = v['snippet']['channelTitle']
541
+ # video_id = v['id']['videoId']
542
+ # if "icodeguru" in channel_title.lower():
543
+ # valid_videos.append((video_id, title))
544
+ # return valid_videos
545
+
546
+ # # --- Get video transcripts ---
547
+ # def get_video_transcripts(video_info):
548
+ # results = []
549
+ # for vid, title in video_info:
550
+ # try:
551
+ # transcript = YouTubeTranscriptApi.get_transcript(vid)
552
+ # text = " ".join([t['text'] for t in transcript])
553
+ # video_link = f"https://www.youtube.com/watch?v={vid}"
554
+ # results.append({
555
+ # "video_id": vid,
556
+ # "title": title,
557
+ # "link": video_link,
558
+ # "transcript": text
559
+ # })
560
+ # except:
561
+ # continue
562
+ # return results
563
+
564
+ # # --- Scrape icode.guru ---
565
+ # def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
566
+ # visited = set()
567
+ # blocks = []
568
+
569
+ # def crawl(url):
570
+ # if url in visited or len(visited) >= max_pages:
571
+ # return
572
+ # visited.add(url)
573
+ # try:
574
+ # res = requests.get(url, timeout=10)
575
+ # soup = BeautifulSoup(res.content, "html.parser")
576
+ # page_text = soup.get_text(separator=" ", strip=True)
577
+ # if len(page_text) > 100:
578
+ # blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
579
+ # for link in soup.find_all("a", href=True):
580
+ # href = link['href']
581
+ # if href.startswith("/"):
582
+ # href = base_url + href
583
+ # if href.startswith(base_url):
584
+ # crawl(href)
585
+ # except:
586
+ # pass
587
+
588
+ # crawl(base_url)
589
+ # return blocks
590
+
591
+ # # --- Ask Groq ---
592
+ # def ask_groq(context, question):
593
+ # messages = [
594
+ # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
595
+ # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
596
+ # ]
597
+ # chat_completion = groq_client.chat.completions.create(
598
+ # model="llama3-8b-8192",
599
+ # messages=messages,
600
+ # )
601
+ # return chat_completion.choices[0].message.content.strip()
602
+
603
+ # #--- STREAMLIT APP ---
604
+ # def main():
605
+ # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
606
+ # st.title("🎓 EduBot for @icodeguru0")
607
+ # st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
608
+
609
+ # user_question = st.text_input("💬 Ask your question:")
610
+
611
+ # if user_question:
612
+ # # Try vector DB first
613
+ # vector_context = search_vector_data(user_question)
614
+ # if vector_context:
615
+ # with st.spinner("🧠 Answering from knowledge base..."):
616
+ # answer = ask_groq(vector_context, user_question)
617
+ # st.success(answer)
618
+ # else:
619
+ # # Fallback to real-time data
620
+ # with st.spinner("📺 Fetching YouTube videos..."):
621
+ # video_info = get_latest_video_ids(channel_id, max_results=5)
622
+ # transcripts = get_video_transcripts(video_info)
623
+
624
+ # yt_context = ""
625
+ # relevant_links = []
626
+ # for vid in transcripts:
627
+ # yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
628
+ # if user_question.lower() in vid['transcript'].lower():
629
+ # relevant_links.append(vid['link'])
630
+
631
+ # with st.spinner("🌐 Scraping icode.guru..."):
632
+ # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
633
+ # site_context = "\n\n".join(site_blocks)
634
+
635
+ # full_context = yt_context + "\n\n" + site_context
636
+
637
+ # with st.spinner("🧠 Thinking..."):
638
+ # answer = ask_groq(full_context, user_question)
639
+ # st.success(answer)
640
+
641
+ # if relevant_links:
642
+ # st.markdown("### 🔗 Related YouTube Links")
643
+ # for link in relevant_links:
644
+ # st.markdown(f"- [Watch Video]({link})")
645
+
646
+ # st.markdown("---")
647
+ # st.caption("Powered by YouTube, iCodeGuru, and Groq")
648
+
649
+ # if __name__ == "__main__":
650
+ # main()
651
+
652
+
653
  import nest_asyncio
654
  import streamlit as st
655
  import os
 
 
656
  from groq import Groq
 
657
  from sentence_transformers import SentenceTransformer
658
  import chromadb
659
  from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 
661
  nest_asyncio.apply()
662
 
663
  # --- CONFIGURATION ---
 
664
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 
 
 
 
665
  groq_client = Groq(api_key=GROQ_API_KEY)
 
666
 
667
+ embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
668
  chroma_client = chromadb.Client()
669
  collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
670
 
 
675
  return "\n\n".join([doc for doc in results["documents"][0]])
676
  return None
677
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
  # --- Ask Groq ---
679
  def ask_groq(context, question):
680
  messages = [
681
+ {"role": "system", "content": "You are a helpful assistant. Answer only using the provided context."},
682
+ {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"}
683
  ]
684
  chat_completion = groq_client.chat.completions.create(
685
  model="llama3-8b-8192",
 
687
  )
688
  return chat_completion.choices[0].message.content.strip()
689
 
690
+ # --- Streamlit App ---
691
  def main():
692
  st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
693
  st.title("🎓 EduBot for @icodeguru0")
694
+ st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge (YouTube, JSON, and site data).")
695
 
696
  user_question = st.text_input("💬 Ask your question:")
697
 
698
  if user_question:
699
+ with st.spinner("🔍 Searching knowledge base..."):
700
+ context = search_vector_data(user_question)
701
+
702
+ if context:
703
+ with st.spinner("🤖 Generating answer..."):
704
+ answer = ask_groq(context, user_question)
705
  st.success(answer)
706
  else:
707
+ st.warning("⚠️ No relevant answer found in the embedded knowledge.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
 
709
  st.markdown("---")
710
+ st.caption("Powered by ChromaDB 🧠 and Groq")
711
 
712
  if __name__ == "__main__":
713
  main()
714