# chatbot / app.py
# Uploaded by ilsa15 -- commit 5090c0a ("Update app.py", verified)
# import nest_asyncio
# from youtube_transcript_api import YouTubeTranscriptApi
# import streamlit as st
# import os
# from groq import Groq
# nest_asyncio.apply()
# # --- CONFIGURATION ---
# YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
# channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
# # Initialize Groq client once
# groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
# def get_latest_video_ids(channel_id, max_results=5):
# import requests
# url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
# response = requests.get(url)
# videos = response.json().get('items', [])
# return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
# # --- FUNCTION: Get video transcripts ---
# def get_video_transcripts(video_ids):
# all_transcripts = []
# for vid in video_ids:
# try:
# transcript = YouTubeTranscriptApi.get_transcript(vid)
# text = " ".join([t['text'] for t in transcript])
# all_transcripts.append(text)
# except:
# continue
# return all_transcripts
# # --- FUNCTION: Ask Groq API using official client ---
# def ask_groq(context, question):
# messages = [
# {"role": "system", "content": "You are a helpful assistant."},
# {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
# ]
# chat_completion = groq_client.chat.completions.create(
# model="llama-3.3-70b-versatile", # Or the model you have access to
# messages=messages,
# )
# return chat_completion.choices[0].message.content.strip()
# # --- STREAMLIT APP ---
# def main():
# st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide")
# st.title("๐ŸŽ“ EduBot for @icodeguru0")
# st.markdown("Ask anything based on the channelโ€™s recent videos.")
# question = st.text_input("๐Ÿ’ฌ Ask your question here:")
# if question:
# with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
# video_ids = get_latest_video_ids(channel_id)
# transcripts = get_video_transcripts(video_ids)
# full_context = "\n\n".join(transcripts)
# with st.spinner("๐Ÿง  Thinking..."):
# answer = ask_groq(full_context, question)
# st.success(answer)
# st.markdown("---")
# st.caption("Powered by YouTube + Groq | Built for @icodeguru0")
# if __name__ == "__main__":
# main()
# # import nest_asyncio
# # from youtube_transcript_api import YouTubeTranscriptApi
# # import streamlit as st
# # import os
# # from groq import Groq
# # import requests
# # from bs4 import BeautifulSoup
# # nest_asyncio.apply()
# # # --- CONFIGURATION ---
# # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets
# # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0
# # BASE_URL = "https://icode.guru"
# # # Initialize Groq client once
# # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# # # --- FUNCTION: Fetch recent video IDs from YouTube channel ---
# # def get_latest_video_ids(channel_id, max_results=5):
# # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
# # response = requests.get(url)
# # videos = response.json().get('items', [])
# # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video']
# # # --- FUNCTION: Get video transcripts ---
# # def get_video_transcripts(video_ids):
# # all_transcripts = []
# # for vid in video_ids:
# # try:
# # transcript = YouTubeTranscriptApi.get_transcript(vid)
# # text = " ".join([t['text'] for t in transcript])
# # all_transcripts.append(text)
# # except:
# # continue
# # return all_transcripts
# # # --- NEW FUNCTION: Scrape icode.guru ---
# # def scrape_icodeguru(base_url="https://icode.guru", max_pages=5):
# # visited = set()
# # blocks = []
# # def crawl(url):
# # if url in visited or len(visited) >= max_pages:
# # return
# # visited.add(url)
# # try:
# # res = requests.get(url, timeout=10)
# # soup = BeautifulSoup(res.content, "html.parser")
# # page_text = soup.get_text(separator=" ", strip=True)
# # if len(page_text) > 100:
# # blocks.append(f"[Source]({url}):\n{page_text[:2000]}")
# # for link in soup.find_all("a", href=True):
# # href = link['href']
# # if href.startswith("/"):
# # href = base_url + href
# # if href.startswith(base_url):
# # crawl(href)
# # except:
# # pass
# # crawl(base_url)
# # return blocks
# # # --- FUNCTION: Ask Groq API using official client ---
# # def ask_groq(context, question):
# # messages = [
# # {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."},
# # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"}
# # ]
# # chat_completion = groq_client.chat.completions.create(
# # model="llama-3.3-70b-versatile", # Or the model you have access to
# # messages=messages,
# # )
# # return chat_completion.choices[0].message.content.strip()
# # # --- STREAMLIT APP ---
# # def main():
# # st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide")
# # st.title("๐ŸŽ“ EduBot for @icodeguru0")
# # st.markdown("Ask anything based on the channelโ€™s recent videos and website content from [icode.guru](https://icode.guru).")
# # question = st.text_input("๐Ÿ’ฌ Ask your question here:")
# # if question:
# # with st.spinner("๐Ÿ” Fetching videos and transcripts..."):
# # video_ids = get_latest_video_ids(channel_id)
# # transcripts = get_video_transcripts(video_ids)
# # yt_context = "\n\n".join(transcripts)
# # with st.spinner("๐ŸŒ Scraping icode.guru..."):
# # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
# # site_context = "\n\n".join(site_blocks)
# # full_context = yt_context + "\n\n" + site_context
# # with st.spinner("๐Ÿง  Thinking..."):
# # answer = ask_groq(full_context, question)
# # st.success(answer)
# # st.markdown("---")
# # st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0")
# # if __name__ == "__main__":
# # main()
# --- Archived version (commented out): YouTube transcripts + icode.guru website scraping ---
# import nest_asyncio
# import streamlit as st
# import os
# import requests
# from youtube_transcript_api import YouTubeTranscriptApi
# from groq import Groq
# from bs4 import BeautifulSoup
# nest_asyncio.apply()
# # --- CONFIGURATION ---
# YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
# GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
# BASE_URL = "https://icode.guru"
# groq_client = Groq(api_key=GROQ_API_KEY)
# # --- Fetch recent video IDs from YouTube channel ---
# def get_latest_video_ids(channel_id, max_results=5):
# url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
# response = requests.get(url)
# videos = response.json().get('items', [])
# valid_videos = []
# for v in videos:
# if v['id']['kind'] == 'youtube#video':
# title = v['snippet']['title']
# channel_title = v['snippet']['channelTitle']
# video_id = v['id']['videoId']
# if "icodeguru" in channel_title.lower(): # โœ… Extra validation
# valid_videos.append((video_id, title))
# return valid_videos
# # --- Get video transcripts ---
# def get_video_transcripts(video_info):
# results = []
# for vid, title in video_info:
# try:
# transcript = YouTubeTranscriptApi.get_transcript(vid)
# text = " ".join([t['text'] for t in transcript])
# video_link = f"https://www.youtube.com/watch?v={vid}"
# results.append({
# "video_id": vid,
# "title": title,
# "link": video_link,
# "transcript": text
# })
# except Exception as e:
# continue
# return results
# # --- Scrape icode.guru ---
# def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
# visited = set()
# blocks = []
# def crawl(url):
# if url in visited or len(visited) >= max_pages:
# return
# visited.add(url)
# try:
# res = requests.get(url, timeout=10)
# soup = BeautifulSoup(res.content, "html.parser")
# page_text = soup.get_text(separator=" ", strip=True)
# if len(page_text) > 100:
# blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
# for link in soup.find_all("a", href=True):
# href = link['href']
# if href.startswith("/"):
# href = base_url + href
# if href.startswith(base_url):
# crawl(href)
# except:
# pass
# crawl(base_url)
# return blocks
# # --- Ask Groq ---
# def ask_groq(context, question):
# messages = [
# {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
# {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
# ]
# chat_completion = groq_client.chat.completions.create(
# model="llama-3.3-70b-versatile",
# messages=messages,
# )
# return chat_completion.choices[0].message.content.strip()
# #--- STREAMLIT APP ---
# def main():
# st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
# st.title("๐ŸŽ“ EduBot for @icodeguru0")
# st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
# question = st.text_input("๐Ÿ’ฌ Ask your question:")
# if question:
# with st.spinner("๐Ÿ“บ Fetching YouTube videos..."):
# video_info = get_latest_video_ids(channel_id, max_results=5)
# transcripts = get_video_transcripts(video_info)
# yt_context = ""
# relevant_links = []
# for vid in transcripts:
# yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
# if question.lower() in vid['transcript'].lower():
# relevant_links.append(vid['link'])
# with st.spinner("๐ŸŒ Scraping icode.guru..."):
# site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
# site_context = "\n\n".join(site_blocks)
# full_context = yt_context + "\n\n" + site_context
# with st.spinner("๐Ÿง  Thinking..."):
# answer = ask_groq(full_context, question)
# st.success(answer)
# if relevant_links:
# st.markdown("### ๐Ÿ”— Related YouTube Links")
# for link in relevant_links:
# st.markdown(f"- [Watch Video]({link})")
# st.markdown("---")
# st.caption("Powered by YouTube, iCodeGuru, and Groq")
# if __name__ == "__main__":
# main()
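# --- Archived version (commented out): uploaded files searched through a vector DB, with YouTube/website fallback ---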
# import nest_asyncio
# import streamlit as st
# import os
# import requests
# from youtube_transcript_api import YouTubeTranscriptApi
# from groq import Groq
# from bs4 import BeautifulSoup
# from sentence_transformers import SentenceTransformer
# import chromadb
# from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
# import json
# nest_asyncio.apply()
# # --- CONFIGURATION ---
# YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
# GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru
# BASE_URL = "https://icode.guru"
# groq_client = Groq(api_key=GROQ_API_KEY)
# from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
# embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
# chroma_client = chromadb.Client()
# collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
# # --- Upload + load files as vector DB ---
# def load_uploaded_vectors(uploaded_files):
# data = []
# for file in uploaded_files:
# if file.name.endswith(".txt"):
# text = file.read().decode()
# data.append({"id": file.name, "content": text})
# elif file.name.endswith(".json"):
# content = json.load(file)
# for i, chunk in enumerate(content):
# data.append({"id": f"{file.name}-{i}", "content": chunk})
# return data
# def search_vector_data(query, data):
# if not data:
# return None
# collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function)
# collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data])
# results = collection.query(query_texts=[query], n_results=3)
# if results and results["documents"]:
# return "\n\n".join([doc for doc in results["documents"][0]])
# return None
# # --- Fetch recent video IDs from YouTube channel ---
# def get_latest_video_ids(channel_id, max_results=5):
# url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}"
# response = requests.get(url)
# videos = response.json().get('items', [])
# valid_videos = []
# for v in videos:
# if v['id']['kind'] == 'youtube#video':
# title = v['snippet']['title']
# channel_title = v['snippet']['channelTitle']
# video_id = v['id']['videoId']
# if "icodeguru" in channel_title.lower():
# valid_videos.append((video_id, title))
# return valid_videos
# # --- Get video transcripts ---
# def get_video_transcripts(video_info):
# results = []
# for vid, title in video_info:
# try:
# transcript = YouTubeTranscriptApi.get_transcript(vid)
# text = " ".join([t['text'] for t in transcript])
# video_link = f"https://www.youtube.com/watch?v={vid}"
# results.append({
# "video_id": vid,
# "title": title,
# "link": video_link,
# "transcript": text
# })
# except:
# continue
# return results
# # --- Scrape icode.guru ---
# def scrape_icodeguru(base_url=BASE_URL, max_pages=5):
# visited = set()
# blocks = []
# def crawl(url):
# if url in visited or len(visited) >= max_pages:
# return
# visited.add(url)
# try:
# res = requests.get(url, timeout=10)
# soup = BeautifulSoup(res.content, "html.parser")
# page_text = soup.get_text(separator=" ", strip=True)
# if len(page_text) > 100:
# blocks.append(f"[{url}]({url}):\n{page_text[:1500]}")
# for link in soup.find_all("a", href=True):
# href = link['href']
# if href.startswith("/"):
# href = base_url + href
# if href.startswith(base_url):
# crawl(href)
# except:
# pass
# crawl(base_url)
# return blocks
# # --- Ask Groq ---
# def ask_groq(context, question):
# messages = [
# {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
# {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
# ]
# chat_completion = groq_client.chat.completions.create(
# model="llama3-8b-8192",
# messages=messages,
# )
# return chat_completion.choices[0].message.content.strip()
# #--- STREAMLIT APP ---
# def main():
# st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
# st.title("๐ŸŽ“ EduBot for @icodeguru0")
# st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).")
# uploaded_files = st.file_uploader("๐Ÿ“ Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True)
# user_question = st.text_input("๐Ÿ’ฌ Ask your question:")
# if user_question:
# vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else []
# # Try vector DB first
# vector_context = search_vector_data(user_question, vector_data)
# if vector_context:
# with st.spinner("๐Ÿง  Answering from uploaded knowledge..."):
# answer = ask_groq(vector_context, user_question)
# st.success(answer)
# else:
# # Fallback to real-time data
# with st.spinner("๐Ÿ“บ Fetching YouTube videos..."):
# video_info = get_latest_video_ids(channel_id, max_results=5)
# transcripts = get_video_transcripts(video_info)
# yt_context = ""
# relevant_links = []
# for vid in transcripts:
# yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}"
# if user_question.lower() in vid['transcript'].lower():
# relevant_links.append(vid['link'])
# with st.spinner("๐ŸŒ Scraping icode.guru..."):
# site_blocks = scrape_icodeguru(BASE_URL, max_pages=5)
# site_context = "\n\n".join(site_blocks)
# full_context = yt_context + "\n\n" + site_context
# with st.spinner("๐Ÿง  Thinking..."):
# answer = ask_groq(full_context, user_question)
# st.success(answer)
# if relevant_links:
# st.markdown("### ๐Ÿ”— Related YouTube Links")
# for link in relevant_links:
# st.markdown(f"- [Watch Video]({link})")
# st.markdown("---")
# st.caption("Powered by YouTube, iCodeGuru, and Groq")
# if __name__ == "__main__":
# main()
# import nest_asyncio
# import streamlit as st
# import os
# from groq import Groq
# from sentence_transformers import SentenceTransformer
# import chromadb
# from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
# nest_asyncio.apply()
# # --- CONFIGURATION ---
# GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# groq_client = Groq(api_key=GROQ_API_KEY)
# embedding_function = SentenceTransformerEmbeddingFunction(
# model_name="all-MiniLM-L6-v2",
# device="cpu"
# )
# chroma_client = chromadb.Client()
# collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
# # --- Search persistent vector DB ---
# def search_vector_data(query):
# results = collection.query(query_texts=[query], n_results=3)
# if results and results["documents"]:
# return "\n\n".join([doc for doc in results["documents"][0]])
# return None
# # --- Ask Groq ---
# def ask_groq(context, question):
# messages = [
# {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
# {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
# ]
# chat_completion = groq_client.chat.completions.create(
# model="llama3-8b-8192",
# messages=messages,
# )
# return chat_completion.choices[0].message.content.strip()
# #--- STREAMLIT APP ---
# def main():
# st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
# st.title("๐ŸŽ“ EduBot for @icodeguru0")
# st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.")
# user_question = st.text_input("๐Ÿ’ฌ Ask your question:")
# if user_question:
# # Try vector DB first
# vector_context = search_vector_data(user_question)
# if vector_context:
# with st.spinner("๐Ÿง  Answering from knowledge base..."):
# answer = ask_groq(vector_context, user_question)
# st.success(answer)
# else:
# st.warning("โš ๏ธ No relevant answer found in the embedded knowledge.")
# st.markdown("---")
# st.caption("Powered by ChromaDB ๐Ÿง  and Groq โšก")
# if __name__ == "__main__":
# main()
# import nest_asyncio
# import streamlit as st
# import os
# from groq import Groq
# from sentence_transformers import SentenceTransformer
# import chromadb
# from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
# from chromadb.config import Settings
# # Apply asyncio patch (required for nested event loops in Streamlit)
# nest_asyncio.apply()
# # --- CONFIGURATION ---
# GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# GROQ_MODEL = "llama3-8b-8192"
# # Initialize Groq client
# groq_client = Groq(api_key=GROQ_API_KEY)
# # Initialize embedding model
# embedding_function = SentenceTransformerEmbeddingFunction(
# model_name="all-MiniLM-L6-v2",
# device="cpu"
# )
# # Initialize ChromaDB with persistence
# chroma_client = chromadb.PersistentClient(path="./chroma_db", settings=Settings(anonymized_telemetry=False))
# collection = chroma_client.get_or_create_collection(
# name="icodeguru_knowledge",
# embedding_function=embedding_function
# )
# # --- Search embedded knowledge ---
# def search_vector_data(query):
# try:
# results = collection.query(query_texts=[query], n_results=3)
# if results and results["documents"]:
# return "\n\n".join(results["documents"][0])
# except Exception as e:
# st.error(f"Vector search error: {e}")
# return None
# # --- Ask Groq ---
# def ask_groq(context, question):
# messages = [
# {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
# {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
# ]
# response = groq_client.chat.completions.create(
# model=GROQ_MODEL,
# messages=messages
# )
# return response.choices[0].message.content.strip()
# # --- Streamlit UI ---
# def main():
# st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
# st.title("๐ŸŽ“ EduBot for @icodeguru0")
# st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.")
# user_question = st.text_input("๐Ÿ’ฌ Ask your question:")
# if user_question:
# vector_context = search_vector_data(user_question)
# if vector_context:
# with st.spinner("๐Ÿง  Answering from knowledge base..."):
# answer = ask_groq(vector_context, user_question)
# st.success(answer)
# else:
# st.warning("โš ๏ธ No relevant answer found in the embedded knowledge.")
# st.markdown("---")
# st.caption("Powered by ChromaDB ๐Ÿง  and Groq โšก")
# # โœ… This is the correct way to run the app
# if __name__ == "__main__":
# main()
import nest_asyncio
import streamlit as st
import os
import json
from groq import Groq
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
from chromadb.config import Settings
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Patch asyncio through nest_asyncio (workaround needed when running under Streamlit).
nest_asyncio.apply()

# --- CONFIGURATION ---
# The API key is read from the environment; it is None when the variable is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = "llama3-8b-8192"

# Groq client used by ask_groq() for chat completions.
groq_client = Groq(api_key=GROQ_API_KEY)

# Earlier approach, kept for reference: load the SentenceTransformer model
# explicitly first (to avoid a "meta tensor" bug) and pass it to Chroma.
# embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# embedding_function = SentenceTransformerEmbeddingFunction(embedding_model=embedding_model)

# Embedding function handed to the Chroma collection; pinned to the CPU.
embedding_function = SentenceTransformerEmbeddingFunction(
    device="cpu",
    model_name="all-MiniLM-L6-v2",
)

# Persistent Chroma client stored under ./chroma_db, with telemetry disabled.
chroma_client = chromadb.PersistentClient(
    settings=Settings(anonymized_telemetry=False),
    path="./chroma_db",
)

# Collection that holds the embedded iCodeGuru knowledge chunks.
collection = chroma_client.get_or_create_collection(
    embedding_function=embedding_function,
    name="icodeguru_knowledge",
)
# --- Ingest JSON Files from /docs/ ---
def _chunk_text(chunk):
    """Return the chunk's page content as a single stripped string."""
    content = chunk.page_content
    if isinstance(content, list):
        # Flatten list content into one space-separated string.
        return " ".join(str(item) for item in content).strip()
    return str(content).strip()


def ingest_docs_to_chroma():
    """Load every ``*.json`` file in ``./docs``, chunk it, and store the chunks in Chroma.

    Each file is read with ``JSONLoader`` (``jq_schema='.[]'``), split into
    500-character chunks with a 50-character overlap, and written to the
    module-level ``collection``. Progress is reported through Streamlit.

    Chunk IDs are the SHA-256 hex digest of the chunk text. The built-in
    ``hash()`` is randomized per process for strings, so IDs derived from it
    change on every restart and the persistent collection would accumulate
    duplicates. With content-derived IDs and ``upsert`` the ingestion can be
    re-run without creating new entries. Entries written by earlier runs
    under ``hash()``-based IDs are not cleaned up here.

    Returns:
        None. Shows a warning and returns early when ``./docs`` does not exist.
    """
    import hashlib  # local import: only this function needs it

    folder_path = "./docs"
    if not os.path.isdir(folder_path):
        st.warning(f"Docs folder not found: {folder_path}")
        return

    all_docs = []
    # Sorted so files are loaded and reported in a stable order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(folder_path, filename)
        loader = JSONLoader(file_path=file_path, jq_schema='.[]')
        docs = loader.load()
        all_docs.extend(docs)
        st.write(f"Loaded {len(docs)} documents from {filename}")

    # Chunk Documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(all_docs)
    st.write(f"Total chunks created: {len(chunks)}")

    # Map stable ID -> (text, metadata). Identical chunks collapse to one
    # entry (first occurrence wins) so no batch contains a repeated ID.
    unique_chunks = {}
    for chunk in chunks:
        content = _chunk_text(chunk)
        if not content:
            continue  # nothing to embed
        doc_id = hashlib.sha256(content.encode("utf-8")).hexdigest()
        unique_chunks.setdefault(doc_id, (content, chunk.metadata))

    # Add Chunks to ChromaDB in batches instead of one call per chunk.
    batch_size = 256
    doc_ids = list(unique_chunks)
    for start in range(0, len(doc_ids), batch_size):
        batch_ids = doc_ids[start:start + batch_size]
        collection.upsert(
            ids=batch_ids,
            documents=[unique_chunks[doc_id][0] for doc_id in batch_ids],
            metadatas=[unique_chunks[doc_id][1] for doc_id in batch_ids],
        )

    st.success("✅ Knowledge Base Updated Successfully!")
# --- Search embedded knowledge ---
def search_vector_data(query):
    """Query the knowledge-base collection for the three nearest chunks.

    Args:
        query: The text to search for.

    Returns:
        The matched documents joined by blank lines, or ``None`` when the
        result is empty or the lookup raises (the error is shown with
        ``st.error``).
    """
    matched_text = None
    try:
        hits = collection.query(n_results=3, query_texts=[query])
        documents = hits["documents"] if hits else None
        if documents:
            # Results are grouped per query text; only one query was sent.
            matched_text = "\n\n".join(documents[0])
    except Exception as e:
        st.error(f"Vector search error: {e}")
    return matched_text
# --- Ask Groq LLM ---
def ask_groq(context, question):
    """Ask the Groq chat model to answer ``question`` using ``context``.

    Args:
        context: Text inserted into the user prompt as the answer's context.
        question: The question to answer.

    Returns:
        The content of the first completion choice, with surrounding
        whitespace stripped.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant. Always provide relevant video and website links if possible.",
    }
    user_turn = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):",
    }
    completion = groq_client.chat.completions.create(
        messages=[system_turn, user_turn],
        model=GROQ_MODEL,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
# --- Streamlit UI ---
def main():
    """Render the EduBot page: refresh the knowledge base, then answer questions.

    The knowledge base is ingested from ``/docs/`` once per browser session.
    Streamlit re-executes the script on every widget interaction, so without
    the ``st.session_state`` guard the whole ingestion would run again for
    every question. An ingestion failure is reported on the page and does not
    stop the app, so questions are still sent to the existing collection.
    """
    st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
    st.title("🎓 EduBot for @icodeguru0")
    st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.")

    # --- Update Knowledge Base once per session ---
    if "kb_loaded" not in st.session_state:
        st.info("🔄 Updating Knowledge Base from /docs/...")
        try:
            ingest_docs_to_chroma()
        except Exception as e:
            # Top-level UI boundary: show the problem and keep the page usable.
            # The flag stays unset, so the next rerun tries again.
            st.error(f"Knowledge base update failed: {e}")
        else:
            st.session_state["kb_loaded"] = True
            st.success("✅ Knowledge Base Loaded Successfully!")

    st.markdown("---")
    user_question = st.text_input("💬 Ask your question:")
    # Ignore input that is empty or only whitespace.
    question = user_question.strip() if user_question else ""
    if question:
        vector_context = search_vector_data(question)
        if vector_context:
            with st.spinner("🧠 Answering from knowledge base..."):
                answer = ask_groq(vector_context, question)
            st.success(answer)
        else:
            st.warning("⚠️ No relevant answer found in the embedded knowledge.")

    st.markdown("---")
    st.caption("Powered by ChromaDB 🧠 and Groq ⚡")


if __name__ == "__main__":
    main()