Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -649,66 +649,76 @@
|
|
| 649 |
# if __name__ == "__main__":
|
| 650 |
# main()
|
| 651 |
|
| 652 |
-
|
| 653 |
-
import nest_asyncio
|
| 654 |
import streamlit as st
|
| 655 |
import os
|
| 656 |
-
|
| 657 |
-
from
|
|
|
|
| 658 |
import chromadb
|
| 659 |
-
from chromadb.
|
|
|
|
|
|
|
| 660 |
|
| 661 |
-
|
|
|
|
|
|
|
|
|
|
| 662 |
|
| 663 |
-
#
|
| 664 |
-
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 665 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
|
| 670 |
-
|
| 671 |
-
# --- Search persistent vector DB ---
def search_vector_data(query):
    """Return the top-3 matching chunks for *query* joined by blank lines.

    Queries the module-level Chroma ``collection``.  Returns ``None`` when
    nothing relevant is found.

    Fix: Chroma returns ``{"documents": [[]]}`` for a no-hit query — the
    outer list is truthy, so the original returned ``""`` instead of
    ``None``.  We now also check the inner result list.
    """
    results = collection.query(query_texts=[query], n_results=3)
    hits = results.get("documents") if results else None
    if hits and hits[0]:
        return "\n\n".join(hits[0])
    return None
|
| 677 |
-
|
| 678 |
-
# --- Ask Groq ---
def ask_groq(context, question):
    """Send *context* and *question* to the Groq chat API and return the answer.

    The system prompt restricts the model to the supplied context; the
    response text is returned stripped of surrounding whitespace.
    """
    response = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": "You are a helpful assistant. Answer only using the provided context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"},
        ],
    )
    return response.choices[0].message.content.strip()
|
| 689 |
-
|
| 690 |
-
# --- Streamlit App ---
|
| 691 |
-
def main():
|
| 692 |
-
st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
|
| 693 |
-
st.title("🎓 EduBot for @icodeguru0")
|
| 694 |
-
st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge (YouTube, JSON, and site data).")
|
| 695 |
|
| 696 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
|
| 698 |
-
|
| 699 |
-
with st.spinner("🔍 Searching knowledge base..."):
|
| 700 |
-
context = search_vector_data(user_question)
|
| 701 |
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
|
| 709 |
-
|
| 710 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 711 |
|
| 712 |
-
|
| 713 |
-
|
| 714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
# if __name__ == "__main__":
|
| 650 |
# main()
|
| 651 |
|
|
|
|
|
|
|
| 652 |
import streamlit as st
|
| 653 |
import os
|
| 654 |
+
import json
|
| 655 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 656 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 657 |
import chromadb
|
| 658 |
+
from chromadb.config import Settings
|
| 659 |
+
from langchain.vectorstores import Chroma
|
| 660 |
+
from groq import Groq
|
| 661 |
|
| 662 |
+
# ---- Config ----
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # NOTE(review): assumes the key is set in the environment — confirm deployment secrets
DATA_PATH = "data"  # local folder with all files from GitHub repo
MODEL_NAME = "mixtral-8x7b-32768"

# ---- Setup ----
groq_client = Groq(api_key=GROQ_API_KEY)
embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
# NOTE(review): chroma_client is not referenced anywhere below — the langchain
# Chroma wrapper manages its own client via persist_directory; verify callers
# before removing this.
chroma_client = chromadb.Client(
    Settings(persist_directory="chromadb_store", anonymized_telemetry=False)
)
|
| 671 |
+
|
| 672 |
+
# ---- Load and Embed ----
@st.cache_resource
def load_vector_db():
    """Build the Chroma vector store from the files in ``DATA_PATH``.

    Reads every ``.txt`` and ``.json`` file in ``DATA_PATH``, splits the text
    into overlapping chunks, embeds them with ``embed_model``, and persists
    the store to ``"chromadb_store"``.  Cached by Streamlit so the expensive
    embedding pass runs once per session.

    Returns:
        Chroma: the populated, persisted vector store.

    Raises:
        ValueError: if no supported documents are found in ``DATA_PATH``.
    """
    docs = []
    # Hoisted out of the loop: the splitter is pure configuration and was
    # needlessly re-created for every file.
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)

    for fname in os.listdir(DATA_PATH):
        fpath = os.path.join(DATA_PATH, fname)
        # Guard: a directory named e.g. "notes.txt" would crash open() below.
        if not os.path.isfile(fpath):
            continue
        text = _read_document_text(fpath, fname)
        if text is None:
            continue
        docs.extend(splitter.create_documents([text]))

    if not docs:
        # Chroma.from_documents fails opaquely on an empty corpus;
        # fail early with an actionable message instead.
        raise ValueError(f"No .txt or .json documents found in {DATA_PATH!r}")

    # Save to ChromaDB
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embed_model,
        persist_directory="chromadb_store"
    )
    vectordb.persist()
    return vectordb


def _read_document_text(fpath, fname):
    """Return the raw text of a supported file, or None for unsupported names.

    ``.txt`` files are read verbatim; ``.json`` files are parsed and
    re-serialized so malformed JSON is reported here, not at embedding time.
    """
    if fname.endswith(".txt"):
        with open(fpath, 'r', encoding='utf-8') as f:
            return f.read()
    if fname.endswith(".json"):
        with open(fpath, 'r', encoding='utf-8') as f:
            return json.dumps(json.load(f))
    return None


db = load_vector_db()
|
|
|
|
|
|
|
| 702 |
|
| 703 |
+
# ---- RAG QA ----
def answer_with_rag(query):
    """Answer *query* from the embedded knowledge base via Groq.

    Retrieves the top-3 similar chunks from the module-level vector store
    ``db``; when nothing matches, returns a warning string instead of
    calling the model.
    """
    matches = db.similarity_search(query, k=3)
    if not matches:
        return "⚠️ No relevant answer found in embedded knowledge."

    context = "\n".join(m.page_content for m in matches)
    prompt = f"Answer the following using only the provided context:\n\nContext:\n{context}\n\nQuestion: {query}"
    completion = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=MODEL_NAME,
    )
    return completion.choices[0].message.content
|
| 716 |
|
| 717 |
+
# ---- Streamlit UI ----
st.title("📚 iCodeGuru ChatBot (RAG + Chroma + Groq)")

user_query = st.text_input("Ask me something about iCodeGuru:")
if user_query:
    with st.spinner("Thinking..."):
        response = answer_with_rag(user_query)
        st.success(response)
|