ilsa15 committed on
Commit
ff730fe
·
verified ·
1 Parent(s): 3ab93d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -50
app.py CHANGED
@@ -649,66 +649,76 @@
649
  # if __name__ == "__main__":
650
  # main()
651
 
652
-
653
- import nest_asyncio
654
  import streamlit as st
655
  import os
656
- from groq import Groq
657
- from sentence_transformers import SentenceTransformer
 
658
  import chromadb
659
- from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 
 
660
 
661
- nest_asyncio.apply()
 
 
 
662
 
663
- # --- CONFIGURATION ---
664
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
665
  groq_client = Groq(api_key=GROQ_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
 
667
- embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
668
- chroma_client = chromadb.Client()
669
- collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function)
670
-
671
# --- Search persistent vector DB ---
def search_vector_data(query):
    """Return the top matching knowledge chunks for *query* as one context string.

    Queries the module-level Chroma ``collection`` for the 3 nearest chunks.

    Args:
        query: Free-text user question to embed and search with.

    Returns:
        The matched document texts joined by blank lines, or ``None`` when
        the collection has no documents for the query.
    """
    results = collection.query(query_texts=[query], n_results=3)
    # Chroma returns documents as a list-of-lists ([[...]]); guard the inner
    # list too, otherwise an empty hit set would join to "" instead of None.
    if results and results.get("documents") and results["documents"][0]:
        return "\n\n".join(results["documents"][0])
    return None
677
-
678
# --- Ask Groq ---
def ask_groq(context, question):
    """Ask the Groq chat model *question*, restricted to the supplied *context*.

    Args:
        context: Retrieved knowledge text the model must answer from.
        question: The user's question.

    Returns:
        The model's answer with surrounding whitespace stripped.
    """
    system_msg = {
        "role": "system",
        "content": "You are a helpful assistant. Answer only using the provided context.",
    }
    user_msg = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:",
    }
    response = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[system_msg, user_msg],
    )
    return response.choices[0].message.content.strip()
689
-
690
# --- Streamlit App ---
def main():
    """Render the EduBot UI: take a question, retrieve context, show the answer."""
    st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
    st.title("🎓 EduBot for @icodeguru0")
    st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge (YouTube, JSON, and site data).")

    question = st.text_input("💬 Ask your question:")

    if question:
        with st.spinner("🔍 Searching knowledge base..."):
            ctx = search_vector_data(question)

        if ctx:
            with st.spinner("🤖 Generating answer..."):
                st.success(ask_groq(ctx, question))
        else:
            st.warning("⚠️ No relevant answer found in the embedded knowledge.")

    st.markdown("---")
    st.caption("Powered by ChromaDB 🧠 and Groq ⚡")


if __name__ == "__main__":
    main()
714
 
 
 
 
 
 
 
649
  # if __name__ == "__main__":
650
  # main()
651
 
 
 
652
  import streamlit as st
653
  import os
654
+ import json
655
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
656
+ from langchain.embeddings import HuggingFaceEmbeddings
657
  import chromadb
658
+ from chromadb.config import Settings
659
+ from langchain.vectorstores import Chroma
660
+ from groq import Groq
661
 
662
# ---- Config ----
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # read from environment; None when unset
MODEL_NAME = "mixtral-8x7b-32768"  # Groq-hosted chat model used for answering
DATA_PATH = "data" # local folder with all files from GitHub repo

# ---- Setup ----
# Groq client used for chat completions below.
groq_client = Groq(api_key=GROQ_API_KEY)
# Embedding model handed to the LangChain Chroma store when chunks are indexed.
embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
# NOTE(review): this low-level client points at the same persist directory,
# but the store below is built via Chroma.from_documents — confirm this
# client is still needed.
chroma_client = chromadb.Client(Settings(persist_directory="chromadb_store", anonymized_telemetry=False))
671
+
672
# ---- Load and Embed ----
@st.cache_resource
def load_vector_db():
    """Build (or reuse) the Chroma vector store from local files in DATA_PATH.

    Reads every ``.txt`` and ``.json`` file in ``DATA_PATH``, splits each
    file's text into overlapping chunks, embeds them with ``embed_model``,
    and persists the resulting store to ``chromadb_store``. Decorated with
    ``st.cache_resource`` so the embedding work runs once per process.

    Returns:
        A persisted LangChain ``Chroma`` vector store over all chunks.
    """
    # Splitter settings are loop-invariant — build it once, not per file.
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    docs = []

    for fname in os.listdir(DATA_PATH):
        fpath = os.path.join(DATA_PATH, fname)
        if fname.endswith(".txt"):
            with open(fpath, 'r', encoding='utf-8') as f:
                text = f.read()
        elif fname.endswith(".json"):
            with open(fpath, 'r', encoding='utf-8') as f:
                content = json.load(f)
            # Flatten the JSON back to a string so it can be chunked like text.
            text = json.dumps(content)
        else:
            # Unsupported file type — skip it.
            continue

        # Chunk each file as it is read so every file contributes documents.
        docs.extend(splitter.create_documents([text]))

    # Save to ChromaDB
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embed_model,
        persist_directory="chromadb_store",
    )
    vectordb.persist()
    return vectordb
700
 
701
+ db = load_vector_db()
 
 
702
 
703
# ---- RAG QA ----
def answer_with_rag(query):
    """Answer *query* with Groq, grounded in the top-3 chunks from the vector DB.

    Args:
        query: The user's free-text question.

    Returns:
        The model's answer, or a warning string when no chunks match.
    """
    matches = db.similarity_search(query, k=3)
    if not matches:
        return "⚠️ No relevant answer found in embedded knowledge."

    context = "\n".join(m.page_content for m in matches)
    prompt = (
        f"Answer the following using only the provided context:\n\n"
        f"Context:\n{context}\n\nQuestion: {query}"
    )

    completion = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=MODEL_NAME,
    )
    return completion.choices[0].message.content
716
 
717
# ---- Streamlit UI ----
st.title("📚 iCodeGuru ChatBot (RAG + Chroma + Groq)")

user_query = st.text_input("Ask me something about iCodeGuru:")
if user_query:
    # Retrieval + generation can take a moment; show a spinner meanwhile.
    with st.spinner("Thinking..."):
        st.success(answer_with_rag(user_query))