Spaces:

ZunairaHawwar
/

IcodeGuru_Chatbot

Sleeping

App Files Files Community

ZunairaHawwar committed on Jul 27, 2025

Commit

61104b8

verified ·

1 Parent(s): cfac379

Create app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import nest_asyncio
+import streamlit as st
+import os
+import json
+from groq import Groq
+from sentence_transformers import SentenceTransformer
+import chromadb
+from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
+from chromadb.config import Settings
+from langchain.document_loaders import JSONLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+# Apply asyncio patch (Streamlit fix)
+nest_asyncio.apply()
+# --- CONFIGURATION ---
+# API key is read from the environment; os.environ.get returns None when the
+# variable is unset.
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+# Model name passed to every chat completion request in ask_groq().
+GROQ_MODEL = "llama3-8b-8192"
+# Initialize Groq client
+groq_client = Groq(api_key=GROQ_API_KEY)
+# Explicitly load SentenceTransformer model first to avoid meta tensor bug
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# Pass this model into Chroma's embedding function
+# NOTE(review): the `embedding_model` keyword is forwarded as-is; confirm the
+# installed chromadb version accepts a model instance here rather than a name.
+embedding_function = SentenceTransformerEmbeddingFunction(embedding_model=embedding_model)
+# Initialize ChromaDB Persistent Client
+# Data is stored under ./chroma_db (relative to the working directory) and
+# anonymized telemetry is switched off.
+chroma_client = chromadb.PersistentClient(path="./chroma_db", settings=Settings(anonymized_telemetry=False))
+# Reuse the "icodeguru_knowledge" collection if it already exists, otherwise
+# create it; documents and queries are embedded with embedding_function.
+collection = chroma_client.get_or_create_collection(
+    name="icodeguru_knowledge",
+    embedding_function=embedding_function
+)
+# --- Ingest JSON Files from /docs/ ---
+def ingest_docs_to_chroma():
+    folder_path = "./docs"
+    all_docs = []
+    for filename in os.listdir(folder_path):
+        if filename.endswith(".json"):
+            file_path = os.path.join(folder_path, filename)
+            loader = JSONLoader(file_path=file_path, jq_schema='.[]')
+            docs = loader.load()
+            all_docs.extend(docs)
+            st.write(f"Loaded {len(docs)} documents from {filename}")
+    # Chunk Documents
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    chunks = text_splitter.split_documents(all_docs)
+    st.write(f"Total chunks created: {len(chunks)}")
+    # Add Chunks to ChromaDB
+    for chunk in chunks:
+        # Flatten list content if necessary
+        if isinstance(chunk.page_content, list):
+            content = " ".join(str(item) for item in chunk.page_content).strip()
+        else:
+            content = str(chunk.page_content).strip()
+        metadata = chunk.metadata
+        doc_id = str(hash(content))
+        collection.add(documents=[content], metadatas=[metadata], ids=[doc_id])
+    st.success("✅ Knowledge Base Updated Successfully!")
+# --- Search embedded knowledge ---
+def search_vector_data(query):
+    try:
+        results = collection.query(query_texts=[query], n_results=3)
+        if results and results["documents"]:
+            return "\n\n".join(results["documents"][0])
+    except Exception as e:
+        st.error(f"Vector search error: {e}")
+    return None
+# --- Ask Groq LLM ---
+def ask_groq(context, question):
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."},
+        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"}
+    ]
+    response = groq_client.chat.completions.create(
+        model=GROQ_MODEL,
+        messages=messages
+    )
+    return response.choices[0].message.content.strip()
+# --- Streamlit UI ---
+def main():
+    st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
+    st.title("🎓 EduBot for @icodeguru0")
+    st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.")
+    # --- Auto Update Knowledge Base at App Start ---
+    st.info("🔄 Updating Knowledge Base from /docs/...")
+    ingest_docs_to_chroma()
+    st.success("✅ Knowledge Base Loaded Successfully!")
+    st.markdown("---")
+    user_question = st.text_input("💬 Ask your question:")
+    if user_question:
+        vector_context = search_vector_data(user_question)
+        if vector_context:
+            with st.spinner("🧠 Answering from knowledge base..."):
+                answer = ask_groq(vector_context, user_question)
+                st.success(answer)
+        else:
+            st.warning("⚠️ No relevant answer found in the embedded knowledge.")
+    st.markdown("---")
+    st.caption("Powered by ChromaDB 🧠 and Groq ⚡")
+if __name__ == "__main__":
+    main()