ZunairaHawwar commited on
Commit
61104b8
·
verified ·
1 Parent(s): cfac379

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import nest_asyncio
import streamlit as st
import os
import json
from groq import Groq
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
from chromadb.config import Settings
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Patch the running event loop so async client libraries can be driven
# from inside Streamlit's script runner (Streamlit fix).
nest_asyncio.apply()

# --- CONFIGURATION ---
# NOTE(review): GROQ_API_KEY may be None when the env var is unset; the
# Groq client is still constructed and requests will fail later — confirm
# the deployment always provides it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GROQ_MODEL = "llama3-8b-8192"

# LLM client used by ask_groq().
groq_client = Groq(api_key=GROQ_API_KEY)

# Load the sentence-transformer weights eagerly; lazy construction inside
# Chroma has triggered a meta-tensor initialization bug.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Hand the pre-loaded model to Chroma's embedding wrapper.
# NOTE(review): several chromadb releases expect `model_name=` here rather
# than a model instance via `embedding_model=` — verify against the pinned
# chromadb version.
embedding_function = SentenceTransformerEmbeddingFunction(embedding_model=embedding_model)

# Persistent on-disk vector store (./chroma_db), telemetry disabled.
chroma_client = chromadb.PersistentClient(path="./chroma_db", settings=Settings(anonymized_telemetry=False))
collection = chroma_client.get_or_create_collection(
    name="icodeguru_knowledge",
    embedding_function=embedding_function
)
35
+
36
# --- Ingest JSON Files from /docs/ ---
def ingest_docs_to_chroma():
    """Load every ``*.json`` file under ``./docs``, split it into chunks,
    and add the chunks to the module-level Chroma ``collection``.

    Side effects: writes to the persistent collection and reports progress
    through Streamlit widgets. Assumes ``./docs`` exists (os.listdir raises
    FileNotFoundError otherwise — TODO confirm the folder ships with the app).
    """
    import hashlib  # local import: only needed here, for stable chunk IDs

    folder_path = "./docs"
    all_docs = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".json"):
            file_path = os.path.join(folder_path, filename)
            # jq_schema='.[]' turns each top-level array element into one document.
            loader = JSONLoader(file_path=file_path, jq_schema='.[]')
            docs = loader.load()
            all_docs.extend(docs)
            # FIX: report the actual file name (the message previously
            # interpolated a placeholder instead of the filename).
            st.write(f"Loaded {len(docs)} documents from {filename}")

    # Chunk Documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(all_docs)
    st.write(f"Total chunks created: {len(chunks)}")

    # Collect all chunks, then add them to ChromaDB in one batched call
    # instead of one network/DB round trip per chunk.
    documents, metadatas, ids = [], [], []
    seen_ids = set()
    for chunk in chunks:
        # Flatten list content if necessary
        if isinstance(chunk.page_content, list):
            content = " ".join(str(item) for item in chunk.page_content).strip()
        else:
            content = str(chunk.page_content).strip()

        # BUGFIX: the builtin hash() is salted per process (PYTHONHASHSEED),
        # so the same chunk received a different ID on every app restart and
        # the persistent collection accumulated duplicates. A content digest
        # is deterministic, so re-ingestion overwrites instead of duplicating.
        doc_id = hashlib.sha256(content.encode("utf-8")).hexdigest()
        if doc_id in seen_ids:
            continue  # identical chunk content — skip to avoid duplicate-ID errors
        seen_ids.add(doc_id)
        documents.append(content)
        metadatas.append(chunk.metadata)
        ids.append(doc_id)

    if documents:
        collection.add(documents=documents, metadatas=metadatas, ids=ids)

    st.success("✅ Knowledge Base Updated Successfully!")
67
+
68
# --- Search embedded knowledge ---
def search_vector_data(query):
    """Return the top-3 matching chunks for *query* joined by blank lines.

    Returns None when nothing matches or when the vector lookup fails
    (the failure is surfaced in the UI via st.error).
    """
    try:
        hits = collection.query(query_texts=[query], n_results=3)
        docs = hits["documents"] if hits else None
        if docs:
            return "\n\n".join(docs[0])
    except Exception as exc:
        st.error(f"Vector search error: {exc}")
    return None
77
+
78
# --- Ask Groq LLM ---
def ask_groq(context, question):
    """Send the retrieved *context* plus the user's *question* to the Groq
    chat model and return the model's answer with surrounding whitespace
    stripped."""
    system_prompt = "You are a helpful assistant. Always provide relevant video and website links if possible."
    user_prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"
    response = groq_client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message.content.strip()
89
+
90
# --- Streamlit UI ---
def main():
    """Streamlit entry point: build the knowledge base once per session,
    then answer user questions from it via the Groq LLM."""
    st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
    st.title("🎓 EduBot for @icodeguru0")
    st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.")

    # --- Auto Update Knowledge Base at App Start ---
    # BUGFIX: Streamlit re-executes this script on EVERY widget interaction,
    # so the unguarded call re-ingested and re-embedded the whole corpus on
    # each keystroke. Guard with session_state so ingestion runs once per
    # browser session.
    if not st.session_state.get("kb_loaded", False):
        st.info("🔄 Updating Knowledge Base from /docs/...")
        ingest_docs_to_chroma()
        st.session_state["kb_loaded"] = True
        st.success("✅ Knowledge Base Loaded Successfully!")

    st.markdown("---")

    user_question = st.text_input("💬 Ask your question:")

    if user_question:
        vector_context = search_vector_data(user_question)
        if vector_context:
            with st.spinner("🧠 Answering from knowledge base..."):
                answer = ask_groq(vector_context, user_question)
                st.success(answer)
        else:
            st.warning("⚠️ No relevant answer found in the embedded knowledge.")

    st.markdown("---")
    st.caption("Powered by ChromaDB 🧠 and Groq ⚡")

if __name__ == "__main__":
    main()