ZunairaHawwar committed on
Commit
c23d6c0
·
verified ·
1 Parent(s): 6a70874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +469 -210
app.py CHANGED
@@ -1,10 +1,18 @@
1
- import streamlit as st
2
  import os
3
  import json
4
- from typing import List, Optional
 
 
 
 
 
 
5
  import nest_asyncio
 
 
6
 
7
- # LangChain imports
8
  from langchain.vectorstores import Chroma
9
  from langchain.embeddings import HuggingFaceEmbeddings
10
  from langchain.document_loaders import JSONLoader, DirectoryLoader
@@ -17,10 +25,15 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
17
  from langchain.memory import ConversationBufferMemory
18
  from langchain.chains import ConversationalRetrievalChain
19
 
 
 
 
 
 
20
  # Apply asyncio patch for Streamlit compatibility
21
  nest_asyncio.apply()
22
 
23
- # --- CONFIGURATION ---
24
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
25
  if not GROQ_API_KEY:
26
  st.error("⚠️ GROQ_API_KEY environment variable is not set!")
@@ -30,10 +43,121 @@ GROQ_MODEL = "llama3-8b-8192"
30
  EMBEDDING_MODEL = "all-MiniLM-L6-v2"
31
  CHROMA_PERSIST_DIR = "./chroma_db"
32
  DOCS_DIR = "./docs"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- class LangChainRAGSystem:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def __init__(self):
36
- """Initialize the LangChain RAG system components."""
37
  self.embeddings = None
38
  self.vectorstore = None
39
  self.llm = None
@@ -47,14 +171,12 @@ class LangChainRAGSystem:
47
 
48
  def setup_components(self):
49
  """Setup all LangChain components."""
50
- # Initialize embeddings
51
  self.embeddings = HuggingFaceEmbeddings(
52
  model_name=EMBEDDING_MODEL,
53
  model_kwargs={'device': 'cpu'},
54
  encode_kwargs={'normalize_embeddings': True}
55
  )
56
 
57
- # Initialize LLM
58
  self.llm = ChatGroq(
59
  groq_api_key=GROQ_API_KEY,
60
  model_name=GROQ_MODEL,
@@ -62,10 +184,7 @@ class LangChainRAGSystem:
62
  max_tokens=1024
63
  )
64
 
65
- # Load or create vectorstore
66
  self.load_vectorstore()
67
-
68
- # Setup retrieval chain
69
  self.setup_retrieval_chain()
70
 
71
  def load_vectorstore(self):
@@ -76,9 +195,7 @@ class LangChainRAGSystem:
76
  embedding_function=self.embeddings,
77
  collection_name="icodeguru_knowledge"
78
  )
79
- st.info("✅ Loaded existing knowledge base.")
80
  except Exception as e:
81
- st.warning(f"Creating new knowledge base: {e}")
82
  self.vectorstore = Chroma(
83
  persist_directory=CHROMA_PERSIST_DIR,
84
  embedding_function=self.embeddings,
@@ -86,72 +203,73 @@ class LangChainRAGSystem:
86
  )
87
 
88
  def setup_retrieval_chain(self):
89
- """Setup the conversational retrieval chain."""
90
- # Custom prompt template
91
- prompt_template = """You are an expert assistant for iCodeGuru, a programming education platform.
92
- Use the following context to answer the user's question comprehensively and accurately.
93
- Always provide relevant video links, website links, or resources when available in the context.
94
- If you don't know the answer based on the context, say so clearly.
95
-
96
- Context: {context}
97
-
98
- Chat History: {chat_history}
99
-
100
- Human: {question}
101
-
102
- Assistant: I'll help you with that based on the iCodeGuru knowledge base.
103
-
104
- """
105
-
106
- PROMPT = PromptTemplate(
107
- template=prompt_template,
108
- input_variables=["context", "chat_history", "question"]
109
- )
110
-
111
- # Always try to create retriever - let it handle empty collections gracefully
112
- try:
113
- # Create retriever
114
- retriever = self.vectorstore.as_retriever(
115
- search_type="similarity",
116
- search_kwargs={"k": 4} # Retrieve top 4 most relevant chunks
117
- )
118
 
119
- # Create conversational retrieval chain
120
- self.retrieval_chain = ConversationalRetrievalChain.from_llm(
121
- llm=self.llm,
122
- retriever=retriever,
123
- memory=self.memory,
124
- combine_docs_chain_kwargs={"prompt": PROMPT},
125
- return_source_documents=True,
126
- verbose=True
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  )
128
- st.success("✅ Retrieval chain setup successfully!")
129
 
130
- except Exception as e:
131
- st.warning(f"⚠️ Retrieval chain setup issue: {str(e)}")
132
- self.retrieval_chain = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  def load_and_process_documents(self) -> List[Document]:
135
  """Load and process JSON documents from the docs directory."""
136
  documents = []
137
 
138
  if not os.path.exists(DOCS_DIR):
139
- st.error(f"❌ Documents directory '{DOCS_DIR}' not found!")
140
  return documents
141
 
142
- # Get all JSON files
143
  json_files = [f for f in os.listdir(DOCS_DIR) if f.endswith('.json')]
144
 
145
  if not json_files:
146
- st.warning(f"⚠️ No JSON files found in '{DOCS_DIR}' directory!")
147
  return documents
148
 
149
- st.info(f"📂 Found {len(json_files)} JSON files to process...")
150
-
151
  for filename in json_files:
152
  file_path = os.path.join(DOCS_DIR, filename)
153
  try:
154
- # Use JSONLoader with proper schema
155
  loader = JSONLoader(
156
  file_path=file_path,
157
  jq_schema='.[]',
@@ -159,16 +277,13 @@ class LangChainRAGSystem:
159
  )
160
  file_docs = loader.load()
161
 
162
- # Add source metadata
163
  for doc in file_docs:
164
  doc.metadata['source_file'] = filename
165
  doc.metadata['file_path'] = file_path
166
 
167
  documents.extend(file_docs)
168
- st.success(f"✅ Loaded {len(file_docs)} documents from {filename}")
169
 
170
  except Exception as e:
171
- st.error(f"❌ Error loading {filename}: {str(e)}")
172
  continue
173
 
174
  return documents
@@ -183,88 +298,56 @@ class LangChainRAGSystem:
183
  )
184
 
185
  chunks = text_splitter.split_documents(documents)
186
- st.info(f"📄 Created {len(chunks)} document chunks")
187
  return chunks
188
 
189
  def clear_knowledge_base(self):
190
  """Clear the existing knowledge base."""
191
  try:
192
  if self.vectorstore:
193
- # Delete the collection
194
  self.vectorstore.delete_collection()
195
- st.success("🗑️ Cleared existing knowledge base")
196
-
197
- # Recreate empty vectorstore
198
  self.vectorstore = Chroma(
199
  persist_directory=CHROMA_PERSIST_DIR,
200
  embedding_function=self.embeddings,
201
  collection_name="icodeguru_knowledge"
202
  )
203
  except Exception as e:
204
- st.error(f"❌ Error clearing knowledge base: {str(e)}")
205
 
206
  def ingest_documents(self):
207
  """Complete document ingestion pipeline."""
208
- with st.spinner("🔄 Loading documents..."):
209
- # Load documents
210
- documents = self.load_and_process_documents()
211
-
212
- if not documents:
213
- st.error("❌ No documents loaded. Please check your docs folder.")
214
- return False
215
 
216
- with st.spinner("✂️ Splitting documents into chunks..."):
217
- # Split documents
218
- chunks = self.split_documents(documents)
219
-
220
- if not chunks:
221
- st.error("❌ No document chunks created.")
222
- return False
223
 
224
- with st.spinner("🧠 Creating embeddings and storing in vector database..."):
225
- try:
226
- # Clear existing data
227
- self.clear_knowledge_base()
228
-
229
- # Add chunks to vectorstore
230
- self.vectorstore.add_documents(chunks)
231
-
232
- # Persist the vectorstore
233
- self.vectorstore.persist()
234
-
235
- st.success(f"✅ Successfully ingested {len(chunks)} document chunks!")
236
-
237
- # Force recreate retrieval chain with new data
238
- self.setup_retrieval_chain()
239
-
240
- # Verify the setup worked
241
- try:
242
- doc_count = self.vectorstore._collection.count()
243
- st.info(f"📊 Knowledge base now contains {doc_count} documents")
244
- except:
245
- st.info("📊 Knowledge base updated successfully")
246
-
247
- return True
248
-
249
- except Exception as e:
250
- st.error(f"❌ Error during ingestion: {str(e)}")
251
- return False
252
 
253
  def get_answer(self, question: str) -> dict:
254
  """Get answer for a user question."""
255
  if not self.retrieval_chain:
256
  return {
257
- "answer": "⚠️ Knowledge base is empty. Please refresh the knowledge base first.",
258
  "source_documents": []
259
  }
260
 
261
  try:
262
- # Check if vectorstore has documents before querying
263
  doc_count = 0
264
  try:
265
  doc_count = self.vectorstore._collection.count()
266
  except:
267
- # If count fails, try a simple similarity search to test
268
  try:
269
  test_results = self.vectorstore.similarity_search("test", k=1)
270
  doc_count = len(test_results) if test_results else 0
@@ -273,136 +356,312 @@ class LangChainRAGSystem:
273
 
274
  if doc_count == 0:
275
  return {
276
- "answer": "⚠️ No documents found in knowledge base. Please refresh the knowledge base first.",
277
  "source_documents": []
278
  }
279
 
280
- # Get response from the chain
281
  response = self.retrieval_chain({"question": question})
282
  return response
283
 
284
  except Exception as e:
285
  return {
286
- "answer": f" Error getting answer: {str(e)}",
287
  "source_documents": []
288
  }
289
 
290
  def reset_conversation(self):
291
- """Reset the conversation memory and UI chat history."""
292
  self.memory.clear()
293
- # Also clear Streamlit session state messages
294
- if "messages" in st.session_state:
295
- st.session_state.messages = []
296
- st.success("🔄 Conversation history cleared!")
297
 
298
  # Initialize the RAG system
299
  @st.cache_resource
300
  def get_rag_system():
301
  """Cache the RAG system to avoid reinitialization."""
302
- return LangChainRAGSystem()
303
 
304
- def main():
305
- """Main Streamlit application."""
306
- st.set_page_config(
307
- page_title="EduBot for iCodeGuru",
308
- page_icon="🎓",
309
- layout="wide",
310
- initial_sidebar_state="expanded"
311
- )
312
-
313
- # Header
314
- st.title("🎓 EduBot for @icodeguru0")
315
- st.markdown("**Powered by LangChain** | Ask anything based on pre-loaded iCodeGuru knowledge.")
316
-
317
- # Initialize RAG system
318
- rag_system = get_rag_system()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
- # Sidebar for admin functions
321
- with st.sidebar:
322
- st.header("⚙️ Admin Panel")
323
 
324
- if st.button("🔄 Refresh Knowledge Base", type="primary"):
325
- success = rag_system.ingest_documents()
326
- if success:
327
- st.balloons()
328
 
329
- if st.button("🗑️ Clear Conversation"):
330
- rag_system.reset_conversation()
331
- st.rerun() # Force UI refresh
 
 
 
 
 
332
 
333
- st.markdown("---")
334
- st.subheader("📊 System Info")
 
 
335
 
336
- # Show vectorstore stats
337
- if rag_system.vectorstore:
338
- try:
339
- doc_count = rag_system.vectorstore._collection.count()
340
- st.metric("Documents in KB", doc_count)
341
- except:
342
- st.metric("Documents in KB", "N/A")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- st.markdown("---")
345
- st.caption("🧠 **ChromaDB** for vector storage")
346
- st.caption("⚡ **Groq LLM** for answers")
347
- st.caption("🔗 **LangChain** for orchestration")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
- # Main chat interface
350
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
- # Initialize session state for chat history
353
- if "messages" not in st.session_state:
354
- st.session_state.messages = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
- # Display chat history
357
- for message in st.session_state.messages:
358
- with st.chat_message(message["role"]):
359
- st.markdown(message["content"])
360
- if "sources" in message and message["sources"]:
361
- with st.expander("📚 Sources"):
362
- for i, source in enumerate(message["sources"], 1):
363
- st.markdown(f"**Source {i}:** {source}")
364
 
365
- # User input
366
- if prompt := st.chat_input("💬 Ask your question about iCodeGuru..."):
367
- # Add user message to chat history
368
- st.session_state.messages.append({"role": "user", "content": prompt})
 
 
 
 
 
 
 
 
 
 
369
 
370
- # Display user message
371
  with st.chat_message("user"):
372
  st.markdown(prompt)
373
 
374
- # Get assistant response
375
  with st.chat_message("assistant"):
376
- with st.spinner("🤔 Thinking..."):
377
- response = rag_system.get_answer(prompt)
378
- answer = response.get("answer", "No answer available.")
379
- source_docs = response.get("source_documents", [])
380
-
381
- st.markdown(answer)
382
-
383
- # Show sources if available
384
- if source_docs:
385
- sources = []
386
- for doc in source_docs[:3]: # Show top 3 sources
387
- source = doc.metadata.get('source_file', 'Unknown source')
388
- content_preview = doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content
389
- sources.append(f"{source}: {content_preview}")
390
-
391
- if sources:
392
- with st.expander("📚 Sources"):
393
- for i, source in enumerate(sources, 1):
394
- st.markdown(f"**Source {i}:** {source}")
395
-
396
- # Add to session state with sources
397
- st.session_state.messages.append({
398
- "role": "assistant",
399
- "content": answer,
400
- "sources": sources
401
- })
402
- else:
403
- st.session_state.messages.append({"role": "assistant", "content": answer})
404
- else:
405
- st.session_state.messages.append({"role": "assistant", "content": answer})
 
 
 
406
 
407
  if __name__ == "__main__":
408
  main()
 
1
+ # app.py - Complete Enhanced ICodeGuru Chatbot
2
  import os
3
  import json
4
+ import uuid
5
+ import time
6
+ import base64
7
+ import datetime
8
+ from typing import List, Optional, Dict, Any
9
+ import streamlit as st
10
+ import streamlit.components.v1 as components
11
  import nest_asyncio
12
+ from dataclasses import dataclass, asdict
13
+ from pathlib import Path
14
 
15
+ # LangChain imports (your teammate's backend)
16
  from langchain.vectorstores import Chroma
17
  from langchain.embeddings import HuggingFaceEmbeddings
18
  from langchain.document_loaders import JSONLoader, DirectoryLoader
 
25
  from langchain.memory import ConversationBufferMemory
26
  from langchain.chains import ConversationalRetrievalChain
27
 
28
+ # Enhanced components
29
+ from components import render_response_box, render_enhanced_response_box
30
+ from user_manager import UserManager, UserProfile
31
+ from chat_manager import ChatManager, ChatSession
32
+
33
  # Apply asyncio patch for Streamlit compatibility
34
  nest_asyncio.apply()
35
 
36
+ # ========== Configuration ==========
37
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
38
  if not GROQ_API_KEY:
39
  st.error("⚠️ GROQ_API_KEY environment variable is not set!")
 
43
  EMBEDDING_MODEL = "all-MiniLM-L6-v2"
44
  CHROMA_PERSIST_DIR = "./chroma_db"
45
  DOCS_DIR = "./docs"
46
# Per-user profile storage and persisted chat transcripts live next to the app.
USER_DATA_DIR = "./user_data"
CHAT_DATA_DIR = "./chat_data"

# Ensure directories exist. parents=True keeps startup working if any of these
# paths is later reconfigured to a nested location whose parent is missing.
for directory in (USER_DATA_DIR, CHAT_DATA_DIR, DOCS_DIR):
    Path(directory).mkdir(parents=True, exist_ok=True)
52
+
53
+ # ========== Page Configuration ==========
54
# Browser-tab title/icon plus the overall layout used by every page render.
st.set_page_config(
    layout="centered",
    initial_sidebar_state="expanded",
    page_icon="🤖",
    page_title="ICodeGuru AI Assistant",
)
60
 
61
# Load CSS with error handling. The stylesheet is read as UTF-8 explicitly so
# non-ASCII characters in it do not depend on the platform's default encoding.
try:
    with open("style.css", encoding="utf-8") as css_file:
        st.markdown(f"<style>{css_file.read()}</style>", unsafe_allow_html=True)
except FileNotFoundError:
    st.warning("style.css file not found. Using default styling.")
67
+
68
+ # ========== Initialize Managers ==========
69
@st.cache_resource
def get_user_manager():
    """Return the UserManager rooted at USER_DATA_DIR (cached by Streamlit)."""
    manager = UserManager(USER_DATA_DIR)
    return manager


@st.cache_resource
def get_chat_manager():
    """Return the ChatManager rooted at CHAT_DATA_DIR (cached by Streamlit)."""
    manager = ChatManager(CHAT_DATA_DIR)
    return manager


# Module-level handles used by the UI functions below.
user_manager = get_user_manager()
chat_manager = get_chat_manager()
79
+
80
+ # ========== Logo Function ==========
81
def get_base64_image(image_path):
    """Return the image at *image_path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file extension; when the extension is
    unknown or not an image type, ``image/jpeg`` is used (the previous
    hard-coded value), so existing ``.jpeg`` callers get the same result.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        A ``data:<mime>;base64,<payload>`` string, or ``""`` when the file
        cannot be read (missing, a directory, permission denied, ...).
    """
    import mimetypes

    try:
        with open(image_path, "rb") as img_file:
            payload = base64.b64encode(img_file.read()).decode("ascii")
    except OSError:
        # Covers FileNotFoundError as before, plus other read failures that
        # would otherwise crash the page header.
        return ""

    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    return f"data:{mime_type};base64,{payload}"
87
+
88
+ # ========== User Authentication ==========
89
def render_user_auth():
    """Draw the sidebar authentication controls.

    With nobody signed in, the sidebar offers either a login picker over the
    existing usernames or a profile-creation form. Once signed in, it shows a
    greeting with the user's level and a logout button. Successful login,
    profile creation and logout each store/clear the user in
    ``st.session_state`` and trigger a rerun.
    """
    state = st.session_state
    sidebar = st.sidebar

    if 'user_id' not in state:
        state.user_id = None

    # --- Signed in: greeting + logout -------------------------------------
    if state.user_id:
        # User is logged in
        current = state.get('current_user')
        if not current:
            return
        sidebar.markdown(f"### 👋 Welcome, {current.display_name}!")
        sidebar.markdown(f"**Level:** {current.expertise_level}")

        if sidebar.button("Logout"):
            state.user_id = None
            state.current_user = None
            if 'current_session_id' in state:
                del state.current_session_id
            st.rerun()
        return

    # --- Signed out: choose between login and profile creation ------------
    sidebar.markdown("### 👤 User Profile")
    mode = sidebar.radio("Choose option:", ["Login", "Create New Profile"])

    if mode != "Create New Profile":
        # Login
        known_usernames = user_manager.get_all_usernames()
        if not known_usernames:
            sidebar.info("No existing profiles. Create a new one!")
            return

        chosen_username = sidebar.selectbox("Select Username", known_usernames)
        if sidebar.button("Login"):
            found = user_manager.get_user_by_username(chosen_username)
            if found:
                state.user_id = found.user_id
                state.current_user = found
                st.rerun()
        return

    with sidebar.form("create_profile"):
        username = st.text_input("Username", placeholder="Enter username")
        display_name = st.text_input("Display Name", placeholder="Your display name")
        expertise_level = st.selectbox(
            "Programming Experience",
            ["Beginner", "Intermediate", "Advanced", "Expert"],
        )
        preferred_languages = st.multiselect(
            "Preferred Languages",
            ["Python", "JavaScript", "Java", "C++", "C#", "Go", "Rust", "PHP", "Ruby"],
        )
        learning_goals = st.text_area(
            "Learning Goals",
            placeholder="What do you want to learn?",
        )

        if not st.form_submit_button("Create Profile"):
            return

        if not (username and display_name):
            st.error("Username and Display Name are required!")
            return

        try:
            new_profile = UserProfile(
                user_id=str(uuid.uuid4()),
                username=username,
                display_name=display_name,
                expertise_level=expertise_level,
                preferred_languages=preferred_languages,
                learning_goals=learning_goals
            )
            user_manager.create_user(new_profile)
            state.user_id = new_profile.user_id
            state.current_user = new_profile
            st.rerun()
        except Exception as e:
            st.error(f"Error creating profile: {str(e)}")
157
+
158
+ # ========== Enhanced LangChain RAG System ==========
159
+ class EnhancedLangChainRAGSystem:
160
  def __init__(self):
 
161
  self.embeddings = None
162
  self.vectorstore = None
163
  self.llm = None
 
171
 
172
  def setup_components(self):
173
  """Setup all LangChain components."""
 
174
  self.embeddings = HuggingFaceEmbeddings(
175
  model_name=EMBEDDING_MODEL,
176
  model_kwargs={'device': 'cpu'},
177
  encode_kwargs={'normalize_embeddings': True}
178
  )
179
 
 
180
  self.llm = ChatGroq(
181
  groq_api_key=GROQ_API_KEY,
182
  model_name=GROQ_MODEL,
 
184
  max_tokens=1024
185
  )
186
 
 
187
  self.load_vectorstore()
 
 
188
  self.setup_retrieval_chain()
189
 
190
  def load_vectorstore(self):
 
195
  embedding_function=self.embeddings,
196
  collection_name="icodeguru_knowledge"
197
  )
 
198
  except Exception as e:
 
199
  self.vectorstore = Chroma(
200
  persist_directory=CHROMA_PERSIST_DIR,
201
  embedding_function=self.embeddings,
 
203
  )
204
 
205
  def setup_retrieval_chain(self):
206
+ """Setup the conversational retrieval chain with personalization."""
207
+ def get_personalized_prompt():
208
+ user = st.session_state.get('current_user')
209
+ if user:
210
+ user_context = f"""
211
+ User Profile Context:
212
+ - Name: {user.display_name}
213
+ - Experience Level: {user.expertise_level}
214
+ - Preferred Languages: {', '.join(user.preferred_languages) if user.preferred_languages else 'None specified'}
215
+ - Learning Goals: {user.learning_goals or 'None specified'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
+ Please tailor your response to match the user's experience level and preferences.
218
+ """
219
+ else:
220
+ user_context = "User profile not available. Provide general guidance."
221
+
222
+ return f"""You are an expert assistant for iCodeGuru, a programming education platform.
223
+ {user_context}
224
+
225
+ Use the following context to answer the user's question comprehensively and accurately.
226
+ Always provide relevant video links, website links, or resources when available in the context.
227
+ If you don't know the answer based on the context, say so clearly.
228
+
229
+ Context: {{context}}
230
+ Chat History: {{chat_history}}
231
+ Human: {{question}}"""
232
+
233
+ # Part 2 of Enhanced App - RAG System Implementation and Features
234
+
235
+ PROMPT = PromptTemplate(
236
+ template=get_personalized_prompt(),
237
+ input_variables=["context", "chat_history", "question"]
238
  )
 
239
 
240
+ try:
241
+ retriever = self.vectorstore.as_retriever(
242
+ search_type="similarity",
243
+ search_kwargs={"k": 4}
244
+ )
245
+
246
+ self.retrieval_chain = ConversationalRetrievalChain.from_llm(
247
+ llm=self.llm,
248
+ retriever=retriever,
249
+ memory=self.memory,
250
+ combine_docs_chain_kwargs={"prompt": PROMPT},
251
+ return_source_documents=True,
252
+ verbose=False
253
+ )
254
+
255
+ except Exception as e:
256
+ self.retrieval_chain = None
257
 
258
  def load_and_process_documents(self) -> List[Document]:
259
  """Load and process JSON documents from the docs directory."""
260
  documents = []
261
 
262
  if not os.path.exists(DOCS_DIR):
 
263
  return documents
264
 
 
265
  json_files = [f for f in os.listdir(DOCS_DIR) if f.endswith('.json')]
266
 
267
  if not json_files:
 
268
  return documents
269
 
 
 
270
  for filename in json_files:
271
  file_path = os.path.join(DOCS_DIR, filename)
272
  try:
 
273
  loader = JSONLoader(
274
  file_path=file_path,
275
  jq_schema='.[]',
 
277
  )
278
  file_docs = loader.load()
279
 
 
280
  for doc in file_docs:
281
  doc.metadata['source_file'] = filename
282
  doc.metadata['file_path'] = file_path
283
 
284
  documents.extend(file_docs)
 
285
 
286
  except Exception as e:
 
287
  continue
288
 
289
  return documents
 
298
  )
299
 
300
  chunks = text_splitter.split_documents(documents)
 
301
  return chunks
302
 
303
  def clear_knowledge_base(self):
304
  """Clear the existing knowledge base."""
305
  try:
306
  if self.vectorstore:
 
307
  self.vectorstore.delete_collection()
 
 
 
308
  self.vectorstore = Chroma(
309
  persist_directory=CHROMA_PERSIST_DIR,
310
  embedding_function=self.embeddings,
311
  collection_name="icodeguru_knowledge"
312
  )
313
  except Exception as e:
314
+ pass
315
 
316
  def ingest_documents(self):
317
  """Complete document ingestion pipeline."""
318
+ documents = self.load_and_process_documents()
 
 
 
 
 
 
319
 
320
+ if not documents:
321
+ return False
 
 
 
 
 
322
 
323
+ chunks = self.split_documents(documents)
324
+
325
+ if not chunks:
326
+ return False
327
+
328
+ try:
329
+ self.clear_knowledge_base()
330
+ self.vectorstore.add_documents(chunks)
331
+ self.vectorstore.persist()
332
+ self.setup_retrieval_chain()
333
+ return True
334
+
335
+ except Exception as e:
336
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
  def get_answer(self, question: str) -> dict:
339
  """Get answer for a user question."""
340
  if not self.retrieval_chain:
341
  return {
342
+ "answer": "⚠️ Knowledge base is initializing. Please try again in a moment.",
343
  "source_documents": []
344
  }
345
 
346
  try:
 
347
  doc_count = 0
348
  try:
349
  doc_count = self.vectorstore._collection.count()
350
  except:
 
351
  try:
352
  test_results = self.vectorstore.similarity_search("test", k=1)
353
  doc_count = len(test_results) if test_results else 0
 
356
 
357
  if doc_count == 0:
358
  return {
359
+ "answer": "I'm ready to help! However, I don't have any specific documents loaded in my knowledge base right now. I can still answer general programming questions based on my training. Feel free to ask anything!",
360
  "source_documents": []
361
  }
362
 
 
363
  response = self.retrieval_chain({"question": question})
364
  return response
365
 
366
  except Exception as e:
367
  return {
368
+ "answer": f"I apologize, but I encountered an issue processing your question. Could you please try rephrasing it?",
369
  "source_documents": []
370
  }
371
 
372
  def reset_conversation(self):
373
+ """Reset the conversation memory."""
374
  self.memory.clear()
 
 
 
 
375
 
376
  # Initialize the RAG system
377
@st.cache_resource
def get_rag_system():
    """Build the RAG system once; Streamlit's resource cache hands back the same instance afterwards."""
    rag_system = EnhancedLangChainRAGSystem()
    return rag_system
381
 
382
+ # ========== Session Management ==========
383
def initialize_chat_session():
    """Initialize or load the chat session held in ``st.session_state``.

    * No ``current_session_id`` yet: create a new session for the signed-in
      user (if any) and start with an empty message list.
    * ``current_session_id`` present: reload that session's messages from the
      chat manager. If the session can no longer be found, any messages
      already in session state are left untouched, but an empty list is
      created when none exist so later code can always iterate
      ``st.session_state.messages``.
    """
    state = st.session_state

    if 'current_session_id' not in state:
        user_id = state.get('user_id')
        if user_id:
            state.current_session_id = chat_manager.create_session(user_id)
        state.messages = []
        return

    # Load existing session messages
    session = chat_manager.get_session(state.current_session_id)
    if session:
        state.messages = [
            {
                "role": msg.role,
                "content": msg.content,
                "message_id": msg.message_id,
                "rating": msg.rating,
                "is_bookmarked": msg.is_bookmarked,
                "source_documents": msg.source_documents,
            }
            for msg in session.messages
        ]
    elif 'messages' not in state:
        # Stale session id (e.g. the session was removed): never leave
        # `messages` undefined.
        state.messages = []
407
+
408
+ # ========== Chat History Management ==========
409
def render_chat_history_sidebar():
    """List the signed-in user's chat sessions in the sidebar.

    Each of the first ten sessions returned by the chat manager gets a button
    that switches to it and a delete button that removes it.
    """
    if not st.session_state.get('user_id'):
        return

    sessions = chat_manager.get_user_sessions(st.session_state.user_id)
    if not sessions:
        return

    st.sidebar.markdown("### 💬 Chat History")

    for entry in sessions[:10]:  # Show last 10 sessions
        # Long titles are cut to 30 characters and marked with an ellipsis.
        label = entry.title
        if len(label) > 30:
            label = label[:30] + "..."

        open_col, delete_col = st.sidebar.columns([3, 1])

        with open_col:
            open_clicked = st.button(label, key=f"session_{entry.session_id}")
            if open_clicked:
                st.session_state.current_session_id = entry.session_id
                initialize_chat_session()
                st.rerun()

        with delete_col:
            delete_clicked = st.button(
                "🗑️", key=f"delete_{entry.session_id}", help="Delete session"
            )
            if delete_clicked:
                chat_manager.delete_session(entry.session_id)
                is_active = st.session_state.get('current_session_id') == entry.session_id
                if is_active:
                    del st.session_state.current_session_id
                st.rerun()
434
+
435
+ # ========== Enhanced Sidebar Features ==========
436
def render_enhanced_sidebar():
    """Render enhanced sidebar with all features.

    Always draws the authentication controls. For a signed-in user it also
    draws: chat history, a "New Chat" button, the LLM model picker, the
    knowledge-base refresh button, chat export, usage statistics and the
    most recent bookmarked responses.
    """
    # The model picker rebinds the module-level default below. Without this
    # declaration Python treats GROQ_MODEL as a local and the comparison
    # raises UnboundLocalError on every render.
    global GROQ_MODEL

    # User Authentication
    render_user_auth()

    if not st.session_state.get('user_id'):
        return

    # Chat History
    render_chat_history_sidebar()

    st.sidebar.markdown("---")

    # New Chat Button
    if st.sidebar.button("🆕 New Chat", type="primary"):
        user_id = st.session_state.user_id
        session_id = chat_manager.create_session(user_id)
        st.session_state.current_session_id = session_id
        st.session_state.messages = []
        get_rag_system().reset_conversation()
        st.rerun()

    # Model Selection
    st.sidebar.markdown("### 🧠 AI Settings")
    model_options = ["llama3-8b-8192", "llama3-70b-8192"]
    selected_model = st.sidebar.selectbox("Choose LLM Model", model_options, index=0)

    if selected_model != GROQ_MODEL:
        GROQ_MODEL = selected_model
        llm = get_rag_system().llm
        # llm starts out as None until the RAG components are set up.
        if llm is not None:
            llm.model_name = selected_model

    # Knowledge Base Management
    st.sidebar.markdown("### 📚 Knowledge Base")
    if st.sidebar.button("🔄 Refresh Knowledge Base"):
        with st.spinner("Refreshing knowledge base..."):
            success = get_rag_system().ingest_documents()
        if success:
            st.sidebar.success("✅ Knowledge base refreshed!")
        else:
            st.sidebar.warning("⚠️ No documents found to load")

    # Export Chat History
    st.sidebar.markdown("### 📤 Export")
    if st.sidebar.button("📄 Export Chat History"):
        if st.session_state.get('current_session_id'):
            export_data = chat_manager.export_chat_history(
                st.session_state.user_id,
                st.session_state.current_session_id
            )
            if export_data:
                st.sidebar.download_button(
                    label="⬇️ Download JSON",
                    data=json.dumps(export_data, indent=2),
                    file_name=f"chat_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                    mime="application/json"
                )

    # User Statistics
    st.sidebar.markdown("### 📊 Your Stats")
    # NOTE(review): user_stats is fetched but not displayed; the call is kept
    # in case get_user_stats has effects this file does not show.
    user_stats = user_manager.get_user_stats(st.session_state.user_id)
    chat_stats = chat_manager.get_chat_statistics(st.session_state.user_id)

    col1, col2 = st.sidebar.columns(2)
    with col1:
        st.metric("Total Chats", chat_stats.get('total_sessions', 0))
    with col2:
        st.metric("Messages", chat_stats.get('total_messages', 0))

    st.sidebar.metric("Bookmarks", chat_stats.get('bookmarked_messages', 0))

    # Bookmarked Messages
    bookmarked = chat_manager.get_bookmarked_messages(st.session_state.user_id)
    if bookmarked:
        st.sidebar.markdown("### 🔖 Bookmarked Responses")
        for bookmark in bookmarked[:5]:  # Show 5 most recent
            message = bookmark['message']
            message_preview = message['content'][:50] + "..."
            if st.sidebar.button(message_preview, key=f"bookmark_{message['message_id']}"):
                # Show full bookmarked message
                st.sidebar.write(message['content'])
513
+
514
+ # ========== Message Rating Handler ==========
515
def handle_component_value():
    """Handle component interactions (ratings, bookmarks).

    Reads the pending payload from ``st.session_state.component_value``,
    forwards a ``rate_message`` or ``bookmark_message`` action to the chat
    manager, then clears the payload so it is not processed again on the
    next rerun. Does nothing when no payload is pending.
    """
    if 'component_value' not in st.session_state or not st.session_state.component_value:
        return

    data = st.session_state.component_value
    action = data.get('action')

    if action == 'rate_message':
        chat_manager.rate_message(
            data['session_id'],
            data['message_id'],
            data['rating']
        )
    elif action == 'bookmark_message':
        chat_manager.bookmark_message(
            data['session_id'],
            data['message_id'],
            data['is_bookmarked']
        )

    # Clear the component value
    st.session_state.component_value = None
536
+
537
+ # ========== Main App Logic ==========
538
def main():
    """Main application logic.

    Processes pending component actions, draws the header and sidebar, and,
    for a signed-in user, replays the current chat session and handles a new
    prompt: the question and the generated answer are both persisted through
    the chat manager and appended to ``st.session_state.messages``.
    """

    # Handle component interactions
    handle_component_value()

    # Display logo and header. The HTML is built without leading indentation
    # so Markdown cannot interpret it as an indented code block.
    image_data_url = get_base64_image("10001.jpeg")
    header_html = (
        '<div class="custom-header">\n'
        f'<h1><img src="{image_data_url}" class="chatbot-logo" alt="Bot" /> ICodeGuru AI Assistant</h1>\n'
        '</div>'
    )
    st.markdown(header_html, unsafe_allow_html=True)

    # Render enhanced sidebar
    render_enhanced_sidebar()

    # Initialize RAG system
    rag_system = get_rag_system()

    # Check if user is logged in
    if not st.session_state.get('user_id'):
        st.info("👈 Please login or create a profile to start chatting!")
        return

    # Initialize chat session
    initialize_chat_session()

    def generate_response(user_query):
        """Generate AI response using LangChain system.

        Returns:
            A ``(answer_text, source_files)`` tuple on every path, because
            the caller unpacks two values.
        """
        if not user_query or not user_query.strip():
            return "Please provide a valid question.", []

        try:
            response = rag_system.get_answer(user_query)
            answer = response.get("answer", "I apologize, but I couldn't generate a response. Please try again.")

            source_docs = response.get("source_documents", [])
            if source_docs:
                sources_text = "\n\n📚 **Sources:**\n"
                # Only the first two sources are previewed in the answer text.
                for i, doc in enumerate(source_docs[:2], 1):
                    source_file = doc.metadata.get('source_file', 'Unknown')
                    content_preview = doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content
                    sources_text += f"{i}. {source_file}: {content_preview}\n"

                answer += sources_text

            return answer, [doc.metadata.get('source_file', '') for doc in source_docs]

        except Exception:
            return "I apologize, but I encountered an issue processing your question. Could you please try again.", []

    # Display chat messages
    for i, msg in enumerate(st.session_state.messages):
        with st.chat_message(msg["role"]):
            if msg["role"] == "assistant":
                message_id = msg.get("message_id", f"msg-{i}")
                session_id = st.session_state.get("current_session_id", "")

                render_enhanced_response_box(
                    msg["content"],
                    message_id,
                    session_id,
                    is_bookmarked=msg.get("is_bookmarked", False),
                    rating=msg.get("rating"),
                    show_actions=True
                )
            else:
                st.markdown(msg["content"])

    # Chat input
    prompt = st.chat_input("Type your message...")
    if not prompt:
        return

    # Add user message to session
    user_message_id = chat_manager.add_message(
        st.session_state.current_session_id,
        "user",
        prompt
    )

    # Add to session state
    st.session_state.messages.append({
        "role": "user",
        "content": prompt,
        "message_id": user_message_id
    })

    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate and display assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            full_response, source_docs = generate_response(prompt)

        # Add assistant message to session
        assistant_message_id = chat_manager.add_message(
            st.session_state.current_session_id,
            "assistant",
            full_response,
            source_docs
        )

        # Display response with enhanced box
        render_enhanced_response_box(
            full_response,
            assistant_message_id,
            st.session_state.current_session_id,
            is_bookmarked=False,
            rating=None,
            show_actions=True
        )

    # Add to session state
    st.session_state.messages.append({
        "role": "assistant",
        "content": full_response,
        "message_id": assistant_message_id,
        "rating": None,
        "is_bookmarked": False,
        "source_documents": source_docs
    })

    # Update user chat count
    user_manager.increment_chat_count(st.session_state.user_id)
665
 
666
  if __name__ == "__main__":
667
  main()