Girish Jeswani committed on
Commit
625853d
·
1 Parent(s): 9fabeb7

add document auth

Browse files
multi_llm_chatbot_backend/app/api/routes/chat.py CHANGED
@@ -49,11 +49,13 @@ async def switch_to_chat(
49
  current_user: User = Depends(get_current_active_user)
50
  ):
51
  """
52
- Switch to an existing chat session and load its context
53
- FIXED VERSION - Returns messages in correct frontend format
54
  """
55
  try:
56
- # Load the chat session into memory context
 
 
57
  memory_session_id = await get_or_create_session_for_request_async(
58
  req,
59
  chat_session_id=request.chat_session_id,
@@ -63,9 +65,15 @@ async def switch_to_chat(
63
  if not memory_session_id:
64
  raise HTTPException(status_code=404, detail="Chat session not found")
65
 
 
 
66
  # Get the loaded session
67
  session = session_manager.get_session(memory_session_id)
68
 
 
 
 
 
69
  # Get the original MongoDB chat session to retrieve messages in proper format
70
  db = get_database()
71
  chat_session = await db.chat_sessions.find_one({
@@ -80,7 +88,7 @@ async def switch_to_chat(
80
  # Return the messages in the original frontend format from MongoDB
81
  original_messages = chat_session.get("messages", [])
82
 
83
- logger.info(f"Switching to chat {request.chat_session_id} with {len(original_messages)} messages")
84
 
85
  return {
86
  "status": "success",
@@ -89,14 +97,28 @@ async def switch_to_chat(
89
  "message_count": len(original_messages),
90
  "context": {
91
  "messages": original_messages, # Return original format messages
92
- "rag_info": session.get_rag_stats()
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
  }
95
 
96
  except HTTPException:
97
  raise
98
  except Exception as e:
99
- logger.error(f"Error switching to chat: {e}")
 
 
100
  raise HTTPException(status_code=500, detail="Failed to switch to chat")
101
 
102
  @router.post("/new-chat")
@@ -133,35 +155,40 @@ async def create_new_chat(
133
  @router.post("/chat-sequential")
134
  async def chat_sequential_enhanced(message: ChatMessage, request: Request):
135
  """
136
- Enhanced sequential chat with proper session management
137
  """
138
  try:
139
- # Determine session ID based on whether this is an existing chat or new
140
  if message.chat_session_id:
141
- # This is an existing chat - we should have already loaded it via switch-chat
142
- # Use the memory session ID format
143
  session_id = f"chat_{message.chat_session_id}"
 
144
 
145
- # Verify the session exists in memory
146
  if session_id not in session_manager.sessions:
147
- # If not in memory, this means switch-chat wasn't called
148
- # We should load it now (but log a warning)
149
  logger.warning(f"Chat session {message.chat_session_id} not in memory, loading now")
150
- session_id = await get_or_create_session_for_request_async(
 
151
  request,
152
  chat_session_id=message.chat_session_id,
153
- user_id="anonymous" # We don't have user context here
154
  )
 
155
  else:
156
  # This is a new chat or no specific chat session
157
  session_id = await get_or_create_session_for_request_async(
158
  request,
159
  session_id_override=message.session_id
160
  )
 
161
 
162
  # Get the session
163
  session = session_manager.get_session(session_id)
164
 
 
 
 
 
165
  # Debug logging
166
  user_messages_count = len([msg for msg in session.messages if msg.get('role') == 'user'])
167
  logger.info(f"Session {session_id} has {user_messages_count} user messages before processing")
@@ -185,7 +212,8 @@ async def chat_sequential_enhanced(message: ChatMessage, request: Request):
185
  "type": "clarification_needed",
186
  "message": clarification_question,
187
  "suggestions": chat_orchestrator._get_clarification_suggestions(),
188
- "session_id": session_id
 
189
  }
190
 
191
  # No clarification needed - proceed with intelligent persona ordering
@@ -201,11 +229,14 @@ async def chat_sequential_enhanced(message: ChatMessage, request: Request):
201
 
202
  for persona_id in top_personas:
203
  try:
 
 
 
204
  # Generate response from this persona
205
  persona_result = await chat_orchestrator.chat_with_persona(
206
  user_input=message.user_input,
207
  persona_id=persona_id,
208
- session_id=session_id,
209
  response_length=message.response_length or "medium"
210
  )
211
 
@@ -213,13 +244,17 @@ async def chat_sequential_enhanced(message: ChatMessage, request: Request):
213
  responses.append({
214
  "persona": persona_result["persona_name"],
215
  "persona_id": persona_result["persona_id"],
216
- "response": persona_result["response"]
 
 
217
  })
218
  else:
219
  responses.append({
220
  "persona": chat_orchestrator.personas[persona_id].name,
221
  "persona_id": persona_id,
222
- "response": "I'm having trouble processing your question right now. Please try again."
 
 
223
  })
224
 
225
  except Exception as e:
@@ -227,13 +262,22 @@ async def chat_sequential_enhanced(message: ChatMessage, request: Request):
227
  responses.append({
228
  "persona": chat_orchestrator.personas[persona_id].name,
229
  "persona_id": persona_id,
230
- "response": "I encountered an error while processing your question. Please try again."
 
 
231
  })
232
 
 
233
  return {
234
  "type": "sequential_responses",
235
  "responses": responses,
236
- "session_id": session_id # Include session ID in response
 
 
 
 
 
 
237
  }
238
 
239
  except Exception as e:
@@ -242,12 +286,14 @@ async def chat_sequential_enhanced(message: ChatMessage, request: Request):
242
  "type": "error",
243
  "responses": [{
244
  "persona": "System",
245
- "response": "I'm having trouble processing your request. Could you please try again?"
 
 
246
  }],
247
- "session_id": session_id if 'session_id' in locals() else None
 
248
  }
249
 
250
- # Keep existing endpoints but update them to use async session management
251
 
252
  @router.post("/chat/{persona_id}")
253
  async def chat_with_specific_advisor(persona_id: str, input: UserInput, request: Request):
 
49
  current_user: User = Depends(get_current_active_user)
50
  ):
51
  """
52
+ Switch to an existing chat session and load its context - FIXED VERSION
53
+ Ensures documents are accessible after switching
54
  """
55
  try:
56
+ logger.info(f"Switching to chat session: {request.chat_session_id}")
57
+
58
+ # Load the chat session into memory context with consistent session ID
59
  memory_session_id = await get_or_create_session_for_request_async(
60
  req,
61
  chat_session_id=request.chat_session_id,
 
65
  if not memory_session_id:
66
  raise HTTPException(status_code=404, detail="Chat session not found")
67
 
68
+ logger.info(f"Loaded chat into memory session: {memory_session_id}")
69
+
70
  # Get the loaded session
71
  session = session_manager.get_session(memory_session_id)
72
 
73
+ # Verify document access after loading
74
+ rag_stats = session.get_rag_stats()
75
+ logger.info(f"After switch - Session {memory_session_id} has {rag_stats.get('total_documents', 0)} documents")
76
+
77
  # Get the original MongoDB chat session to retrieve messages in proper format
78
  db = get_database()
79
  chat_session = await db.chat_sessions.find_one({
 
88
  # Return the messages in the original frontend format from MongoDB
89
  original_messages = chat_session.get("messages", [])
90
 
91
+ logger.info(f"Switch successful - {len(original_messages)} messages, {rag_stats.get('total_documents', 0)} documents")
92
 
93
  return {
94
  "status": "success",
 
97
  "message_count": len(original_messages),
98
  "context": {
99
  "messages": original_messages, # Return original format messages
100
+ "rag_info": rag_stats
101
+ },
102
+ # Include document access verification
103
+ "document_access": {
104
+ "total_documents": rag_stats.get('total_documents', 0),
105
+ "total_chunks": rag_stats.get('total_chunks', 0),
106
+ "documents": rag_stats.get('documents', []),
107
+ "uploaded_files": session.uploaded_files
108
+ },
109
+ "debug_info": {
110
+ "memory_session_format": memory_session_id,
111
+ "documents_accessible": rag_stats.get('total_documents', 0) > 0,
112
+ "session_loaded": memory_session_id in session_manager.sessions
113
  }
114
  }
115
 
116
  except HTTPException:
117
  raise
118
  except Exception as e:
119
+ logger.error(f"Error switching to chat {request.chat_session_id}: {e}")
120
+ import traceback
121
+ logger.error(f"Full traceback: {traceback.format_exc()}")
122
  raise HTTPException(status_code=500, detail="Failed to switch to chat")
123
 
124
  @router.post("/new-chat")
 
155
  @router.post("/chat-sequential")
156
  async def chat_sequential_enhanced(message: ChatMessage, request: Request):
157
  """
158
+ Enhanced sequential chat with proper session management and document access
159
  """
160
  try:
161
+ # Ensure consistent session ID for document retrieval
162
  if message.chat_session_id:
163
+ # Use the memory session format that matches document storage
 
164
  session_id = f"chat_{message.chat_session_id}"
165
+ logger.info(f"Using chat session: {session_id}")
166
 
167
+ # Ensure session exists in memory (load if needed)
168
  if session_id not in session_manager.sessions:
 
 
169
  logger.warning(f"Chat session {message.chat_session_id} not in memory, loading now")
170
+ # Load the session using the async utility function
171
+ memory_session_id = await get_or_create_session_for_request_async(
172
  request,
173
  chat_session_id=message.chat_session_id,
174
+ user_id="system" # This might need proper user ID from auth
175
  )
176
+ session_id = memory_session_id
177
  else:
178
  # This is a new chat or no specific chat session
179
  session_id = await get_or_create_session_for_request_async(
180
  request,
181
  session_id_override=message.session_id
182
  )
183
+ logger.info(f"Using new/existing session: {session_id}")
184
 
185
  # Get the session
186
  session = session_manager.get_session(session_id)
187
 
188
+ # Debug document access
189
+ rag_stats = session.get_rag_stats()
190
+ logger.info(f"Session {session_id} has {rag_stats.get('total_documents', 0)} documents available")
191
+
192
  # Debug logging
193
  user_messages_count = len([msg for msg in session.messages if msg.get('role') == 'user'])
194
  logger.info(f"Session {session_id} has {user_messages_count} user messages before processing")
 
212
  "type": "clarification_needed",
213
  "message": clarification_question,
214
  "suggestions": chat_orchestrator._get_clarification_suggestions(),
215
+ "session_id": session_id,
216
+ "chat_session_id": message.chat_session_id # Include for frontend
217
  }
218
 
219
  # No clarification needed - proceed with intelligent persona ordering
 
229
 
230
  for persona_id in top_personas:
231
  try:
232
+ # Pass the consistent session_id for document retrieval
233
+ logger.info(f"Generating response for {persona_id} with session {session_id}")
234
+
235
  # Generate response from this persona
236
  persona_result = await chat_orchestrator.chat_with_persona(
237
  user_input=message.user_input,
238
  persona_id=persona_id,
239
+ session_id=session_id, # This ensures document access
240
  response_length=message.response_length or "medium"
241
  )
242
 
 
244
  responses.append({
245
  "persona": persona_result["persona_name"],
246
  "persona_id": persona_result["persona_id"],
247
+ "response": persona_result["response"],
248
+ "used_documents": persona_result.get("used_documents", False),
249
+ "document_chunks_used": persona_result.get("document_chunks_used", 0)
250
  })
251
  else:
252
  responses.append({
253
  "persona": chat_orchestrator.personas[persona_id].name,
254
  "persona_id": persona_id,
255
+ "response": "I'm having trouble processing your question right now. Please try again.",
256
+ "used_documents": False,
257
+ "document_chunks_used": 0
258
  })
259
 
260
  except Exception as e:
 
262
  responses.append({
263
  "persona": chat_orchestrator.personas[persona_id].name,
264
  "persona_id": persona_id,
265
+ "response": "I encountered an error while processing your question. Please try again.",
266
+ "used_documents": False,
267
+ "document_chunks_used": 0
268
  })
269
 
270
+ # Include document access info in response
271
  return {
272
  "type": "sequential_responses",
273
  "responses": responses,
274
+ "session_id": session_id,
275
+ "chat_session_id": message.chat_session_id,
276
+ "document_info": {
277
+ "total_documents": rag_stats.get('total_documents', 0),
278
+ "total_chunks": rag_stats.get('total_chunks', 0),
279
+ "documents": rag_stats.get('documents', [])
280
+ }
281
  }
282
 
283
  except Exception as e:
 
286
  "type": "error",
287
  "responses": [{
288
  "persona": "System",
289
+ "response": "I'm having trouble processing your request. Could you please try again?",
290
+ "used_documents": False,
291
+ "document_chunks_used": 0
292
  }],
293
+ "session_id": session_id if 'session_id' in locals() else None,
294
+ "chat_session_id": message.chat_session_id if hasattr(message, 'chat_session_id') else None
295
  }
296
 
 
297
 
298
  @router.post("/chat/{persona_id}")
299
  async def chat_with_specific_advisor(persona_id: str, input: UserInput, request: Request):
multi_llm_chatbot_backend/app/api/routes/documents.py CHANGED
@@ -155,9 +155,21 @@ def convert_messages_for_export(messages):
155
 
156
 
157
  @router.post("/upload-document")
158
- async def upload_document(file: UploadFile = File(...), request: Request = None):
 
 
 
 
159
  try:
160
- session_id = get_or_create_session_for_request(request)
 
 
 
 
 
 
 
 
161
  session = session_manager.get_session(session_id)
162
 
163
  MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
@@ -177,10 +189,12 @@ async def upload_document(file: UploadFile = File(...), request: Request = None)
177
  }
178
  file_type = file_type_map.get(file.content_type, "unknown")
179
 
 
 
180
  rag_result = rag_manager.add_document(
181
  content=content,
182
  filename=file.filename,
183
- session_id=session_id,
184
  file_type=file_type
185
  )
186
 
@@ -198,6 +212,7 @@ async def upload_document(file: UploadFile = File(...), request: Request = None)
198
  f"Document uploaded: '{doc_title}' ({file.filename}) - {rag_result['chunks_created']} sections processed, ~{rag_result['total_tokens']} tokens analyzed. You can now ask questions about this document by referencing it by name."
199
  )
200
 
 
201
  return {
202
  "message": f"Document '{file.filename}' uploaded and processed successfully.",
203
  "filename": file.filename,
@@ -205,7 +220,9 @@ async def upload_document(file: UploadFile = File(...), request: Request = None)
205
  "chunks_created": rag_result['chunks_created'],
206
  "total_tokens": rag_result['total_tokens'],
207
  "file_type": file_type,
208
- "can_reference_by_name": True
 
 
209
  }
210
 
211
  except HTTPException:
 
155
 
156
 
157
  @router.post("/upload-document")
158
+ async def upload_document(
159
+ file: UploadFile = File(...),
160
+ request: Request = None,
161
+ chat_session_id: str = Query(None, description="Chat session ID if uploading to specific chat")
162
+ ):
163
  try:
164
+ if chat_session_id:
165
+ # If uploading to a specific chat, use chat_{id} format
166
+ session_id = f"chat_{chat_session_id}"
167
+ logger.info(f"Uploading document to specific chat session: {session_id}")
168
+ else:
169
+ # For new/temporary chats, use regular session management
170
+ session_id = get_or_create_session_for_request(request)
171
+ logger.info(f"Uploading document to new session: {session_id}")
172
+
173
  session = session_manager.get_session(session_id)
174
 
175
  MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
 
189
  }
190
  file_type = file_type_map.get(file.content_type, "unknown")
191
 
192
+ # Pass the consistent session_id to RAG manager
193
+ logger.info(f"Adding document {file.filename} to session {session_id}")
194
  rag_result = rag_manager.add_document(
195
  content=content,
196
  filename=file.filename,
197
+ session_id=session_id, # This now uses the consistent format
198
  file_type=file_type
199
  )
200
 
 
212
  f"Document uploaded: '{doc_title}' ({file.filename}) - {rag_result['chunks_created']} sections processed, ~{rag_result['total_tokens']} tokens analyzed. You can now ask questions about this document by referencing it by name."
213
  )
214
 
215
+ # Return session info for frontend tracking
216
  return {
217
  "message": f"Document '{file.filename}' uploaded and processed successfully.",
218
  "filename": file.filename,
 
220
  "chunks_created": rag_result['chunks_created'],
221
  "total_tokens": rag_result['total_tokens'],
222
  "file_type": file_type,
223
+ "can_reference_by_name": True,
224
+ "session_id": session_id, # Include session ID for debugging
225
+ "chat_session_id": chat_session_id # Include original chat session ID
226
  }
227
 
228
  except HTTPException:
multi_llm_chatbot_backend/app/api/routes/sessions.py CHANGED
@@ -23,28 +23,48 @@ async def get_context(
23
  current_user: User = Depends(get_current_active_user)
24
  ):
25
  """
26
- Get context for current session - ENHANCED
27
- Now properly handles different chat sessions
28
  """
29
  try:
30
- # Determine which session to get context for
31
  if chat_session_id:
32
- # Getting context for a specific chat session
33
- session_id = await get_or_create_session_for_request_async(
34
- request,
35
- chat_session_id=chat_session_id,
36
- user_id=str(current_user.id)
37
- )
 
 
 
 
 
 
 
 
38
  else:
39
  # Getting context for current session
40
  session_id = await get_or_create_session_for_request_async(request)
 
41
 
42
  session = session_manager.get_session(session_id)
43
  rag_stats = session.get_rag_stats()
44
 
45
- logger.info(f"Retrieved context for session {session_id}: {len(session.messages)} messages")
 
 
 
 
 
46
 
47
- return {
 
 
 
 
 
 
48
  "session_id": session_id,
49
  "chat_session_id": chat_session_id,
50
  "messages": session.messages,
@@ -60,16 +80,39 @@ async def get_context(
60
  "total_upload_size": session.total_upload_size,
61
  "created_at": session.created_at.isoformat(),
62
  "last_accessed": session.last_accessed.isoformat()
 
 
 
 
 
 
63
  }
64
  }
65
 
 
 
66
  except Exception as e:
67
- logger.error(f"Error getting context: {str(e)}")
 
 
 
 
68
  return {
69
- "session_id": None,
 
70
  "messages": [],
71
- "rag_info": {"total_documents": 0, "total_chunks": 0},
72
- "error": str(e)
 
 
 
 
 
 
 
 
 
 
73
  }
74
 
75
  @router.post("/reset-session")
 
23
  current_user: User = Depends(get_current_active_user)
24
  ):
25
  """
26
+ Get context for current session - ENHANCED with document access fix
27
+ Now properly handles different chat sessions and ensures document access
28
  """
29
  try:
30
+ # Determine which session to get context for with consistent session ID format
31
  if chat_session_id:
32
+ # Getting context for a specific chat session - use consistent format
33
+ session_id = f"chat_{chat_session_id}"
34
+ logger.info(f"Getting context for specific chat session: {session_id}")
35
+
36
+ # Ensure session is loaded in memory
37
+ if session_id not in session_manager.sessions:
38
+ logger.info(f"Chat session {session_id} not in memory, loading from database")
39
+ loaded_session_id = await get_or_create_session_for_request_async(
40
+ request,
41
+ chat_session_id=chat_session_id,
42
+ user_id=str(current_user.id)
43
+ )
44
+ session_id = loaded_session_id
45
+ logger.info(f"Loaded session ID: {session_id}")
46
  else:
47
  # Getting context for current session
48
  session_id = await get_or_create_session_for_request_async(request)
49
+ logger.info(f"Getting context for current session: {session_id}")
50
 
51
  session = session_manager.get_session(session_id)
52
  rag_stats = session.get_rag_stats()
53
 
54
+ # Enhanced logging for document access debugging
55
+ logger.info(f"Retrieved context for session {session_id}:")
56
+ logger.info(f" - Messages: {len(session.messages)}")
57
+ logger.info(f" - Documents: {rag_stats.get('total_documents', 0)}")
58
+ logger.info(f" - Chunks: {rag_stats.get('total_chunks', 0)}")
59
+ logger.info(f" - Uploaded files: {len(session.uploaded_files)}")
60
 
61
+ # Log document details if available
62
+ if rag_stats.get('documents'):
63
+ for doc in rag_stats['documents']:
64
+ logger.info(f" - Available document: {doc.get('filename', 'unknown')} ({doc.get('chunks', 0)} chunks)")
65
+
66
+ # Include session debugging info in response
67
+ context_response = {
68
  "session_id": session_id,
69
  "chat_session_id": chat_session_id,
70
  "messages": session.messages,
 
80
  "total_upload_size": session.total_upload_size,
81
  "created_at": session.created_at.isoformat(),
82
  "last_accessed": session.last_accessed.isoformat()
83
+ },
84
+ # Add debugging info
85
+ "debug_info": {
86
+ "session_format": "chat_session" if chat_session_id else "new_session",
87
+ "session_in_memory": session_id in session_manager.sessions,
88
+ "document_access_working": rag_stats.get("total_documents", 0) > 0
89
  }
90
  }
91
 
92
+ return context_response
93
+
94
  except Exception as e:
95
+ logger.error(f"Error getting context for session_id {session_id if 'session_id' in locals() else 'unknown'}: {str(e)}")
96
+ logger.error(f"Chat session ID: {chat_session_id}")
97
+ import traceback
98
+ logger.error(f"Full traceback: {traceback.format_exc()}")
99
+
100
  return {
101
+ "session_id": session_id if 'session_id' in locals() else None,
102
+ "chat_session_id": chat_session_id,
103
  "messages": [],
104
+ "rag_info": {"total_documents": 0, "total_chunks": 0, "documents": []},
105
+ "context_stats": {
106
+ "message_count": 0,
107
+ "user_messages": 0,
108
+ "uploaded_files": [],
109
+ "total_upload_size": 0
110
+ },
111
+ "error": str(e),
112
+ "debug_info": {
113
+ "error_occurred": True,
114
+ "error_type": type(e).__name__
115
+ }
116
  }
117
 
118
  @router.post("/reset-session")
multi_llm_chatbot_backend/app/core/improved_orchestrator.py CHANGED
@@ -297,15 +297,51 @@ class ImprovedChatOrchestrator:
297
  Enhanced document retrieval with document awareness and better attribution
298
  """
299
  try:
 
 
 
 
300
  rag_manager = get_rag_manager()
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  # Extract document hints from user query
303
  document_hint = self._extract_document_hint_from_query(user_input)
 
304
 
305
  # Get persona-specific context for better retrieval
306
  persona_context = self._get_enhanced_persona_context_keywords(persona_id)
307
 
308
  # Search for relevant chunks with document awareness
 
309
  relevant_chunks = rag_manager.search_documents_with_context(
310
  query=user_input,
311
  session_id=session_id,
@@ -316,15 +352,31 @@ class ImprovedChatOrchestrator:
316
 
317
  logger.info(f"Retrieved {len(relevant_chunks)} chunks for {persona_id}")
318
 
 
 
 
 
 
 
 
 
319
  if not relevant_chunks:
320
- logger.info(f"No relevant documents found for query: {user_input[:50]}...")
321
  return ""
322
 
323
  # Format retrieved content with enhanced attribution
324
- return self._format_document_context_with_attribution(relevant_chunks, persona_id)
 
 
 
 
 
325
 
326
  except Exception as e:
327
- logger.error(f"Error retrieving documents for {persona_id}: {str(e)}")
 
 
 
328
  return ""
329
 
330
  def _extract_document_hint_from_query(self, query: str) -> Optional[str]:
@@ -579,34 +631,73 @@ When analyzing the document context:
579
  """
580
  return self._get_enhanced_persona_context_keywords(persona_id)
581
 
582
- async def chat_with_persona(self, persona_id: str, user_input: str, session_id: str, response_length: str = "medium") -> Dict[str, Any]:
583
  """
584
- Chat with a specific persona directly
585
  """
586
  try:
587
  persona = self.get_persona(persona_id)
588
  if not persona:
589
  return {
590
  "error": f"Persona {persona_id} not found",
591
- "available_personas": list(self.personas.keys())
 
 
592
  }
593
 
 
594
  session = self.session_manager.get_session(session_id)
 
 
 
595
  session.append_message("user", user_input)
596
 
597
- # Generate response from single persona
 
 
 
598
  response_data = await self._generate_single_persona_response(session, persona, response_length)
599
 
600
  # Add response to session
601
  session.append_message(persona_id, response_data["response"])
602
 
603
- return response_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
 
605
  except Exception as e:
606
- logger.error(f"Error in chat_with_persona: {str(e)}")
 
 
 
 
607
  return {
608
  "error": f"Error processing request: {str(e)}",
609
- "persona_id": persona_id
 
 
 
 
 
 
 
 
610
  }
611
 
612
 
 
297
  Enhanced document retrieval with document awareness and better attribution
298
  """
299
  try:
300
+ # Add comprehensive logging to track session ID usage
301
+ logger.info(f"Retrieving documents for session_id: {session_id}")
302
+ logger.info(f"User input: {user_input[:100]}...")
303
+
304
  rag_manager = get_rag_manager()
305
 
306
+ # Check what documents are available for this session with detailed logging
307
+ doc_stats = rag_manager.get_document_stats(session_id)
308
+ logger.info(f"Available documents for {session_id}: {doc_stats.get('total_documents', 0)} documents, {doc_stats.get('total_chunks', 0)} chunks")
309
+
310
+ # Log document details for debugging
311
+ if doc_stats.get('documents'):
312
+ for doc in doc_stats['documents']:
313
+ logger.info(f" - Document: {doc.get('filename', 'unknown')} ({doc.get('chunks', 0)} chunks)")
314
+
315
+ # If no documents found and this looks like a chat session, log warning
316
+ if doc_stats.get('total_documents', 0) == 0:
317
+ if session_id.startswith('chat_'):
318
+ logger.warning(f"No documents found for chat session {session_id} - this may indicate session ID mismatch during upload")
319
+
320
+ # Try alternative session ID formats for debugging
321
+ alternative_formats = [
322
+ session_id.replace('chat_', ''), # Remove chat_ prefix
323
+ session_id, # Keep as is
324
+ ]
325
+
326
+ for alt_session_id in alternative_formats:
327
+ if alt_session_id != session_id:
328
+ alt_stats = rag_manager.get_document_stats(alt_session_id)
329
+ if alt_stats.get('total_documents', 0) > 0:
330
+ logger.warning(f"Found documents under alternative session ID {alt_session_id}: {alt_stats}")
331
+ else:
332
+ logger.info(f"No documents found for new session {session_id} - this is normal for new chats")
333
+
334
+ return "" # No documents available
335
+
336
  # Extract document hints from user query
337
  document_hint = self._extract_document_hint_from_query(user_input)
338
+ logger.info(f"Document hint extracted from query: {document_hint}")
339
 
340
  # Get persona-specific context for better retrieval
341
  persona_context = self._get_enhanced_persona_context_keywords(persona_id)
342
 
343
  # Search for relevant chunks with document awareness
344
+ logger.info(f"Searching with persona context: {persona_context[:100]}...")
345
  relevant_chunks = rag_manager.search_documents_with_context(
346
  query=user_input,
347
  session_id=session_id,
 
352
 
353
  logger.info(f"Retrieved {len(relevant_chunks)} chunks for {persona_id}")
354
 
355
+ # Log relevance scores for debugging
356
+ if relevant_chunks:
357
+ for i, chunk in enumerate(relevant_chunks):
358
+ relevance = chunk.get("relevance_score", 0)
359
+ doc_source = chunk.get("document_source", {})
360
+ filename = doc_source.get("filename", "unknown")
361
+ logger.info(f" Chunk {i+1}: {filename} (relevance: {relevance:.3f})")
362
+
363
  if not relevant_chunks:
364
+ logger.info(f"No relevant document chunks found for query: {user_input[:50]}...")
365
  return ""
366
 
367
  # Format retrieved content with enhanced attribution
368
+ formatted_context = self._format_document_context_with_attribution(relevant_chunks, persona_id)
369
+
370
+ # Log final context length
371
+ logger.info(f"Final document context length: {len(formatted_context)} characters")
372
+
373
+ return formatted_context
374
 
375
  except Exception as e:
376
+ logger.error(f"Error retrieving documents for {persona_id} in session {session_id}: {str(e)}")
377
+ logger.error(f"Error type: {type(e).__name__}")
378
+ import traceback
379
+ logger.error(f"Full traceback: {traceback.format_exc()}")
380
  return ""
381
 
382
  def _extract_document_hint_from_query(self, query: str) -> Optional[str]:
 
631
  """
632
  return self._get_enhanced_persona_context_keywords(persona_id)
633
 
634
+ async def chat_with_persona(self, user_input: str, persona_id: str, session_id: str, response_length: str = "medium") -> Dict[str, Any]:
635
  """
636
+ Chat with a specific persona directly - FIXED for consistent document access
637
  """
638
  try:
639
  persona = self.get_persona(persona_id)
640
  if not persona:
641
  return {
642
  "error": f"Persona {persona_id} not found",
643
+ "available_personas": list(self.personas.keys()),
644
+ "persona_id": persona_id,
645
+ "persona_name": "Unknown"
646
  }
647
 
648
+ # Ensure session exists and log session info
649
  session = self.session_manager.get_session(session_id)
650
+ logger.info(f"Chat with {persona_id} using session {session_id}")
651
+
652
+ # Add user message to session
653
  session.append_message("user", user_input)
654
 
655
+ # Use the same session_id for document retrieval
656
+ logger.info(f"Generating response for {persona_id} with session {session_id}")
657
+
658
+ # Generate response from single persona using consistent session ID
659
  response_data = await self._generate_single_persona_response(session, persona, response_length)
660
 
661
  # Add response to session
662
  session.append_message(persona_id, response_data["response"])
663
 
664
+ # Ensure response data includes all necessary fields
665
+ return {
666
+ "persona_id": persona_id,
667
+ "persona_name": persona.name,
668
+ "response": response_data.get("response", "I'm having trouble generating a response."),
669
+ "used_documents": response_data.get("used_documents", False),
670
+ "document_chunks_used": response_data.get("document_chunks_used", 0),
671
+ "response_length": response_length,
672
+ "context_quality": response_data.get("context_quality", "unknown"),
673
+ "session_id": session_id,
674
+ "type": "single_persona_response",
675
+ "persona": {
676
+ "persona_id": persona_id,
677
+ "persona_name": persona.name,
678
+ "response": response_data.get("response", "I'm having trouble generating a response."),
679
+ "used_documents": response_data.get("used_documents", False),
680
+ "document_chunks_used": response_data.get("document_chunks_used", 0)
681
+ }
682
+ }
683
 
684
  except Exception as e:
685
+ logger.error(f"Error in chat_with_persona for {persona_id}: {str(e)}")
686
+ logger.error(f"Session ID: {session_id}")
687
+ import traceback
688
+ logger.error(f"Full traceback: {traceback.format_exc()}")
689
+
690
  return {
691
  "error": f"Error processing request: {str(e)}",
692
+ "persona_id": persona_id,
693
+ "persona_name": self.personas.get(persona_id, {}).name if persona_id in self.personas else "Unknown",
694
+ "response": "I encountered an error while processing your request. Please try again.",
695
+ "used_documents": False,
696
+ "document_chunks_used": 0,
697
+ "response_length": response_length,
698
+ "context_quality": "error",
699
+ "session_id": session_id,
700
+ "type": "error"
701
  }
702
 
703
 
phd-advisor-frontend/src/components/EnhancedChatInput.js CHANGED
@@ -6,7 +6,9 @@ const EnhancedChatInput = ({
6
  onSendMessage,
7
  onFileUploaded,
8
  uploadedDocuments = [],
9
- isLoading,
 
 
10
  placeholder = "Ask your advisors anything about your PhD journey..."
11
  }) => {
12
  const [inputMessage, setInputMessage] = useState('');
@@ -98,8 +100,10 @@ const EnhancedChatInput = ({
98
  {showUpload && (
99
  <div className="floating-upload-section">
100
  <FileUpload
101
- onFileUploaded={handleFileUploaded}
102
- isUploading={isUploading}
 
 
103
  onUploadStart={handleUploadStart}
104
  />
105
  </div>
 
6
  onSendMessage,
7
  onFileUploaded,
8
  uploadedDocuments = [],
9
+ isLoading,
10
+ currentChatSessionId,
11
+ authToken,
12
  placeholder = "Ask your advisors anything about your PhD journey..."
13
  }) => {
14
  const [inputMessage, setInputMessage] = useState('');
 
100
  {showUpload && (
101
  <div className="floating-upload-section">
102
  <FileUpload
103
+ onFileUploaded={onFileUploaded}
104
+ isUploading={isLoading}
105
+ currentChatSessionId={currentChatSessionId}
106
+ authToken={authToken}
107
  onUploadStart={handleUploadStart}
108
  />
109
  </div>
phd-advisor-frontend/src/components/FileUpload.js CHANGED
@@ -3,7 +3,7 @@ import { Upload, FileText, File, X, CheckCircle, AlertCircle } from 'lucide-reac
3
  import { useTheme } from '../contexts/ThemeContext';
4
  import '../styles/FileUpload.css'
5
 
6
- const FileUpload = ({ onFileUploaded, isUploading, onUploadStart }) => {
7
  const [dragActive, setDragActive] = useState(false);
8
  const [uploadStatus, setUploadStatus] = useState(null); // 'success', 'error', null
9
  const [uploadMessage, setUploadMessage] = useState('');
@@ -44,8 +44,24 @@ const FileUpload = ({ onFileUploaded, isUploading, onUploadStart }) => {
44
  formData.append('file', file);
45
 
46
  try {
47
- const response = await fetch('http://localhost:8000/upload-document', {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  method: 'POST',
 
49
  body: formData,
50
  });
51
 
@@ -55,6 +71,14 @@ const FileUpload = ({ onFileUploaded, isUploading, onUploadStart }) => {
55
  setUploadMessage(`${file.name} uploaded successfully and added to context.`);
56
  onFileUploaded && onFileUploaded(file, data);
57
 
 
 
 
 
 
 
 
 
58
  // Auto-clear success message after 5 seconds
59
  setTimeout(() => {
60
  setUploadStatus(null);
 
3
  import { useTheme } from '../contexts/ThemeContext';
4
  import '../styles/FileUpload.css'
5
 
6
+ const FileUpload = ({ onFileUploaded, isUploading, onUploadStart, currentChatSessionId = null, authToken = null }) => {
7
  const [dragActive, setDragActive] = useState(false);
8
  const [uploadStatus, setUploadStatus] = useState(null); // 'success', 'error', null
9
  const [uploadMessage, setUploadMessage] = useState('');
 
44
  formData.append('file', file);
45
 
46
  try {
47
+ // FIXED: Build URL with chat_session_id parameter if available
48
+ let uploadUrl = 'http://localhost:8000/upload-document';
49
+ if (currentChatSessionId) {
50
+ uploadUrl += `?chat_session_id=${currentChatSessionId}`;
51
+ console.log(`Uploading to specific chat session: ${currentChatSessionId}`);
52
+ } else {
53
+ console.log('Uploading to new/current session');
54
+ }
55
+
56
+ // FIXED: Include auth token in headers if available
57
+ const headers = {};
58
+ if (authToken) {
59
+ headers['Authorization'] = `Bearer ${authToken}`;
60
+ }
61
+
62
+ const response = await fetch(uploadUrl, {
63
  method: 'POST',
64
+ headers: headers, // Add auth headers
65
  body: formData,
66
  });
67
 
 
71
  setUploadMessage(`${file.name} uploaded successfully and added to context.`);
72
  onFileUploaded && onFileUploaded(file, data);
73
 
74
+ // FIXED: Log upload result for debugging
75
+ console.log('Document upload result:', {
76
+ filename: data.filename,
77
+ session_id: data.session_id,
78
+ chat_session_id: data.chat_session_id,
79
+ chunks_created: data.chunks_created
80
+ });
81
+
82
  // Auto-clear success message after 5 seconds
83
  setTimeout(() => {
84
  setUploadStatus(null);
phd-advisor-frontend/src/pages/ChatPage.js CHANGED
@@ -326,22 +326,32 @@ const handleNewChat = async (sessionId = null) => {
326
 
327
 
328
 
329
- const handleFileUploaded = async (fileInfo) => {
330
- const documentMessage = {
331
- id: generateMessageId(),
332
- type: 'document_upload',
333
- content: `Document uploaded: ${fileInfo.name}`,
334
- timestamp: new Date()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  };
336
-
337
- setMessages(prev => [...prev, documentMessage]);
338
- setUploadedDocuments(prev => [...prev, fileInfo]);
339
-
340
- // Save document upload message to database if we have a current session
341
- if (currentSessionId) {
342
- await saveMessageToSession(documentMessage);
343
- }
344
- };
345
 
346
  const handleSendMessage = async (inputMessage) => {
347
  if (!inputMessage.trim()) return;
@@ -914,6 +924,8 @@ const handleNewChat = async (sessionId = null) => {
914
  onFileUploaded={handleFileUploaded}
915
  uploadedDocuments={uploadedDocuments}
916
  isLoading={isLoading}
 
 
917
  placeholder={
918
  replyingTo
919
  ? `Reply to ${replyingTo.advisorName}...`
 
326
 
327
 
328
 
329
+ const handleFileUploaded = async (file, uploadResult) => {
330
+ // FIXED: Use the upload result data for better messaging
331
+ const documentMessage = {
332
+ id: generateMessageId(),
333
+ type: 'document_upload',
334
+ content: `Document uploaded: ${uploadResult.filename || file.name} (${uploadResult.chunks_created || 0} sections processed)`,
335
+ timestamp: new Date()
336
+ };
337
+
338
+ setMessages(prev => [...prev, documentMessage]);
339
+ setUploadedDocuments(prev => [...prev, file]);
340
+
341
+ // FIXED: Log document access info
342
+ console.log('File uploaded to session:', {
343
+ filename: uploadResult.filename,
344
+ session_id: uploadResult.session_id,
345
+ chat_session_id: uploadResult.chat_session_id,
346
+ current_session_id: currentSessionId
347
+ });
348
+
349
+ // Save document upload message to database if we have a current session
350
+ if (currentSessionId) {
351
+ await saveMessageToSession(documentMessage);
352
+ }
353
  };
354
+
 
 
 
 
 
 
 
 
355
 
356
  const handleSendMessage = async (inputMessage) => {
357
  if (!inputMessage.trim()) return;
 
924
  onFileUploaded={handleFileUploaded}
925
  uploadedDocuments={uploadedDocuments}
926
  isLoading={isLoading}
927
+ currentSessionId={currentSessionId}
928
+ authToken={authToken}
929
  placeholder={
930
  replyingTo
931
  ? `Reply to ${replyingTo.advisorName}...`