# audit_assistant/src/agents/base_chatbot.py
# Uploaded by akryldigital
# Commit message: modular agentic changes
# Commit: fc70367 (verified)
"""
Base Chatbot - Abstract base class for all chatbot implementations
This module provides shared functionality across all chatbot types:
- Conversation loading/saving
- UI filter extraction
- Conversation context building
- Common utility methods
"""
import json
import time
import logging
from pathlib import Path
from typing import Dict, List, Any, Optional
from abc import ABC, abstractmethod
from langchain_core.messages import HumanMessage, AIMessage
from src.config.paths import CONVERSATIONS_DIR, PROJECT_DIR
logger = logging.getLogger(__name__)
class BaseChatbot(ABC):
    """
    Abstract base class for all chatbot implementations.

    Provides shared functionality:
    - Conversation persistence (load/save)
    - UI filter extraction
    - Conversation context building
    """

    def __init__(self):
        """
        Initialize base chatbot.

        Ensures a conversations directory exists. CONVERSATIONS_DIR is tried
        first; if it cannot be created, a relative "conversations" directory
        is used instead.

        Raises:
            RuntimeError: If neither directory can be created.
        """
        # Conversations directory
        self.conversations_dir = CONVERSATIONS_DIR
        try:
            # NOTE(review): mode=0o777 requests a world-writable directory
            # (the effective mode is still masked by the process umask).
            # Confirm this is intentional for the deployment environment.
            self.conversations_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
        except OSError as e:  # PermissionError is a subclass of OSError
            logger.warning(
                "Could not create conversations directory at %s: %s",
                self.conversations_dir,
                e,
            )
            # Fall back to a path relative to the current working directory.
            self.conversations_dir = Path("conversations")
            try:
                self.conversations_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
            except OSError as e2:
                logger.error(
                    "Could not create conversations directory at %s: %s",
                    self.conversations_dir,
                    e2,
                )
                # Chain the original error so the traceback keeps the cause.
                raise RuntimeError(
                    f"Failed to create conversations directory: {e2}"
                ) from e2

    @abstractmethod
    def chat(self, user_input: str, conversation_id: str = "default") -> Dict[str, Any]:
        """
        Main chat interface - must be implemented by subclasses.

        Args:
            user_input: User's input message
            conversation_id: Unique conversation identifier

        Returns:
            Dictionary with:
            - response: AI response
            - rag_result: RAG results (sources, answer)
            - agent_logs: List of agent logs
            - actual_rag_query: The query used for retrieval
        """

    def _load_conversation(self, conversation_file: Path) -> Dict[str, Any]:
        """
        Load conversation from file.

        Stored message dicts of type "human" / "ai" are converted back to
        LangChain messages; entries of any other type are skipped. If the
        file does not exist or cannot be read/parsed, a fresh default
        conversation is returned instead (the failure is logged).

        Args:
            conversation_file: Path to conversation JSON file

        Returns:
            Dictionary with messages, session_start_time,
            last_ai_message_time, context
        """
        if conversation_file.exists():
            try:
                # Explicit encoding so the result does not depend on the
                # platform's locale default.
                with open(conversation_file, encoding="utf-8") as f:
                    data = json.load(f)

                # Convert message dicts back to LangChain messages
                messages = []
                for msg_data in data.get("messages", []):
                    if msg_data["type"] == "human":
                        messages.append(HumanMessage(content=msg_data["content"]))
                    elif msg_data["type"] == "ai":
                        messages.append(AIMessage(content=msg_data["content"]))
                data["messages"] = messages

                # Fill in keys that _save_conversation indexes directly, so a
                # partially written file can still be saved again later.
                now = time.time()
                data.setdefault("session_start_time", now)
                data.setdefault("last_ai_message_time", now)
                data.setdefault("context", {})
                return data
            except Exception as e:
                # Deliberately best-effort: an unreadable file must not break
                # the chat, so fall through to the default conversation.
                logger.warning("Could not load conversation: %s", e)

        # Return default conversation
        return {
            "messages": [],
            "session_start_time": time.time(),
            "last_ai_message_time": time.time(),
            "context": {},
        }

    def _save_conversation(self, conversation_file: Path, conversation: Dict[str, Any]) -> None:
        """
        Save conversation to file.

        Only HumanMessage and AIMessage entries are persisted; other message
        objects are dropped. Failures are logged and not re-raised.

        Args:
            conversation_file: Path to conversation JSON file
            conversation: Conversation dictionary
        """
        try:
            # Ensure the target directory exists before writing
            conversation_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)

            # Convert messages to serializable format
            messages_data = []
            for msg in conversation["messages"]:
                if isinstance(msg, HumanMessage):
                    messages_data.append({"type": "human", "content": msg.content})
                elif isinstance(msg, AIMessage):
                    messages_data.append({"type": "ai", "content": msg.content})

            conversation_data = {
                "messages": messages_data,
                "session_start_time": conversation["session_start_time"],
                "last_ai_message_time": conversation["last_ai_message_time"],
                "context": conversation.get("context", {}),
            }

            # Explicit encoding to match _load_conversation.
            with open(conversation_file, "w", encoding="utf-8") as f:
                json.dump(conversation_data, f, indent=2)
        except Exception as e:
            # Best-effort persistence: log and continue.
            logger.error("Could not save conversation: %s", e)

    def _extract_ui_filters(self, query: str) -> Dict[str, List[str]]:
        """
        Extract UI filters from query.

        Expected format:
            FILTER CONTEXT:
            Sources: Source1, Source2
            Years: 2020, 2021
            Districts: District1, District2
            Filenames: file1.pdf, file2.pdf
            USER QUERY:
            actual query text

        For each label, only the first line that starts with it is used.
        A value that is empty or the literal "None" yields no filter, and
        empty items (e.g. from a trailing comma) are discarded.

        Args:
            query: User query (may contain filter context)

        Returns:
            Dictionary with extracted filters; possible keys are "sources",
            "years", "districts" and "filenames"
        """
        filters: Dict[str, List[str]] = {}

        # Look for FILTER CONTEXT in query
        if "FILTER CONTEXT:" not in query:
            return filters

        # Extract the entire filter section (until USER QUERY: or end of query)
        filter_section = query.split("FILTER CONTEXT:")[1]
        filter_section = filter_section.split("USER QUERY:")[0].strip()

        # (line label, key in the returned dict)
        labels = (
            ("Sources:", "sources"),
            ("Years:", "years"),
            ("Districts:", "districts"),
            ("Filenames:", "filenames"),
        )
        handled = set()  # labels whose first line has already been consumed

        for line in filter_section.split("\n"):
            stripped = line.strip()
            for label, key in labels:
                # Match only at the start of a line: a label that merely
                # appears mid-line (e.g. "Data Sources: x") is not a filter
                # line and must not raise.
                if key in handled or not stripped.startswith(label):
                    continue
                handled.add(key)
                raw_value = stripped[len(label):].strip()
                if raw_value and raw_value != "None":
                    values = [item.strip() for item in raw_value.split(",")]
                    values = [item for item in values if item]
                    if values:
                        filters[key] = values
                break

        return filters

    def _build_conversation_context(self, messages: List[Any], num_messages: int = 6) -> str:
        """
        Build conversation context from recent messages.

        Args:
            messages: List of LangChain messages
            num_messages: Number of recent messages to include; zero or a
                negative value includes none

        Returns:
            Formatted conversation context string, or
            "No previous conversation." when there is nothing to show
        """
        # messages[-0:] would be the whole list, so handle non-positive
        # counts explicitly instead of relying on the slice.
        recent = messages[-num_messages:] if num_messages > 0 else []

        context_lines = []
        for msg in recent:
            if isinstance(msg, HumanMessage):
                context_lines.append(f"User: {msg.content}")
            elif isinstance(msg, AIMessage):
                context_lines.append(f"Assistant: {msg.content}")

        return "\n".join(context_lines) if context_lines else "No previous conversation."

    def _extract_clean_query(self, query: str) -> str:
        """
        Extract the actual query without filter context.

        Args:
            query: User query (may contain filter context)

        Returns:
            The text after the last "USER QUERY:" marker, stripped of
            surrounding whitespace, or the query unchanged when the marker
            is absent
        """
        if "USER QUERY:" in query:
            return query.split("USER QUERY:")[-1].strip()
        return query