v3_ai_assistant / py /agents /document_reader.py
Julian Vanecek
Remove empty shell agents to fix multi-agent system
68b6cce
"""
Document Reader Agent using LangChain with modular tools
"""
import logging
from typing import Dict, List, Optional, Any
from agents.base_agent import BaseAgent
from langchain.agents import Tool
# Import modular tools
from tools import document_tools, agent_tools
from config_loader import get_config
# Module-level logger, named after this module so log records can be filtered per file.
logger = logging.getLogger(__name__)
class DocumentReaderAgent(BaseAgent):
    """Document reader agent using LangChain with RAG capabilities.

    Before the query is handed on, it is enriched with documentation
    snippets pre-fetched from ChromaDB (see ``_enhance_query``), and the
    final answer is annotated with the tool calls that were made (see
    ``_enhance_response``). The underscore-prefixed methods look like hooks
    invoked by ``BaseAgent`` - TODO confirm against the base class.
    """

    # Product / version used when the caller's context does not name one.
    _DEFAULT_PRODUCT = "harmony"
    _DEFAULT_VERSION = "1.8"
    # Upper bound on the characters of pre-fetched documentation placed in
    # the prompt, to prevent token overflow.
    _MAX_RAG_CONTEXT_CHARS = 3000

    def __init__(self, llm: Optional[Any] = None):
        """Create the agent under the fixed id ``"document_reader"``.

        Args:
            llm: Optional language-model object forwarded unchanged to
                ``BaseAgent``.
        """
        super().__init__(agent_id="document_reader", llm=llm)

    def _custom_init(self, **kwargs):
        """Load configuration and create the ChromaDB manager for RAG pre-query.

        Args:
            **kwargs: Accepted for interface compatibility; not used here.
        """
        self.config = get_config()
        # Imported at call time, as in the original code, rather than at
        # module import time.
        from backend.chromadb_manager import ChromaDBManager
        self.db_manager = ChromaDBManager()

    def _create_tools(self) -> List[Tool]:
        """Create tools for the document reader agent.

        Returns:
            The two documentation tools (search, list versions) followed by
            the switching tools created for this agent's id.
        """
        tools = [
            document_tools.search_documentation_tool(),
            document_tools.list_available_versions_tool(),
        ]
        # Switching tools let this agent hand the conversation to the others.
        tools.extend(agent_tools.create_switching_tools_for_agent(self.agent_id))
        return tools

    def _get_system_prompt(self) -> str:
        """Get the system prompt for the document reader agent.

        The tool descriptions and the names of the agents that can be
        switched to are interpolated into a fixed instruction text.

        Returns:
            The complete system prompt.
        """
        tool_descriptions, switching_agents = self._build_tool_descriptions_and_agents()
        tools_block = "\n".join(tool_descriptions)
        agents_list = ", ".join(switching_agents)
        # NOTE: the prompt text is runtime content - edit it deliberately.
        return f"""You are a technical documentation assistant for Harmony and Chorus products.
Your approach:
1. ALWAYS respond to the CURRENT user message - ignore previous searches or queries
2. You receive initial documentation context - check if it answers the user's question
3. If not, search for the specific information they need
4. Provide COMPLETE, self-contained answers with all relevant details from the documentation
5. Quote extensively from the documents you find - users want the actual content
6. Transfer to other agents when users need help beyond documentation
CRITICAL Answer Requirements:
- Your answers must be comprehensive and self-sufficient
- Include ALL relevant information you find in the documentation
- NEVER tell users to "refer to the guide" or "see page X" - instead, include that information in your response
- If you mention something exists in the documentation, quote it fully
- Users come to you to avoid reading documents - give them complete answers
Search principles:
- The search tool uses vector RAG (semantic similarity), so similar terms return similar results
- When users mention a product and version, use them as separate parameters (e.g., "install harmony 1.5" → query="install", product="harmony", version="1.5")
- Products are lowercase: "harmony" or "chorus"
- After searching, provide ALL the relevant content you found, not just a summary
Available tools:
{tools_block}
You can transfer the conversation to these agents:
{agents_list}
When to transfer:
- Profile Settings: User wants to view/update their settings, preferences, or account
Examples: "set my profile name", "update my email", "change my settings", "view my profile"
CRITICAL: When switching agents, ONLY use the switching tool. Do NOT add any text, explanations, or messages - the tool handles everything."""

    def _format_rag_context(self, rag_results) -> List[str]:
        """Turn retrieved documents into numbered snippets within the size budget.

        Documents are taken in order until adding the next one would push
        the accumulated content past ``_MAX_RAG_CONTEXT_CHARS``. If the very
        first document is already over budget, a budget-sized prefix of it is
        kept, so the prompt never ends up with an empty context just because
        the best match was long.

        Args:
            rag_results: Iterable of objects exposing ``page_content`` (str).

        Returns:
            A list of ``"[n] <content>"`` strings, ``n`` starting at 1.
        """
        budget = self._MAX_RAG_CONTEXT_CHARS
        parts: List[str] = []
        total_chars = 0
        for position, doc in enumerate(rag_results, start=1):
            content = doc.page_content
            if total_chars + len(content) > budget:
                if not parts:
                    # Only reachable for the first document: keep its head
                    # rather than sending no context at all.
                    parts.append(f"[{position}] {content[:budget]}")
                logger.info(
                    "Truncating RAG context at document %d to stay within limits",
                    position,
                )
                break
            parts.append(f"[{position}] {content}")
            total_chars += len(content)
        return parts

    def _enhance_query(self, query: str, context: Optional[Dict] = None) -> str:
        """Enhance query with RAG context.

        Args:
            query: The user's message.
            context: Optional mapping; the ``"product"`` and ``"version"``
                keys are read, defaulting to ``"harmony"`` / ``"1.8"``.

        Returns:
            A prompt containing the query plus the pre-fetched documentation.
            If the pre-query fails for any reason, the original query
            prefixed with ``[Context: <product> <version>]``.
        """
        context = context or {}
        product = context.get("product", self._DEFAULT_PRODUCT)
        version = context.get("version", self._DEFAULT_VERSION)

        try:
            rag_results = self.db_manager.query_with_filter(
                query,
                product,
                version,
                k=self.config.get_rag_k(),
            )
            rag_context_parts = self._format_rag_context(rag_results)
            rag_context = "\n\n".join(rag_context_parts)
            logger.info(
                "RAG context: %d documents found, %d used",
                len(rag_results),
                len(rag_context_parts),
            )
            logger.info("RAG context size: %d chars", len(rag_context))

            enhanced_query = f"""User Query: {query}
Initial Documentation Context:
{rag_context}
Please answer the user's query. If the initial documentation above doesn't contain the answer, use your search tools to find the relevant information."""
            logger.info("Enhanced query total size: %d chars", len(enhanced_query))
            return enhanced_query
        except Exception as e:
            # Deliberate best-effort boundary: a failed pre-query must not
            # block the conversation. Log with traceback and fall back.
            logger.exception("Error during RAG pre-query: %s", e)
            return f"[Context: {product} {version}] {query}"

    def _enhance_response(self, response: Dict[str, Any], result: Dict[str, Any]):
        """Add tool usage information to the response.

        Reads ``result["intermediate_steps"]`` (pairs of action and
        observation), collects every action that exposes ``tool`` and
        ``tool_input``, and appends a Markdown summary of those calls to
        ``response["output"]``. ``response`` is modified in place; nothing
        is returned. A missing or ``None`` ``"output"`` is treated as an
        empty string, and a missing or ``None`` ``"intermediate_steps"`` as
        no steps.

        Args:
            response: Response mapping whose ``"output"`` text is extended.
            result: Agent execution result that may carry
                ``"intermediate_steps"``.
        """
        steps = result.get("intermediate_steps") or []
        tool_calls = [
            {"tool": action.tool, "inputs": action.tool_input}
            for action, _observation in steps
            if hasattr(action, "tool") and hasattr(action, "tool_input")
        ]
        if not tool_calls:
            return

        summary_lines = []
        for number, call in enumerate(tool_calls, 1):
            inputs = call["inputs"]
            if isinstance(inputs, dict):
                # Arguments that were not supplied (None) are omitted.
                rendered = ", ".join(
                    f"{key}='{value}'"
                    for key, value in inputs.items()
                    if value is not None
                )
            else:
                rendered = inputs
            summary_lines.append(f"{number}. `{call['tool']}`({rendered})\n")

        tool_summary = "\n\n---\n**Tool Usage Details:**\n" + "".join(summary_lines)
        response["output"] = (response.get("output") or "") + tool_summary