Spaces:
Sleeping
Sleeping
File size: 6,936 Bytes
bb80caa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
"""
Document Reader Agent using LangChain with modular tools
"""
import logging
from typing import Dict, List, Optional, Any
from agents.base_agent import BaseAgent
from langchain.agents import Tool
# Import modular tools
from tools import document_tools, agent_tools
from config_loader import get_config
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class DocumentReaderAgent(BaseAgent):
    """Document reader agent using LangChain with RAG capabilities.

    The agent answers documentation questions for the Harmony and Chorus
    products.  Before the LLM sees a user query, ``_enhance_query`` runs a
    vector search against ChromaDB and prepends the retrieved text so the
    model starts with relevant context.

    NOTE(review): the underscore-prefixed methods below look like hooks that
    ``BaseAgent`` calls; that contract is defined outside this file - confirm
    against ``agents.base_agent``.
    """

    # Product/version used for the RAG pre-query when the caller's context
    # does not provide them.
    DEFAULT_PRODUCT = "harmony"
    DEFAULT_VERSION = "1.8"

    # Upper bound on the number of retrieved-document characters injected into
    # the enhanced query, to keep the prompt from overflowing the token limit.
    MAX_RAG_CONTEXT_CHARS = 3000

    def __init__(self, llm: Optional[Any] = None):
        """Create the agent under the fixed agent id ``"document_reader"``.

        Args:
            llm: Optional language model instance forwarded to ``BaseAgent``.
        """
        super().__init__(agent_id="document_reader", llm=llm)

    def _custom_init(self, **kwargs):
        """Load configuration and create the ChromaDB manager.

        Args:
            **kwargs: Accepted for interface compatibility; not used here.
        """
        self.config = get_config()
        # Imported inside the method, as in the original code.
        # NOTE(review): presumably done to defer loading the backend or to
        # avoid an import cycle - confirm before moving it to module level.
        from backend.chromadb_manager import ChromaDBManager

        self.db_manager = ChromaDBManager()

    def _create_tools(self) -> List[Tool]:
        """Build the tool list for the document reader agent.

        Returns:
            The documentation search and version-listing tools, followed by
            the switching tools created for this agent's id.
        """
        tools = [
            document_tools.search_documentation_tool(),
            document_tools.list_available_versions_tool(),
        ]
        # Switching tools let this agent hand the conversation to other agents.
        tools.extend(agent_tools.create_switching_tools_for_agent(self.agent_id))
        return tools

    def _get_system_prompt(self) -> str:
        """Return the system prompt for the document reader agent.

        The prompt embeds the tool descriptions and the names of the agents
        this agent can transfer to, both obtained from
        ``_build_tool_descriptions_and_agents`` (defined outside this class
        body).
        """
        tool_descriptions, switching_agents = self._build_tool_descriptions_and_agents()
        # chr(10) is a newline; it is used because a backslash is not allowed
        # inside an f-string expression before Python 3.12.
        return f"""You are a technical documentation assistant for Harmony and Chorus products.
Your approach:
1. ALWAYS respond to the CURRENT user message - ignore previous searches or queries
2. You receive initial documentation context - check if it answers the user's question
3. If not, search for the specific information they need
4. Provide COMPLETE, self-contained answers with all relevant details from the documentation
5. Quote extensively from the documents you find - users want the actual content
6. Transfer to other agents when users need help beyond documentation
CRITICAL Answer Requirements:
- Your answers must be comprehensive and self-sufficient
- Include ALL relevant information you find in the documentation
- NEVER tell users to "refer to the guide" or "see page X" - instead, include that information in your response
- If you mention something exists in the documentation, quote it fully
- Users come to you to avoid reading documents - give them complete answers
Search principles:
- The search tool uses vector RAG (semantic similarity), so similar terms return similar results
- When users mention a product and version, use them as separate parameters (e.g., "install harmony 1.5" → query="install", product="harmony", version="1.5")
- Products are lowercase: "harmony" or "chorus"
- After searching, provide ALL the relevant content you found, not just a summary
Available tools:
{chr(10).join(tool_descriptions)}
You can transfer the conversation to these agents:
{', '.join(switching_agents)}
When to transfer:
- Profile Settings: User wants to view/update their settings, preferences, or account
Examples: "set my profile name", "update my email", "change my settings", "view my profile"
CRITICAL: When switching agents, ONLY use the switching tool. Do NOT add any text, explanations, or messages - the tool handles everything."""

    def _enhance_query(self, query: str, context: Optional[Dict] = None) -> str:
        """Enhance the user query with RAG context.

        Args:
            query: The raw user query.
            context: Optional mapping; its ``"product"`` and ``"version"``
                entries select the documentation set to search.  Missing
                entries fall back to ``DEFAULT_PRODUCT`` / ``DEFAULT_VERSION``.

        Returns:
            A prompt containing the query plus the retrieved documentation
            snippets.  If anything fails during retrieval or formatting, the
            original query prefixed with ``[Context: <product> <version>]``
            is returned instead.
        """
        context = context or {}
        product = context.get("product", self.DEFAULT_PRODUCT)
        version = context.get("version", self.DEFAULT_VERSION)

        # The pre-query is best-effort: any failure falls back to the plain
        # query, so the broad ``except`` below is deliberate.
        try:
            rag_results = self.db_manager.query_with_filter(
                query,
                product,
                version,
                k=self.config.get_rag_k(),
            )

            budget = self.MAX_RAG_CONTEXT_CHARS
            rag_context_parts: List[str] = []
            total_chars = 0
            for position, doc in enumerate(rag_results, 1):
                content = doc.page_content
                if total_chars + len(content) > budget:
                    logger.info(
                        "Truncating RAG context at document %d to stay within limits",
                        position,
                    )
                    if not rag_context_parts:
                        # The top-ranked document alone exceeds the budget.
                        # Keep a prefix of it rather than sending the model an
                        # empty context even though results were found.
                        rag_context_parts.append(f"[{position}] {content[:budget]}")
                    break
                rag_context_parts.append(f"[{position}] {content}")
                total_chars += len(content)

            rag_context = "\n\n".join(rag_context_parts)
            logger.info(
                "RAG context: %d documents found, %d used",
                len(rag_results),
                len(rag_context_parts),
            )
            logger.info("RAG context size: %d chars", len(rag_context))

            enhanced_query = f"""User Query: {query}
Initial Documentation Context:
{rag_context}
Please answer the user's query. If the initial documentation above doesn't contain the answer, use your search tools to find the relevant information."""
            logger.info("Enhanced query total size: %d chars", len(enhanced_query))
            return enhanced_query
        except Exception as e:
            # logger.exception records the traceback as well as the message.
            logger.exception("Error during RAG pre-query: %s", e)
            return f"[Context: {product} {version}] {query}"

    def _enhance_response(self, response: Dict[str, Any], result: Dict[str, Any]) -> None:
        """Append a tool-usage summary to ``response["output"]`` in place.

        Args:
            response: Response mapping that is mutated; a missing or ``None``
                ``"output"`` entry is treated as an empty string.
            result: Mapping whose ``"intermediate_steps"`` entry, when present,
                is an iterable of ``(action, observation)`` pairs.  Only
                actions exposing both ``tool`` and ``tool_input`` attributes
                are reported.

        Returns:
            None.  ``response`` is left untouched when no tool call is found.
        """
        # ``or []`` also covers the key being present with a ``None`` value.
        steps = result.get("intermediate_steps") or []
        tool_calls = [
            (action.tool, action.tool_input)
            for action, _observation in steps
            if hasattr(action, "tool") and hasattr(action, "tool_input")
        ]
        if not tool_calls:
            return

        summary_parts = ["\n\n---\n**Tool Usage Details:**\n"]
        for index, (tool_name, tool_input) in enumerate(tool_calls, 1):
            if isinstance(tool_input, dict):
                # Parameters whose value is None are omitted from the summary.
                rendered = ", ".join(
                    f"{k}='{v}'" for k, v in tool_input.items() if v is not None
                )
            else:
                rendered = f"{tool_input}"
            summary_parts.append(f"{index}. `{tool_name}`({rendered})\n")

        response["output"] = (response.get("output") or "") + "".join(summary_parts)