Spaces:
Sleeping
Sleeping
| """ | |
| Document Reader Agent using LangChain with modular tools | |
| """ | |
| import logging | |
| from typing import Dict, List, Optional, Any | |
| from agents.base_agent import BaseAgent | |
| from langchain.agents import Tool | |
| # Import modular tools | |
| from tools import document_tools, agent_tools | |
| from config_loader import get_config | |
| logger = logging.getLogger(__name__) | |
class DocumentReaderAgent(BaseAgent):
    """Document reader agent using LangChain with RAG capabilities.

    Before the agent runs, the user query is used to pre-fetch documentation
    snippets from ChromaDB; those snippets are embedded in the prompt so the
    model can often answer without calling the search tool.
    """

    # Fallbacks used when the caller's context names no product / version.
    _DEFAULT_PRODUCT = "harmony"
    _DEFAULT_VERSION = "1.8"
    # Character budget for the documentation pre-loaded into the prompt.
    _MAX_RAG_CONTEXT_CHARS = 3000

    def __init__(self, llm: Optional[Any] = None):
        """Create the agent under the fixed id ``"document_reader"``.

        Args:
            llm: Language model handed through to ``BaseAgent``; ``None`` is
                forwarded unchanged.
        """
        super().__init__(agent_id="document_reader", llm=llm)

    def _custom_init(self, **kwargs):
        """Custom initialization for document reader.

        Loads the application config and creates the ChromaDB manager used
        for the RAG pre-query.
        NOTE(review): presumably invoked by ``BaseAgent.__init__`` - confirm.
        """
        self.config = get_config()
        # Function-scope import kept from the original; presumably it avoids
        # an import cycle or defers a heavy import - TODO confirm.
        from backend.chromadb_manager import ChromaDBManager

        self.db_manager = ChromaDBManager()

    def _create_tools(self) -> List[Tool]:
        """Create tools for the document reader agent.

        Returns:
            The two documentation tools followed by the agent-switching
            tools created for this agent's id.
        """
        return [
            document_tools.search_documentation_tool(),
            document_tools.list_available_versions_tool(),
            *agent_tools.create_switching_tools_for_agent(self.agent_id),
        ]

    def _get_system_prompt(self) -> str:
        """Get the system prompt for document reader agent.

        The tool descriptions and the names of the agents that can be
        switched to come from ``_build_tool_descriptions_and_agents`` and are
        interpolated into the fixed prompt text.
        """
        tool_descriptions, switching_agents = self._build_tool_descriptions_and_agents()
        # Joined up front so the f-string below needs no escape workaround.
        tool_block = "\n".join(tool_descriptions)
        agent_list = ", ".join(switching_agents)
        return f"""You are a technical documentation assistant for Harmony and Chorus products.
Your approach:
1. ALWAYS respond to the CURRENT user message - ignore previous searches or queries
2. You receive initial documentation context - check if it answers the user's question
3. If not, search for the specific information they need
4. Provide COMPLETE, self-contained answers with all relevant details from the documentation
5. Quote extensively from the documents you find - users want the actual content
6. Transfer to other agents when users need help beyond documentation
CRITICAL Answer Requirements:
- Your answers must be comprehensive and self-sufficient
- Include ALL relevant information you find in the documentation
- NEVER tell users to "refer to the guide" or "see page X" - instead, include that information in your response
- If you mention something exists in the documentation, quote it fully
- Users come to you to avoid reading documents - give them complete answers
Search principles:
- The search tool uses vector RAG (semantic similarity), so similar terms return similar results
- When users mention a product and version, use them as separate parameters (e.g., "install harmony 1.5" → query="install", product="harmony", version="1.5")
- Products are lowercase: "harmony" or "chorus"
- After searching, provide ALL the relevant content you found, not just a summary
Available tools:
{tool_block}
You can transfer the conversation to these agents:
{agent_list}
When to transfer:
- Profile Settings: User wants to view/update their settings, preferences, or account
Examples: "set my profile name", "update my email", "change my settings", "view my profile"
CRITICAL: When switching agents, ONLY use the switching tool. Do NOT add any text, explanations, or messages - the tool handles everything."""

    def _select_rag_snippets(self, rag_results) -> List[str]:
        """Pick numbered snippets from *rag_results* within the char budget.

        Documents are taken in the order given until the next one would push
        the running total past ``_MAX_RAG_CONTEXT_CHARS``. If even the first
        document is too large, its leading slice is kept so the prompt never
        ends up with an empty documentation section.

        Args:
            rag_results: Iterable of objects exposing ``page_content``.

        Returns:
            Snippets formatted as ``"[n] <content>"``.
        """
        budget = self._MAX_RAG_CONTEXT_CHARS
        snippets: List[str] = []
        used_chars = 0
        for position, doc in enumerate(rag_results, start=1):
            content = doc.page_content
            if used_chars + len(content) > budget:
                if not snippets:
                    # An oversized first hit used to be dropped entirely,
                    # leaving no context at all; keep what fits instead.
                    logger.info(
                        "First RAG document exceeds the context budget; "
                        "keeping its first %d chars",
                        budget,
                    )
                    snippets.append(f"[{position}] {content[:budget]}")
                else:
                    logger.info(
                        "Truncating RAG context at document %d to stay within limits",
                        position,
                    )
                break
            snippets.append(f"[{position}] {content}")
            used_chars += len(content)
        return snippets

    def _enhance_query(self, query: str, context: Optional[Dict] = None) -> str:
        """Enhance query with RAG context.

        Args:
            query: The user's raw question.
            context: Optional dict; its ``"product"`` and ``"version"``
                entries select the documentation set and default to
                ``"harmony"`` / ``"1.8"`` when absent.

        Returns:
            A prompt containing the query plus pre-fetched documentation, or,
            if the pre-query fails for any reason,
            ``"[Context: <product> <version>] <query>"``.
        """
        context = context or {}
        product = context.get("product", self._DEFAULT_PRODUCT)
        version = context.get("version", self._DEFAULT_VERSION)

        try:
            rag_results = self.db_manager.query_with_filter(
                query,
                product,
                version,
                k=self.config.get_rag_k(),
            )
            rag_context_parts = self._select_rag_snippets(rag_results)
            rag_context = "\n\n".join(rag_context_parts)
            logger.info(
                "RAG context: %d documents found, %d used",
                len(rag_results),
                len(rag_context_parts),
            )
            logger.info("RAG context size: %d chars", len(rag_context))

            enhanced_query = f"""User Query: {query}
Initial Documentation Context:
{rag_context}
Please answer the user's query. If the initial documentation above doesn't contain the answer, use your search tools to find the relevant information."""
            logger.info("Enhanced query total size: %d chars", len(enhanced_query))
            return enhanced_query
        except Exception as e:
            # Deliberate best-effort: the pre-query is an optimisation, so any
            # failure degrades to the plain query. The traceback is logged so
            # the failure is still diagnosable.
            logger.exception("Error during RAG pre-query: %s", e)
            return f"[Context: {product} {version}] {query}"

    @staticmethod
    def _format_tool_inputs(tool_input: Any) -> str:
        """Render a tool's input for the usage summary.

        Dict inputs become ``k='v'`` pairs (entries whose value is ``None``
        are skipped); anything else is rendered with its default formatting.
        """
        if isinstance(tool_input, dict):
            return ", ".join(
                f"{k}='{v}'" for k, v in tool_input.items() if v is not None
            )
        return f"{tool_input}"

    def _enhance_response(self, response: Dict[str, Any], result: Dict[str, Any]) -> None:
        """Add tool usage information to the response.

        Reads ``result["intermediate_steps"]`` (pairs of action and
        observation) and, when at least one action exposes ``tool`` and
        ``tool_input``, appends a numbered "Tool Usage Details" section to
        ``response["output"]``.

        Args:
            response: Mutated in place; a missing or ``None`` ``"output"`` is
                treated as an empty string.
            result: Agent result dict; a missing or ``None``
                ``"intermediate_steps"`` means no tools were used.
        """
        steps = result.get("intermediate_steps") or []
        tool_calls = [
            (action.tool, action.tool_input)
            for action, _observation in steps
            if hasattr(action, "tool") and hasattr(action, "tool_input")
        ]
        if not tool_calls:
            return

        summary_lines = [
            f"{index}. `{tool}`({self._format_tool_inputs(tool_input)})\n"
            for index, (tool, tool_input) in enumerate(tool_calls, 1)
        ]
        tool_summary = "\n\n---\n**Tool Usage Details:**\n" + "".join(summary_lines)
        response["output"] = (response.get("output") or "") + tool_summary