Spaces:

bitsinthesky
/

v3_ai_assistant

Sleeping

v3_ai_assistant / py /agents /document_reader.py

Julian Vanecek

Remove empty shell agents to fix multi-agent system

68b6cce 7 months ago

6.94 kB

	"""
	Document Reader Agent using LangChain with modular tools
	"""

	import logging
	from typing import Dict, List, Optional, Any

	from agents.base_agent import BaseAgent
	from langchain.agents import Tool

	# Import modular tools
	from tools import document_tools, agent_tools
	from config_loader import get_config

	# Module-level logger, named after this module via ``__name__``.
	logger = logging.getLogger(__name__)


	class DocumentReaderAgent(BaseAgent):
	    """Documentation assistant agent with a RAG pre-query step.

	    The agent registers itself under the id ``document_reader``, exposes
	    documentation search tools plus agent-switching tools, and prepends
	    documents fetched from ChromaDB to each user query before it is answered.

	    NOTE(review): the underscore-prefixed methods look like hooks called by
	    ``BaseAgent``; that base class is not visible here — confirm.
	    """

	    # Product/version used when the caller's context does not provide them.
	    DEFAULT_PRODUCT = "harmony"
	    DEFAULT_VERSION = "1.8"
	    # Upper bound on the document characters injected into a single prompt.
	    MAX_RAG_CONTEXT_CHARS = 3000

	    def __init__(self, llm: Optional[Any] = None):
	        """Create the agent.

	        Args:
	            llm: Optional language model, forwarded unchanged to ``BaseAgent``.
	        """
	        super().__init__(agent_id="document_reader", llm=llm)

	    def _custom_init(self, **kwargs):
	        """Load the configuration and the ChromaDB manager used for pre-queries.

	        Args:
	            **kwargs: Accepted for interface compatibility; not used here.
	        """
	        self.config = get_config()
	        # Kept as a function-scope import, as in the original code
	        # (presumably to defer a heavy or circular import — confirm).
	        from backend.chromadb_manager import ChromaDBManager

	        self.db_manager = ChromaDBManager()

	    def _create_tools(self) -> List[Tool]:
	        """Return the documentation tools followed by the agent-switching tools."""
	        return [
	            document_tools.search_documentation_tool(),
	            document_tools.list_available_versions_tool(),
	            *agent_tools.create_switching_tools_for_agent(self.agent_id),
	        ]

	    def _get_system_prompt(self) -> str:
	        """Build the system prompt, listing the available tools and target agents."""
	        tool_descriptions, switching_agents = self._build_tool_descriptions_and_agents()
	        # Joined up front so the prompt template only interpolates plain names.
	        tool_lines = "\n".join(tool_descriptions)
	        agent_names = ", ".join(switching_agents)

	        return f"""You are a technical documentation assistant for Harmony and Chorus products.

	Your approach:
	1. ALWAYS respond to the CURRENT user message - ignore previous searches or queries
	2. You receive initial documentation context - check if it answers the user's question
	3. If not, search for the specific information they need
	4. Provide COMPLETE, self-contained answers with all relevant details from the documentation
	5. Quote extensively from the documents you find - users want the actual content
	6. Transfer to other agents when users need help beyond documentation

	CRITICAL Answer Requirements:
	- Your answers must be comprehensive and self-sufficient
	- Include ALL relevant information you find in the documentation
	- NEVER tell users to "refer to the guide" or "see page X" - instead, include that information in your response
	- If you mention something exists in the documentation, quote it fully
	- Users come to you to avoid reading documents - give them complete answers

	Search principles:
	- The search tool uses vector RAG (semantic similarity), so similar terms return similar results
	- When users mention a product and version, use them as separate parameters (e.g., "install harmony 1.5" → query="install", product="harmony", version="1.5")
	- Products are lowercase: "harmony" or "chorus"
	- After searching, provide ALL the relevant content you found, not just a summary

	Available tools:
	{tool_lines}

	You can transfer the conversation to these agents:
	{agent_names}

	When to transfer:
	- Profile Settings: User wants to view/update their settings, preferences, or account
	Examples: "set my profile name", "update my email", "change my settings", "view my profile"

	CRITICAL: When switching agents, ONLY use the switching tool. Do NOT add any text, explanations, or messages - the tool handles everything."""

	    def _enhance_query(self, query: str, context: Optional[Dict] = None) -> str:
	        """Prepend documentation fetched from the vector store to the query.

	        Args:
	            query: The user's message.
	            context: Optional mapping that may carry ``"product"`` and
	                ``"version"``; the class defaults are used when absent.

	        Returns:
	            A prompt containing the query and the numbered documents. When the
	            lookup fails or yields no usable text, the query tagged with the
	            product and version is returned instead.
	        """
	        context = context or {}
	        product = context.get("product", self.DEFAULT_PRODUCT)
	        version = context.get("version", self.DEFAULT_VERSION)
	        fallback_query = f"[Context: {product} {version}] {query}"

	        try:
	            rag_results = self.db_manager.query_with_filter(
	                query,
	                product,
	                version,
	                k=self.config.get_rag_k(),
	            )
	            rag_context_parts = self._clip_rag_documents(rag_results)
	            logger.info(
	                "RAG context: %d documents found, %d used",
	                len(rag_results),
	                len(rag_context_parts),
	            )
	        except Exception as e:
	            # Deliberate best-effort: the pre-query only enriches the prompt, so
	            # any failure degrades to the plain query instead of propagating.
	            logger.exception("Error during RAG pre-query: %s", e)
	            return fallback_query

	        if not rag_context_parts:
	            # An empty context section would only mislead the model; the tagged
	            # query at least keeps the product/version for follow-up searches.
	            logger.info("RAG pre-query produced no usable context; using plain query")
	            return fallback_query

	        rag_context = "\n\n".join(rag_context_parts)
	        logger.info("RAG context size: %d chars", len(rag_context))

	        enhanced_query = f"""User Query: {query}

	Initial Documentation Context:
	{rag_context}

	Please answer the user's query. If the initial documentation above doesn't contain the answer, use your search tools to find the relevant information."""

	        logger.info("Enhanced query total size: %d chars", len(enhanced_query))
	        return enhanced_query

	    def _clip_rag_documents(self, documents: List[Any]) -> List[str]:
	        """Number the documents and keep them within ``MAX_RAG_CONTEXT_CHARS``.

	        Documents are taken in order. The first one that does not fit is cut to
	        the remaining budget (instead of being dropped) and ends the selection.
	        Only document text counts against the budget, not the ``[n]`` prefixes.
	        """
	        parts: List[str] = []
	        remaining = self.MAX_RAG_CONTEXT_CHARS

	        for position, doc in enumerate(documents, start=1):
	            content = doc.page_content

	            if len(content) > remaining:
	                logger.info(
	                    "Truncating RAG context at document %d to stay within limits",
	                    position,
	                )
	                clipped = content[:remaining]
	                # Skip a clip that is empty or whitespace-only.
	                if clipped.strip():
	                    parts.append(f"[{position}] {clipped}")
	                break

	            parts.append(f"[{position}] {content}")
	            remaining -= len(content)

	        return parts

	    def _enhance_response(self, response: Dict[str, Any], result: Dict[str, Any]) -> None:
	        """Append a "Tool Usage Details" summary to ``response["output"]``.

	        Args:
	            response: Response mapping, modified in place. A missing or ``None``
	                ``"output"`` is treated as an empty string.
	            result: Agent result; its ``"intermediate_steps"`` entry is read as
	                ``(action, observation)`` pairs. Missing or ``None`` means no
	                tools were used and ``response`` is left untouched.
	        """
	        tool_calls = [
	            (action.tool, action.tool_input)
	            for action, _observation in result.get("intermediate_steps") or []
	            if hasattr(action, "tool") and hasattr(action, "tool_input")
	        ]
	        if not tool_calls:
	            return

	        summary_lines = [
	            f"{number}. `{tool}`({self._render_tool_inputs(inputs)})\n"
	            for number, (tool, inputs) in enumerate(tool_calls, 1)
	        ]
	        tool_summary = "\n\n---\nTool Usage Details:\n" + "".join(summary_lines)
	        response["output"] = (response.get("output") or "") + tool_summary

	    @staticmethod
	    def _render_tool_inputs(inputs: Any) -> str:
	        """Format tool inputs: ``k='v'`` pairs for dicts (``None`` values skipped)."""
	        if isinstance(inputs, dict):
	            return ", ".join(
	                f"{key}='{value}'" for key, value in inputs.items() if value is not None
	            )
	        return f"{inputs}"