""" Document tools for searching and querying documentation These are READ-ONLY tools used by the Document Reader agent. """ from typing import Optional from langchain.agents import Tool from langchain.tools import StructuredTool from pydantic import BaseModel, Field from backend.chromadb_manager import ChromaDBManager # Pydantic models for structured tool inputs class SearchDocumentationInput(BaseModel): """Input model for searching documentation""" query: str = Field(description="The search query") product: Optional[str] = Field(None, description="Product name (harmony or chorus). If not specified, searches all products") version: Optional[str] = Field(None, description="Product version (e.g., '1.2', '1.8'). If not specified, searches all versions") class ListVersionsInput(BaseModel): """Input model for listing versions""" # No parameters needed, but StructuredTool requires a model # Initialize ChromaDB manager (singleton pattern) _db_manager = None def get_db_manager(): """Get or create ChromaDB manager instance""" global _db_manager if _db_manager is None: _db_manager = ChromaDBManager() return _db_manager def search_documentation(query: str, product: Optional[str] = None, version: Optional[str] = None) -> str: """ Search documentation with flexible filtering Args: query: Search query product: Optional product name (harmony or chorus) version: Optional product version (e.g., '1.2', '1.8') Returns: Search results as formatted string with metadata """ db_manager = get_db_manager() # Determine which search method to use based on parameters if version is not None and product is not None: # Search specific version of specific product docs = db_manager.query_with_filter(query, product, version, k=5) header = f"Results for '{query}' in {product} {version}:" elif product is not None: # Search all versions of specific product docs = db_manager.query_product_all_versions(query, product, k=5) header = f"Results for '{query}' across all {product} versions:" else: # Search across all products and versions # Note: We'll need to search each product separately and combine results all_docs = [] for prod in ["harmony", "chorus"]: prod_docs = db_manager.query_product_all_versions(query, prod, k=3) all_docs.extend(prod_docs) docs = all_docs[:5] # Limit total results header = f"Results for '{query}' across all products:" if not docs: return f"No results found for '{query}'" results = [header] metadata_summary = [] for i, doc in enumerate(docs, 1): # Extract all metadata metadata = doc.metadata # Include version info if searching across versions if version is None and 'version' in metadata: version_info = f"[{metadata.get('product', 'unknown')} {metadata.get('version', 'unknown')}] " else: version_info = "" # Add main content results.append(f"\n{i}. {version_info}{doc.page_content[:500]}...") # Collect metadata for summary meta_info = { 'chunk': i, 'product': metadata.get('product', 'unknown'), 'version': metadata.get('version', 'unknown'), 'document': metadata.get('document', 'unknown'), 'page': metadata.get('page', 'unknown'), 'chunk_id': metadata.get('chunk_id', 'unknown') } metadata_summary.append(meta_info) # Add metadata summary at the end results.append("\n\n**Metadata of chunks retrieved:**") for meta in metadata_summary: results.append(f"- Chunk {meta['chunk']}: {meta['product']} v{meta['version']}, " f"{meta['document']} (page {meta['page']})") return "\n".join(results) def list_available_versions() -> str: """ List all available product versions Returns: List of available products and versions """ db_manager = get_db_manager() versions = db_manager.list_available_versions() result = "Available product versions:\n" for product, version_list in versions.items(): result += f"\n{product.capitalize()}:\n" for version in version_list: result += f" - {version}\n" return result # Tool creation functions for agents to use def search_documentation_tool() -> StructuredTool: """Create a StructuredTool object for search_documentation""" return StructuredTool.from_function( func=search_documentation, name="search_documentation", description="Search technical documentation with flexible filtering by product and version", args_schema=SearchDocumentationInput ) def list_available_versions_tool() -> StructuredTool: """Create a StructuredTool object for list_available_versions""" return StructuredTool.from_function( func=list_available_versions, name="list_available_versions", description="List all available product versions in the documentation", args_schema=ListVersionsInput )