Spaces:
Sleeping
Sleeping
| """ | |
| Document tools for searching and querying documentation | |
| These are READ-ONLY tools used by the Document Reader agent. | |
| """ | |
| from typing import Optional | |
| from langchain.agents import Tool | |
| from langchain.tools import StructuredTool | |
| from pydantic import BaseModel, Field | |
| from backend.chromadb_manager import ChromaDBManager | |
| # Pydantic models for structured tool inputs | |
class SearchDocumentationInput(BaseModel):
    """Input model for searching documentation"""

    # Free-text query; the only required field.
    query: str = Field(description="The search query")

    # Optional filters. Both default to None, which the field descriptions
    # define as "search everything" for that dimension.
    product: Optional[str] = Field(
        default=None,
        description="Product name (harmony or chorus). If not specified, searches all products",
    )
    version: Optional[str] = Field(
        default=None,
        description="Product version (e.g., '1.2', '1.8'). If not specified, searches all versions",
    )
class ListVersionsInput(BaseModel):
    """Input model for listing versions"""

    # Deliberately field-less: listing versions takes no arguments, but
    # StructuredTool still needs a schema class to be supplied.
# Module-level cache for the shared ChromaDBManager; populated lazily on the
# first call to get_db_manager().
_db_manager = None


def get_db_manager():
    """Return the shared ChromaDBManager, constructing it on first use."""
    global _db_manager

    # Fast path: an instance was already built by an earlier call.
    if _db_manager is not None:
        return _db_manager

    _db_manager = ChromaDBManager()
    return _db_manager
# Products queried when the caller does not name one.
_KNOWN_PRODUCTS = ("harmony", "chorus")
# Maximum number of chunks returned by a single search.
_MAX_RESULTS = 5
# Chunks requested from each product when fanning out across products.
_PER_PRODUCT_RESULTS = 3
# Maximum number of characters of chunk text shown per result.
_SNIPPET_CHARS = 500


def _search_all_products(db_manager, query: str, version: Optional[str]) -> list:
    """Query every known product and merge the hits round-robin."""
    per_product = []
    for prod in _KNOWN_PRODUCTS:
        if version is not None:
            # Honor the version filter even though no product was given.
            hits = db_manager.query_with_filter(
                query, prod, version, k=_PER_PRODUCT_RESULTS
            )
        else:
            hits = db_manager.query_product_all_versions(
                query, prod, k=_PER_PRODUCT_RESULTS
            )
        per_product.append(list(hits))

    # Interleave by rank (1st hit of each product, then 2nd, ...) so that the
    # truncation below does not always push one product's hits to the bottom.
    merged = []
    deepest = max((len(hits) for hits in per_product), default=0)
    for rank in range(deepest):
        merged.extend(hits[rank] for hits in per_product if rank < len(hits))
    return merged[:_MAX_RESULTS]


def _format_search_results(header: str, docs, tag_versions: bool) -> str:
    """Render retrieved chunks as a numbered list followed by a metadata summary."""
    lines = [header]
    summary = []
    for index, doc in enumerate(docs, 1):
        metadata = doc.metadata
        product_name = metadata.get('product', 'unknown')
        version_name = metadata.get('version', 'unknown')

        # Prefix each hit with its origin when the search spanned more than
        # one product/version combination.
        if tag_versions and 'version' in metadata:
            tag = f"[{product_name} {version_name}] "
        else:
            tag = ""

        content = doc.page_content
        snippet = content[:_SNIPPET_CHARS]
        # Only mark the snippet as truncated when text was actually cut off.
        if len(content) > _SNIPPET_CHARS:
            snippet += "..."
        lines.append(f"\n{index}. {tag}{snippet}")

        summary.append(
            f"- Chunk {index}: {product_name} v{version_name}, "
            f"{metadata.get('document', 'unknown')} (page {metadata.get('page', 'unknown')})"
        )

    lines.append("\n\n**Metadata of chunks retrieved:**")
    lines.extend(summary)
    return "\n".join(lines)


def search_documentation(query: str, product: Optional[str] = None, version: Optional[str] = None) -> str:
    """
    Search documentation with flexible filtering

    The search strategy depends on which filters are supplied:

    * product and version: one filtered query for that exact combination.
    * product only: one query across all versions of that product.
    * version only: each known product is queried with the version filter
      and the hits are interleaved.
    * neither: each known product is queried across all versions and the
      hits are interleaved.

    Args:
        query: Search query
        product: Optional product name (harmony or chorus)
        version: Optional product version (e.g., '1.2', '1.8')

    Returns:
        Search results as formatted string with metadata, or a
        "No results found" message when nothing was retrieved
    """
    db_manager = get_db_manager()

    if product is not None and version is not None:
        docs = db_manager.query_with_filter(query, product, version, k=_MAX_RESULTS)
        header = f"Results for '{query}' in {product} {version}:"
    elif product is not None:
        docs = db_manager.query_product_all_versions(query, product, k=_MAX_RESULTS)
        header = f"Results for '{query}' across all {product} versions:"
    else:
        docs = _search_all_products(db_manager, query, version)
        if version is not None:
            header = f"Results for '{query}' in version {version} across all products:"
        else:
            header = f"Results for '{query}' across all products:"

    if not docs:
        return f"No results found for '{query}'"

    # Origin tags are redundant only when both filters pinned a single
    # product/version pair.
    tag_versions = product is None or version is None
    return _format_search_results(header, docs, tag_versions)
def list_available_versions() -> str:
    """
    Describe every product and its versions known to the DB manager

    Returns:
        A text listing with one heading per product (name capitalized)
        followed by one bullet line per version
    """
    catalog = get_db_manager().list_available_versions()

    # Collect the fragments first and join once at the end.
    pieces = ["Available product versions:\n"]
    for product_name, product_versions in catalog.items():
        pieces.append(f"\n{product_name.capitalize()}:\n")
        pieces.extend(f" - {entry}\n" for entry in product_versions)
    return "".join(pieces)
| # Tool creation functions for agents to use | |
def search_documentation_tool() -> StructuredTool:
    """Build the StructuredTool that exposes search_documentation to agents."""
    tool_spec = {
        "name": "search_documentation",
        "description": "Search technical documentation with flexible filtering by product and version",
        "func": search_documentation,
        "args_schema": SearchDocumentationInput,
    }
    return StructuredTool.from_function(**tool_spec)
def list_available_versions_tool() -> StructuredTool:
    """Build the StructuredTool that exposes list_available_versions to agents."""
    tool_spec = {
        "name": "list_available_versions",
        "description": "List all available product versions in the documentation",
        "func": list_available_versions,
        "args_schema": ListVersionsInput,
    }
    return StructuredTool.from_function(**tool_spec)