v3_ai_assistant / py /tools /document_tools.py
Julian Vanecek
Initial commit: AI Assistant Multi-Agent System for HuggingFace Spaces
bb80caa
"""
Document tools for searching and querying documentation
These are READ-ONLY tools used by the Document Reader agent.
"""
from typing import Optional
from langchain.agents import Tool
from langchain.tools import StructuredTool
from pydantic import BaseModel, Field
from backend.chromadb_manager import ChromaDBManager
# Pydantic models for structured tool inputs
class SearchDocumentationInput(BaseModel):
    """Argument schema for the search_documentation tool.

    Only ``query`` is required; ``product`` and ``version`` are optional
    filters that default to ``None``.
    """

    # Required free-text query.
    query: str = Field(..., description="The search query")
    # Optional product filter.
    product: Optional[str] = Field(
        default=None,
        description="Product name (harmony or chorus). If not specified, searches all products",
    )
    # Optional version filter.
    version: Optional[str] = Field(
        default=None,
        description="Product version (e.g., '1.2', '1.8'). If not specified, searches all versions",
    )
class ListVersionsInput(BaseModel):
    """Argument schema for the list_available_versions tool.

    Intentionally declares no fields: the tool takes no arguments, and this
    empty model is what gets passed as ``args_schema`` when the tool is built.
    """
# Module-level slot for the shared ChromaDB manager. It starts empty and is
# filled in by get_db_manager() the first time a tool needs it.
_db_manager = None


def get_db_manager():
    """Return the shared ChromaDBManager, constructing it on first use."""
    global _db_manager
    if _db_manager is not None:
        return _db_manager
    _db_manager = ChromaDBManager()
    return _db_manager
def search_documentation(query: str, product: Optional[str] = None, version: Optional[str] = None) -> str:
    """
    Search documentation with flexible filtering.

    The lookup strategy depends on which filters are supplied:

    * product and version: one filtered query for that product/version.
    * product only: one query across all versions of that product.
    * version only: one filtered query per known product for that version.
    * neither: one query per known product across all versions.

    When several products are queried, the per-product results are
    concatenated in product order and cut to the overall limit; they are not
    re-ranked against each other.

    Args:
        query: Search query
        product: Optional product name (harmony or chorus). An empty string
            is treated the same as None.
        version: Optional product version (e.g., '1.2', '1.8'). An empty
            string is treated the same as None.

    Returns:
        Search results as formatted string: a header line, one numbered
        preview per chunk, and a trailing metadata summary. If nothing is
        found, a "No results found" message is returned instead.
    """
    max_results = 5        # total number of chunks reported to the caller
    per_product_k = 3      # chunks requested from each product when fanning out
    preview_chars = 500    # maximum characters of chunk text shown per result
    known_products = ("harmony", "chorus")

    # Tool callers sometimes send "" for an omitted optional argument; treat
    # that as "no filter" instead of filtering on an empty value.
    product = product or None
    version = version or None

    db_manager = get_db_manager()

    if product is not None and version is not None:
        # Search specific version of specific product
        docs = db_manager.query_with_filter(query, product, version, k=max_results)
        header = f"Results for '{query}' in {product} {version}:"
    elif product is not None:
        # Search all versions of specific product
        docs = db_manager.query_product_all_versions(query, product, k=max_results)
        header = f"Results for '{query}' across all {product} versions:"
    else:
        # No product given: query each known product separately and combine.
        # A version given without a product must still be honoured, so it is
        # applied as a filter to every per-product query.
        docs = []
        for prod in known_products:
            if version is not None:
                found = db_manager.query_with_filter(query, prod, version, k=per_product_k)
            else:
                found = db_manager.query_product_all_versions(query, prod, k=per_product_k)
            docs.extend(found or [])
        docs = docs[:max_results]  # Limit total results
        if version is not None:
            header = f"Results for '{query}' in version {version} across all products:"
        else:
            header = f"Results for '{query}' across all products:"

    if not docs:
        return f"No results found for '{query}'"

    # Tag each result with its origin whenever the search was not pinned to a
    # single product and version, so the reader can tell results apart.
    spans_multiple = product is None or version is None

    results = [header]
    summary_lines = []
    for i, doc in enumerate(docs, 1):
        metadata = doc.metadata
        doc_product = metadata.get('product', 'unknown')
        doc_version = metadata.get('version', 'unknown')

        if spans_multiple and 'version' in metadata:
            version_info = f"[{doc_product} {doc_version}] "
        else:
            version_info = ""

        # Only mark the preview with an ellipsis when text was actually cut.
        content = doc.page_content
        preview = content[:preview_chars]
        if len(content) > preview_chars:
            preview += "..."
        results.append(f"\n{i}. {version_info}{preview}")

        summary_lines.append(
            f"- Chunk {i}: {doc_product} v{doc_version}, "
            f"{metadata.get('document', 'unknown')} (page {metadata.get('page', 'unknown')})"
        )

    # Add metadata summary at the end
    results.append("\n\n**Metadata of chunks retrieved:**")
    results.extend(summary_lines)
    return "\n".join(results)
def list_available_versions() -> str:
    """
    Describe every product/version pair known to the documentation store.

    Returns:
        A text listing: a title line, then for each product its capitalized
        name followed by one line per version.
    """
    catalog = get_db_manager().list_available_versions()

    pieces = ["Available product versions:\n"]
    for product_name, product_versions in catalog.items():
        pieces.append(f"\n{product_name.capitalize()}:\n")
        pieces.extend(f" - {entry}\n" for entry in product_versions)
    return "".join(pieces)
# Tool creation functions for agents to use
def search_documentation_tool() -> StructuredTool:
    """Build the StructuredTool that exposes search_documentation to agents."""
    tool_spec = {
        "func": search_documentation,
        "name": "search_documentation",
        "description": "Search technical documentation with flexible filtering by product and version",
        "args_schema": SearchDocumentationInput,
    }
    return StructuredTool.from_function(**tool_spec)
def list_available_versions_tool() -> StructuredTool:
    """Build the StructuredTool that exposes list_available_versions to agents."""
    tool_spec = {
        "func": list_available_versions,
        "name": "list_available_versions",
        "description": "List all available product versions in the documentation",
        "args_schema": ListVersionsInput,
    }
    return StructuredTool.from_function(**tool_spec)