Spaces:
Sleeping
Sleeping
File size: 5,317 Bytes
bb80caa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
"""
Document tools for searching and querying documentation
These are READ-ONLY tools used by the Document Reader agent.
"""
from typing import Optional
from langchain.agents import Tool
from langchain.tools import StructuredTool
from pydantic import BaseModel, Field
from backend.chromadb_manager import ChromaDBManager
# Pydantic models for structured tool inputs
class SearchDocumentationInput(BaseModel):
    """Input model for searching documentation"""

    # Free-text query; declared without a default, so it is the only required field.
    query: str = Field(description="The search query")

    # Optional filters. Both default to None.
    product: Optional[str] = Field(
        default=None,
        description="Product name (harmony or chorus). If not specified, searches all products",
    )
    version: Optional[str] = Field(
        default=None,
        description="Product version (e.g., '1.2', '1.8'). If not specified, searches all versions",
    )
class ListVersionsInput(BaseModel):
    """Input model for listing versions"""

    # Intentionally field-less: listing versions takes no arguments, but the
    # StructuredTool built from it is still given a schema class.
# Module-level slot for the shared ChromaDB manager; stays None until first use.
_db_manager = None


def get_db_manager():
    """Return the shared ChromaDBManager, constructing it on the first call."""
    global _db_manager
    # Fast path: an instance was already created by an earlier call.
    if _db_manager is not None:
        return _db_manager
    _db_manager = ChromaDBManager()
    return _db_manager
def search_documentation(query: str, product: Optional[str] = None, version: Optional[str] = None) -> str:
    """
    Search documentation with flexible filtering

    The ChromaDB query is chosen from the filters that were supplied:

    * product and version -> that exact product/version
    * product only        -> every version of that product
    * version only        -> that version of every known product
    * neither             -> every version of every known product

    Args:
        query: Search query
        product: Optional product name (harmony or chorus)
        version: Optional product version (e.g., '1.2', '1.8')

    Returns:
        Search results as formatted string with metadata: a header line,
        numbered excerpts (each cut to 500 characters, with "..." appended
        only when text was actually cut), and a closing summary giving the
        product, version, document and page of every excerpt. When nothing
        matches, a single "No results found for '<query>'" line.
    """
    known_products = ("harmony", "chorus")
    max_results = 5          # cap on excerpts returned to the caller
    per_product_results = 3  # per-product request size when fanning out
    excerpt_chars = 500      # maximum characters shown per excerpt

    db_manager = get_db_manager()

    # Determine which search method to use based on parameters
    if product is not None and version is not None:
        # Search specific version of specific product
        docs = db_manager.query_with_filter(query, product, version, k=max_results)
        header = f"Results for '{query}' in {product} {version}:"
    elif product is not None:
        # Search all versions of specific product
        docs = db_manager.query_product_all_versions(query, product, k=max_results)
        header = f"Results for '{query}' across all {product} versions:"
    else:
        # No product filter: query each known product separately and combine.
        # A supplied version must still be honoured here; previously it was
        # silently dropped and results from every version were returned.
        combined = []
        for prod in known_products:
            if version is not None:
                combined.extend(
                    db_manager.query_with_filter(query, prod, version, k=per_product_results)
                )
            else:
                combined.extend(
                    db_manager.query_product_all_versions(query, prod, k=per_product_results)
                )
        docs = combined[:max_results]  # Limit total results
        if version is not None:
            header = f"Results for '{query}' in version {version} across all products:"
        else:
            header = f"Results for '{query}' across all products:"

    if not docs:
        return f"No results found for '{query}'"

    # Label each excerpt with its origin whenever the results can span more
    # than one product or version, i.e. whenever either filter is missing.
    show_source = product is None or version is None

    results = [header]
    summary_lines = []
    for i, doc in enumerate(docs, 1):
        metadata = doc.metadata

        if show_source and 'version' in metadata:
            source_label = f"[{metadata.get('product', 'unknown')} {metadata.get('version', 'unknown')}] "
        else:
            source_label = ""

        # Add main content; mark it with an ellipsis only if it was shortened
        content = doc.page_content
        excerpt = content[:excerpt_chars]
        if len(content) > excerpt_chars:
            excerpt += "..."
        results.append(f"\n{i}. {source_label}{excerpt}")

        # Collect the matching summary line for the trailing metadata section
        summary_lines.append(
            f"- Chunk {i}: {metadata.get('product', 'unknown')} "
            f"v{metadata.get('version', 'unknown')}, "
            f"{metadata.get('document', 'unknown')} "
            f"(page {metadata.get('page', 'unknown')})"
        )

    # Add metadata summary at the end
    results.append("\n\n**Metadata of chunks retrieved:**")
    results.extend(summary_lines)

    return "\n".join(results)
def list_available_versions() -> str:
    """
    Produce a text listing of every product and its versions

    Returns:
        A heading line followed by one section per product (name
        capitalized), each listing its versions one per line
    """
    catalog = get_db_manager().list_available_versions()

    # Gather the fragments first and join once at the end.
    pieces = ["Available product versions:\n"]
    for name, releases in catalog.items():
        pieces.append(f"\n{name.capitalize()}:\n")
        pieces.extend(f" - {release}\n" for release in releases)
    return "".join(pieces)
# Tool creation functions for agents to use
def search_documentation_tool() -> StructuredTool:
    """Build the StructuredTool that exposes search_documentation to an agent"""
    tool_spec = {
        "func": search_documentation,
        "name": "search_documentation",
        "description": "Search technical documentation with flexible filtering by product and version",
        "args_schema": SearchDocumentationInput,
    }
    return StructuredTool.from_function(**tool_spec)
def list_available_versions_tool() -> StructuredTool:
    """Build the StructuredTool that exposes list_available_versions to an agent"""
    versions_tool = StructuredTool.from_function(
        name="list_available_versions",
        description="List all available product versions in the documentation",
        func=list_available_versions,
        args_schema=ListVersionsInput,
    )
    return versions_tool
|