"""
RAG Search Tool - Semantic search using vector embeddings
"""

import logging
from typing import Dict, Any, List
import sys
import os

# Add parent directory to path for imports.
# This must run before the ``utils`` import below, which relies on it.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.rag_utils import semantic_search, create_rag_store

logger = logging.getLogger(__name__)


def _validate_top_k(top_k: int) -> None:
    """Raise ValueError unless ``top_k`` requests at least one result."""
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")


def search_documents(query: str, documents: List[str], top_k: int = 3) -> Dict[str, Any]:
    """
    Perform semantic search on a collection of documents.

    Args:
        query: Search query string; must contain non-whitespace characters.
        documents: Non-empty list of document strings to search.
        top_k: Number of top results to request (must be >= 1).

    Returns:
        Dictionary with the keys ``query``, ``total_documents``,
        ``returned_results`` and ``results``. ``results`` is whatever
        ``semantic_search`` returns for the given arguments.

    Raises:
        ValueError: If the query is blank, the documents list is empty,
            or ``top_k`` is smaller than 1.
        Exception: Any error raised by ``semantic_search`` is logged and
            re-raised unchanged.
    """
    try:
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        if not documents:
            raise ValueError("Documents list cannot be empty")

        _validate_top_k(top_k)

        # Perform semantic search
        results = semantic_search(query, documents, top_k)

        return {
            "query": query,
            "total_documents": len(documents),
            "returned_results": len(results),
            "results": results,
        }
    except Exception as e:
        logger.error("Error performing RAG search: %s", e)
        raise


def build_knowledge_base(documents: List[str]) -> Dict[str, Any]:
    """
    Build a knowledge base from documents for later querying.

    Args:
        documents: Non-empty list of documents to index.

    Returns:
        Dictionary with the keys ``success``, ``document_count``,
        ``message`` and ``store``. ``store`` is the object returned by
        ``create_rag_store``.

    Raises:
        ValueError: If the documents list is empty.
        Exception: Any error raised by ``create_rag_store`` is logged and
            re-raised unchanged.
    """
    try:
        if not documents:
            raise ValueError("Documents list cannot be empty")

        # Create RAG store
        store = create_rag_store(documents)

        return {
            "success": True,
            "document_count": len(documents),
            "message": "Knowledge base built successfully",
            "store": store,  # In a real scenario, this would be persisted
        }
    except Exception as e:
        logger.error("Error building knowledge base: %s", e)
        raise


def multi_query_search(queries: List[str], documents: List[str], top_k: int = 3) -> Dict[str, Any]:
    """
    Perform multiple searches with different queries on the same document set.

    The store is built once and reused for every query. A failure on one
    query (including a blank query) is logged and recorded in that query's
    entry under ``error`` with an empty ``results`` list; the remaining
    queries still run.

    Args:
        queries: Non-empty list of query strings.
        documents: Non-empty list of documents to search.
        top_k: Number of results to request per query (must be >= 1).

    Returns:
        Dictionary with the keys ``total_queries``, ``total_documents`` and
        ``results``. ``results`` maps ``"query_1"``, ``"query_2"``, ... (in
        input order) to a dictionary holding the ``query`` and its
        ``results``, plus ``error`` when that query failed.

    Raises:
        ValueError: If queries or documents are empty, or ``top_k`` is
            smaller than 1.
        Exception: Any error raised while building the store is logged and
            re-raised unchanged.
    """
    try:
        if not queries or not documents:
            raise ValueError("Both queries and documents must be provided")

        # Fail fast before paying for the store build.
        _validate_top_k(top_k)

        # Build store once for efficiency
        store = create_rag_store(documents)

        all_results = {}
        for position, query in enumerate(queries, start=1):
            key = f"query_{position}"
            try:
                # Same rule as search_documents; handled per query so one
                # blank entry does not abort the whole batch.
                if not query or not query.strip():
                    raise ValueError("Query cannot be empty")

                results = store.search(query, top_k)
                all_results[key] = {
                    "query": query,
                    "results": results,
                }
            except Exception as e:
                logger.error("Error searching query %s: %s", position, e)
                all_results[key] = {
                    "query": query,
                    "error": str(e),
                    "results": [],
                }

        return {
            "total_queries": len(queries),
            "total_documents": len(documents),
            "results": all_results,
        }
    except Exception as e:
        logger.error("Error in multi-query search: %s", e)
        raise


def find_similar_documents(target_doc: str, documents: List[str], top_k: int = 5) -> Dict[str, Any]:
    """
    Find documents similar to a target document.

    The target document itself is used as the query passed to
    ``semantic_search``.

    Args:
        target_doc: The document to find similar ones for.
        documents: Non-empty corpus of documents to search.
        top_k: Number of similar documents to request (must be >= 1).

    Returns:
        Dictionary with the keys ``target_document``, ``corpus_size`` and
        ``similar_documents``. ``target_document`` is the target text, cut
        to its first 200 characters followed by ``"..."`` when longer than
        200 characters. ``similar_documents`` is whatever
        ``semantic_search`` returns.

    Raises:
        ValueError: If the target document or documents list is empty, or
            ``top_k`` is smaller than 1.
        Exception: Any error raised by ``semantic_search`` is logged and
            re-raised unchanged.
    """
    try:
        if not target_doc or not documents:
            raise ValueError("Target document and documents list must be provided")

        _validate_top_k(top_k)

        # Use target doc as query
        results = semantic_search(target_doc, documents, top_k)

        # Only a short preview of long targets is echoed back.
        if len(target_doc) > 200:
            preview = target_doc[:200] + "..."
        else:
            preview = target_doc

        return {
            "target_document": preview,
            "corpus_size": len(documents),
            "similar_documents": results,
        }
    except Exception as e:
        logger.error("Error finding similar documents: %s", e)
        raise