Spaces:
Sleeping
Sleeping
File size: 4,794 Bytes
f1b19d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
"""
RAG Search Tool - Semantic search using vector embeddings
"""
import logging
from typing import Dict, Any, List
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.rag_utils import semantic_search, create_rag_store
logger = logging.getLogger(__name__)
def search_documents(query: str, documents: List[str], top_k: int = 3) -> Dict[str, Any]:
"""
Perform semantic search on a collection of documents.
Args:
query: Search query string
documents: List of document strings to search
top_k: Number of top results to return
Returns:
Dictionary containing search results with scores
"""
try:
if not query or not query.strip():
raise ValueError("Query cannot be empty")
if not documents or len(documents) == 0:
raise ValueError("Documents list cannot be empty")
# Perform semantic search
results = semantic_search(query, documents, top_k)
return {
"query": query,
"total_documents": len(documents),
"returned_results": len(results),
"results": results
}
except Exception as e:
logger.error(f"Error performing RAG search: {e}")
raise
def build_knowledge_base(documents: List[str]) -> Dict[str, Any]:
"""
Build a knowledge base from documents for later querying.
Args:
documents: List of documents to index
Returns:
Dictionary with knowledge base info
"""
try:
if not documents:
raise ValueError("Documents list cannot be empty")
# Create RAG store
store = create_rag_store(documents)
return {
"success": True,
"document_count": len(documents),
"message": "Knowledge base built successfully",
"store": store # In a real scenario, this would be persisted
}
except Exception as e:
logger.error(f"Error building knowledge base: {e}")
raise
def multi_query_search(queries: List[str], documents: List[str], top_k: int = 3) -> Dict[str, Any]:
"""
Perform multiple searches with different queries on the same document set.
Args:
queries: List of query strings
documents: List of documents to search
top_k: Number of results per query
Returns:
Dictionary with results for each query
"""
try:
if not queries or not documents:
raise ValueError("Both queries and documents must be provided")
# Build store once for efficiency
store = create_rag_store(documents)
all_results = {}
for idx, query in enumerate(queries):
try:
results = store.search(query, top_k)
all_results[f"query_{idx+1}"] = {
"query": query,
"results": results
}
except Exception as e:
logger.error(f"Error searching query {idx+1}: {e}")
all_results[f"query_{idx+1}"] = {
"query": query,
"error": str(e),
"results": []
}
return {
"total_queries": len(queries),
"total_documents": len(documents),
"results": all_results
}
except Exception as e:
logger.error(f"Error in multi-query search: {e}")
raise
def find_similar_documents(target_doc: str, documents: List[str], top_k: int = 5) -> Dict[str, Any]:
"""
Find documents similar to a target document.
Args:
target_doc: The document to find similar ones for
documents: Corpus of documents to search
top_k: Number of similar documents to return
Returns:
Dictionary with similar documents
"""
try:
if not target_doc or not documents:
raise ValueError("Target document and documents list must be provided")
# Use target doc as query
results = semantic_search(target_doc, documents, top_k)
return {
"target_document": target_doc[:200] + "..." if len(target_doc) > 200 else target_doc,
"corpus_size": len(documents),
"similar_documents": results
}
except Exception as e:
logger.error(f"Error finding similar documents: {e}")
raise
|