File size: 4,794 Bytes
f1b19d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""

RAG Search Tool - Semantic search using vector embeddings

"""
import logging
from typing import Dict, Any, List
import sys
import os

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.rag_utils import semantic_search, create_rag_store

logger = logging.getLogger(__name__)


def search_documents(query: str, documents: List[str], top_k: int = 3) -> Dict[str, Any]:
    """
    Run a semantic search for ``query`` over a collection of documents.

    Args:
        query: Search query string; must contain non-whitespace text.
        documents: Non-empty list of document strings to search.
        top_k: Number of top results requested from ``semantic_search``.

    Returns:
        Dictionary with the keys ``query`` (the query as given),
        ``total_documents`` (``len(documents)``), ``returned_results``
        (length of the search result) and ``results`` (the value returned
        by ``semantic_search``, passed through unchanged).

    Raises:
        ValueError: If ``query`` is empty or whitespace-only, or if
            ``documents`` is empty.
        Exception: Any other failure raised by the search is logged and
            re-raised unchanged.
    """
    try:
        # Guard clauses: reject unusable input before any search work starts.
        if not (query and query.strip()):
            raise ValueError("Query cannot be empty")

        corpus_size = len(documents) if documents else 0
        if corpus_size == 0:
            raise ValueError("Documents list cannot be empty")

        matches = semantic_search(query, documents, top_k)

        # Assemble the response in the same key order callers already see.
        response: Dict[str, Any] = {"query": query}
        response["total_documents"] = len(documents)
        response["returned_results"] = len(matches)
        response["results"] = matches
        return response

    except Exception as exc:
        # Validation errors are logged here too, then propagated to the caller.
        logger.error(f"Error performing RAG search: {exc}")
        raise


def build_knowledge_base(documents: List[str]) -> Dict[str, Any]:
    """
    Index ``documents`` into a RAG store so they can be queried later.

    Args:
        documents: Non-empty list of documents to index.

    Returns:
        Dictionary with the keys ``success`` (always ``True`` on return),
        ``document_count`` (``len(documents)``), ``message`` (a fixed
        confirmation string) and ``store`` (the object returned by
        ``create_rag_store``).

    Raises:
        ValueError: If ``documents`` is empty.
        Exception: Any failure while creating the store is logged and
            re-raised unchanged.
    """
    try:
        if documents:
            rag_store = create_rag_store(documents)

            # The store is handed back in memory only; in a real scenario
            # it would be persisted instead.
            return {
                "success": True,
                "document_count": len(documents),
                "message": "Knowledge base built successfully",
                "store": rag_store,
            }

        raise ValueError("Documents list cannot be empty")

    except Exception as exc:
        logger.error(f"Error building knowledge base: {exc}")
        raise


def multi_query_search(queries: List[str], documents: List[str], top_k: int = 3) -> Dict[str, Any]:
    """
    Run several queries against one shared document set.

    A single RAG store is created from ``documents`` and reused for every
    query. A failure in one query is recorded in that query's entry and
    does not abort the remaining queries.

    Args:
        queries: Non-empty list of query strings.
        documents: Non-empty list of documents to search.
        top_k: Number of results requested per query.

    Returns:
        Dictionary with the keys ``total_queries``, ``total_documents`` and
        ``results``. ``results`` maps ``"query_<n>"`` (1-based position) to
        a dictionary holding ``query`` and ``results``; entries for failed
        queries also carry ``error`` (the exception text) and an empty
        ``results`` list.

    Raises:
        ValueError: If ``queries`` or ``documents`` is empty.
        Exception: Failures outside the per-query search (for example while
            creating the store) are logged and re-raised unchanged.
    """
    try:
        if not (queries and documents):
            raise ValueError("Both queries and documents must be provided")

        # One store serves every query, so the documents are indexed once.
        rag_store = create_rag_store(documents)

        per_query: Dict[str, Any] = {}
        for position, text in enumerate(queries, start=1):
            label = f"query_{position}"
            try:
                entry = {
                    "query": text,
                    "results": rag_store.search(text, top_k),
                }
            except Exception as exc:
                # Record the failure for this query and keep going.
                logger.error(f"Error searching query {position}: {exc}")
                entry = {
                    "query": text,
                    "error": str(exc),
                    "results": [],
                }
            per_query[label] = entry

        return {
            "total_queries": len(queries),
            "total_documents": len(documents),
            "results": per_query,
        }

    except Exception as exc:
        logger.error(f"Error in multi-query search: {exc}")
        raise


def find_similar_documents(target_doc: str, documents: List[str], top_k: int = 5) -> Dict[str, Any]:
    """
    Find documents similar to a target document.

    The target document itself is passed to ``semantic_search`` as the query.

    Args:
        target_doc: The document to find similar ones for; must contain
            non-whitespace text.
        documents: Non-empty corpus of documents to search.
        top_k: Number of similar documents requested.

    Returns:
        Dictionary with the keys ``target_document`` (the target text,
        cut to its first 200 characters plus ``"..."`` when longer),
        ``corpus_size`` (``len(documents)``) and ``similar_documents``
        (the value returned by ``semantic_search``, passed through
        unchanged).

    Raises:
        ValueError: If ``target_doc`` is empty or whitespace-only, or if
            ``documents`` is empty.
        Exception: Any other failure raised by the search is logged and
            re-raised unchanged.
    """
    preview_limit = 200

    try:
        # A whitespace-only target has no searchable text, so it is rejected
        # just like an empty one (search_documents treats blank queries the
        # same way).
        if not target_doc or not target_doc.strip() or not documents:
            raise ValueError("Target document and documents list must be provided")

        # Use target doc as query
        results = semantic_search(target_doc, documents, top_k)

        # Echo back only a short preview so large targets do not bloat the
        # response.
        if len(target_doc) > preview_limit:
            preview = target_doc[:preview_limit] + "..."
        else:
            preview = target_doc

        return {
            "target_document": preview,
            "corpus_size": len(documents),
            "similar_documents": results,
        }

    except Exception as e:
        logger.error(f"Error finding similar documents: {e}")
        raise