Agentic-Service-Data-Eyond-Catalog

Running

sofhiaazzhr Claude Sonnet 4.6 committed 16 days ago

Commit

df220ea

1 Parent(s): f31f673

[KM-553] migrate retrieval layer and remove obsolete rag/tools modules

- Replace src/rag/ with src/retrieval/: implement DocumentRetriever (MMR/cosine/euclidean/manhattan), simplified RetrievalRouter (unstructured-only, no schema leg, Redis cache preserved), and shared RetrievalResult/BaseRetriever base
- Remove src/tools/ (orphaned LangChain @tool wrapper, never called by production code)
- Update RetrievalResult imports in chat.py, query/base.py, query/executors/db_executor.py, query/executors/tabular.py, query/query_executor.py from src.rag.base to src.retrieval.base
- Wire chat.py to new retrieval_router (aliased as retriever, no call-site changes)
- Delete dead stubs: src/query/service.py, src/models/user_info.py, src/pipeline/document_pipeline.py (flat, shadowed by subfolder)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (20) hide show

src/api/v1/chat.py +2 -2
src/models/user_info.py +0 -15
src/pipeline/document_pipeline.py +0 -11
src/query/base.py +1 -1
src/query/executors/db_executor.py +1 -1
src/query/executors/tabular.py +1 -1
src/query/query_executor.py +1 -1
src/query/service.py +0 -15
src/rag/__init__.py +0 -0
src/rag/retriever.py +0 -46
src/rag/retrievers/__init__.py +0 -0
src/rag/retrievers/baseline.py +0 -76
src/rag/retrievers/document.py +0 -158
src/rag/retrievers/schema.py +0 -411
src/rag/router.py +0 -179
src/{rag → retrieval}/base.py +1 -1
src/retrieval/document.py +154 -7
src/retrieval/router.py +78 -6
src/tools/__init__.py +0 -0
src/tools/search.py +0 -46

src/api/v1/chat.py CHANGED Viewed

@@ -8,8 +8,8 @@ from src.db.postgres.connection import get_db
 from src.db.postgres.models import ChatMessage, MessageSource
 from src.agents.orchestration import orchestrator
 from src.agents.chatbot import chatbot
-from src.rag.retriever import retriever
-from src.rag.base import RetrievalResult
 from src.query.query_executor import query_executor
 from src.query.base import QueryResult
 from src.db.redis.connection import get_redis

 from src.db.postgres.models import ChatMessage, MessageSource
 from src.agents.orchestration import orchestrator
 from src.agents.chatbot import chatbot
+from src.retrieval.router import retrieval_router as retriever
+from src.retrieval.base import RetrievalResult
 from src.query.query_executor import query_executor
 from src.query.base import QueryResult
 from src.db.redis.connection import get_redis

src/models/user_info.py DELETED Viewed

@@ -1,15 +0,0 @@
-"""User info models for existing users.py."""
-from pydantic import BaseModel
-class UserCreate(BaseModel):
-    """User creation model."""
-    fullname: str
-    email: str
-    password: str
-    company: str | None = None
-    company_size: str | None = None
-    function: str | None = None
-    site: str | None = None
-    role: str | None = None

src/pipeline/document_pipeline.py DELETED Viewed

@@ -1,11 +0,0 @@
-"""DocumentPipeline — extract text, chunk, embed, ingest to PGVector.
-For unstructured sources (PDF / DOCX / TXT). Receives the working
-implementation from the previous pipeline/document_pipeline/document_pipeline.py
-during the cleanup phase.
-"""
-class DocumentPipeline:
-    async def run(self, document_id: str, user_id: str) -> None:
-        raise NotImplementedError

src/query/base.py CHANGED Viewed

@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
 from sqlalchemy.ext.asyncio import AsyncSession
-from src.rag.base import RetrievalResult
 @dataclass

 from sqlalchemy.ext.asyncio import AsyncSession
+from src.retrieval.base import RetrievalResult
 @dataclass

src/query/executors/db_executor.py CHANGED Viewed

@@ -31,7 +31,7 @@ from src.middlewares.logging import get_logger
 from src.models.sql_query import SQLQuery
 from src.pipeline.db_pipeline import db_pipeline_service
 from src.query.base import BaseExecutor, QueryResult
-from src.rag.base import RetrievalResult
 from src.utils.db_credential_encryption import decrypt_credentials_dict
 logger = get_logger("db_executor")

 from src.models.sql_query import SQLQuery
 from src.pipeline.db_pipeline import db_pipeline_service
 from src.query.base import BaseExecutor, QueryResult
+from src.retrieval.base import RetrievalResult
 from src.utils.db_credential_encryption import decrypt_credentials_dict
 logger = get_logger("db_executor")

src/query/executors/tabular.py CHANGED Viewed

@@ -22,7 +22,7 @@ from src.config.settings import settings
 from src.knowledge.parquet_service import download_parquet
 from src.middlewares.logging import get_logger
 from src.query.base import BaseExecutor, QueryResult
-from src.rag.base import RetrievalResult
 logger = get_logger("tabular_executor")

 from src.knowledge.parquet_service import download_parquet
 from src.middlewares.logging import get_logger
 from src.query.base import BaseExecutor, QueryResult
+from src.retrieval.base import RetrievalResult
 logger = get_logger("tabular_executor")

src/query/query_executor.py CHANGED Viewed

@@ -8,7 +8,7 @@ from src.middlewares.logging import get_logger
 from src.query.base import QueryResult
 from src.query.executors.db_executor import db_executor
 from src.query.executors.tabular import tabular_executor
-from src.rag.base import RetrievalResult
 logger = get_logger("query_executor")

 from src.query.base import QueryResult
 from src.query.executors.db_executor import db_executor
 from src.query.executors.tabular import tabular_executor
+from src.retrieval.base import RetrievalResult
 logger = get_logger("query_executor")

src/query/service.py DELETED Viewed

@@ -1,15 +0,0 @@
-"""QueryService — orchestrates plan → validate → compile → execute.
-Top-level entry point for catalog-driven structured queries. Wired into
-the chat endpoint when source_hint == "structured".
-"""
-from ..catalog.models import Catalog
-from .executor.base import QueryResult
-class QueryService:
-    """End-to-end runner for a user question against a catalog."""
-    async def run(self, user_id: str, question: str, catalog: Catalog) -> QueryResult:
-        raise NotImplementedError

src/rag/__init__.py DELETED Viewed

File without changes

src/rag/retriever.py DELETED Viewed

@@ -1,46 +0,0 @@
-"""Public retrieval API — thin wrapper around RetrievalRouter."""
-from sqlalchemy.ext.asyncio import AsyncSession
-from src.middlewares.logging import get_logger
-from src.rag.base import RetrievalResult
-from src.rag.retrievers.document import document_retriever
-from src.rag.retrievers.schema import schema_retriever
-from src.rag.router import RetrievalRouter, SourceHint
-logger = get_logger("retriever")
-class RetrieverService:
-    """Public retrieval service used by chat.py and search tools.
-    Delegates to RetrievalRouter which dispatches based on source_hint.
-    Returns RetrievalResult objects directly so downstream consumers
-    (db_executor, tabular_executor) can be fed without lossy dict
-    conversion. The `db` parameter is accepted for call-site compatibility
-    but currently unused — retrieval reads PGVector via _pgvector_engine
-    inside each retriever.
-    """
-    def __init__(self):
-        self._router = RetrievalRouter(
-            schema_retriever=schema_retriever,
-            document_retriever=document_retriever,
-        )
-    async def retrieve(
-        self,
-        query: str,
-        user_id: str,
-        db: AsyncSession,
-        k: int = 5,
-        source_hint: SourceHint = "both",
-    ) -> list[RetrievalResult]:
-        try:
-            return await self._router.retrieve(query, user_id, source_hint, k)
-        except Exception as e:
-            logger.error("retrieval failed", error=str(e))
-            return []
-retriever = RetrieverService()

src/rag/retrievers/__init__.py DELETED Viewed

File without changes

src/rag/retrievers/baseline.py DELETED Viewed

@@ -1,76 +0,0 @@
-"""Service for retrieving relevant documents from vector store."""
-import hashlib
-import json
-from src.db.postgres.vector_store import get_vector_store
-from src.db.redis.connection import get_redis
-from sqlalchemy.ext.asyncio import AsyncSession
-from src.middlewares.logging import get_logger
-from typing import List, Dict, Any
-logger = get_logger("retriever")
-_RETRIEVAL_CACHE_TTL = 3600  # 1 hour
-class BaselineRetrieverService:
-    """Baseline (pre-Phase-1) retriever — preserved for benchmark comparison.
-    Renamed from RetrieverService so it doesn't shadow the production wrapper
-    at src/rag/retriever.py. Production code imports from src.rag.retriever;
-    benchmark scripts that want this baseline must import explicitly from
-    src.rag.retrievers.baseline.
-    """
-    def __init__(self):
-        self.vector_store = get_vector_store()
-    async def retrieve(
-        self,
-        query: str,
-        user_id: str,
-        db: AsyncSession,
-        k: int = 5
-    ) -> List[Dict[str, Any]]:
-        """Retrieve relevant chunks for a query, scoped to the user's documents.
-        Returns:
-            List of dicts with keys: content, metadata
-            metadata includes: document_id, user_id, filename, chunk_index, page_label (if PDF)
-        """
-        try:
-            redis = await get_redis()
-            query_hash = hashlib.md5(query.encode()).hexdigest()
-            cache_key = f"retrieval:{user_id}:{query_hash}:{k}"
-            cached = await redis.get(cache_key)
-            if cached:
-                logger.info("Returning cached retrieval results")
-                return json.loads(cached)
-            logger.info(f"Retrieving for user {user_id}, query: {query[:50]}...")
-            docs = await self.vector_store.asimilarity_search(
-                query=query,
-                k=k,
-                filter={"user_id": user_id}
-            )
-            results = [
-                {
-                    "content": doc.page_content,
-                    "metadata": doc.metadata,
-                }
-                for doc in docs
-            ]
-            logger.info(f"Retrieved {len(results)} chunks")
-            await redis.setex(cache_key, _RETRIEVAL_CACHE_TTL, json.dumps(results))
-            return results
-        except Exception as e:
-            logger.error("Retrieval failed", error=str(e))
-            return []
-baseline_retriever = BaselineRetrieverService()

src/rag/retrievers/document.py DELETED Viewed

@@ -1,158 +0,0 @@
-"""Document retriever — handles PDF, DOCX, TXT chunks (source_type="document", non-tabular)."""
-import math
-from langchain_postgres import PGVector
-from langchain_postgres.vectorstores import DistanceStrategy
-from langchain_openai import AzureOpenAIEmbeddings
-from sqlalchemy import text
-from src.config.settings import settings
-from src.db.postgres.connection import _pgvector_engine
-from src.db.postgres.vector_store import get_vector_store
-from src.middlewares.logging import get_logger
-from src.rag.base import BaseRetriever, RetrievalResult
-logger = get_logger("document_retriever")
-# Change this one line to switch retrieval method
-# Options: "mmr" | "cosine" | "euclidean" | "inner_product" | "manhattan"
-_RETRIEVAL_METHOD = "mmr"
-_TABULAR_TYPES = {"csv", "xlsx"}
-_FETCH_K = 20
-_LAMBDA_MULT = 0.5
-_COLLECTION_NAME = "document_embeddings"
-_embeddings = AzureOpenAIEmbeddings(
-    azure_deployment=settings.azureai_deployment_name_embedding,
-    openai_api_version=settings.azureai_api_version_embedding,
-    azure_endpoint=settings.azureai_endpoint_url_embedding,
-    api_key=settings.azureai_api_key_embedding,
-)
-_euclidean_store = PGVector(
-    embeddings=_embeddings,
-    connection=_pgvector_engine,
-    collection_name=_COLLECTION_NAME,
-    distance_strategy=DistanceStrategy.EUCLIDEAN,
-    use_jsonb=True,
-    async_mode=True,
-    create_extension=False,
-)
-_ip_store = PGVector(
-    embeddings=_embeddings,
-    connection=_pgvector_engine,
-    collection_name=_COLLECTION_NAME,
-    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
-    use_jsonb=True,
-    async_mode=True,
-    create_extension=False,
-)
-_MANHATTAN_SQL = text("""
-    SELECT
-        lpe.document,
-        lpe.cmetadata,
-        lpe.embedding <+> CAST(:embedding AS vector) AS distance
-    FROM langchain_pg_embedding lpe
-    JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
-    WHERE lpc.name = :collection
-      AND lpe.cmetadata->>'user_id' = :user_id
-      AND lpe.cmetadata->>'source_type' = 'document'
-    ORDER BY distance ASC
-    LIMIT :k
-""")
-class DocumentRetriever(BaseRetriever):
-    def __init__(self) -> None:
-        self.vector_store = get_vector_store()
-    async def retrieve(
-        self, query: str, user_id: str, k: int = 5
-    ) -> list[RetrievalResult]:
-        filter_ = {"user_id": user_id, "source_type": "document"}
-        fetch_k = k + len(_TABULAR_TYPES)
-        if _RETRIEVAL_METHOD == "manhattan":
-            return await self._retrieve_manhattan(query, user_id, k, fetch_k)
-        if _RETRIEVAL_METHOD == "mmr":
-            docs = await self.vector_store.amax_marginal_relevance_search(
-                query=query,
-                k=fetch_k,
-                fetch_k=_FETCH_K,
-                lambda_mult=_LAMBDA_MULT,
-                filter=filter_,
-            )
-            cosine = await self.vector_store.asimilarity_search_with_score(
-                query=query, k=fetch_k, filter=filter_,
-            )
-            score_map = {doc.page_content: score for doc, score in cosine}
-            docs_with_scores = [(doc, score_map.get(doc.page_content, 0.0)) for doc in docs]
-        elif _RETRIEVAL_METHOD == "euclidean":
-            docs_with_scores = await _euclidean_store.asimilarity_search_with_score(
-                query=query, k=fetch_k, filter=filter_,
-            )
-        elif _RETRIEVAL_METHOD == "inner_product":
-            docs_with_scores = await _ip_store.asimilarity_search_with_score(
-                query=query, k=fetch_k, filter=filter_,
-            )
-        else:  # cosine
-            docs_with_scores = await self.vector_store.asimilarity_search_with_score(
-                query=query, k=fetch_k, filter=filter_,
-            )
-        results = []
-        for doc, score in docs_with_scores:
-            file_type = doc.metadata.get("data", {}).get("file_type", "")
-            if file_type not in _TABULAR_TYPES:
-                results.append(RetrievalResult(
-                    content=doc.page_content,
-                    metadata=doc.metadata,
-                    score=score,
-                    source_type="document",
-                ))
-            if len(results) == k:
-                break
-        logger.info("retrieved chunks", method=_RETRIEVAL_METHOD, count=len(results))
-        return results
-    async def _retrieve_manhattan(
-        self, query: str, user_id: str, k: int, fetch_k: int
-    ) -> list[RetrievalResult]:
-        query_vector = await _embeddings.aembed_query(query)
-        if not all(math.isfinite(v) for v in query_vector):
-            raise ValueError("Embedding vector contains NaN or Infinity values.")
-        vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"
-        async with _pgvector_engine.connect() as conn:
-            result = await conn.execute(_MANHATTAN_SQL, {
-                "embedding": vector_str,
-                "collection": _COLLECTION_NAME,
-                "user_id": user_id,
-                "k": fetch_k,
-            })
-            rows = result.fetchall()
-        results = []
-        for row in rows:
-            file_type = row.cmetadata.get("data", {}).get("file_type", "")
-            if file_type not in _TABULAR_TYPES:
-                results.append(RetrievalResult(
-                    content=row.document,
-                    metadata=row.cmetadata,
-                    score=float(row.distance),
-                    source_type="document",
-                ))
-            if len(results) == k:
-                break
-        logger.info("retrieved chunks", method="manhattan", count=len(results))
-        return results
-document_retriever = DocumentRetriever()

src/rag/retrievers/schema.py DELETED Viewed

@@ -1,411 +0,0 @@
-"""Schema retriever — handles DB schemas (source_type="database") and tabular file
-columns stored as source_type="document" with file_type in ("csv","xlsx").
-Strategy: hybrid_bm25 — RRF merge of dense cosine search (DB columns + DB tables
-+ tabular columns + tabular sheets) and PostgreSQL full-text search (DB columns only).
-Embeds the query once, fans out five legs in parallel.
-The DB-tables leg surfaces table-level summary chunks (chunk_level='table') as
-a recall signal for multi-table questions: when a relevant table's columns
-don't individually win on similarity, the table chunk can still pull the table
-into the hit set, where db_executor's downstream full-schema fetch picks up
-the per-column detail.
-FTS requires a GIN index on langchain_pg_embedding.document (created by init_db.py).
-"""
-import asyncio
-from sqlalchemy import text
-from src.db.postgres.connection import _pgvector_engine
-from src.db.postgres.vector_store import get_vector_store
-from src.middlewares.logging import get_logger
-from src.rag.base import BaseRetriever, RetrievalResult
-logger = get_logger("schema_retriever")
-_TABULAR_FILE_TYPES = ("csv", "xlsx")
-_TABLE_CHUNK_K_MULTIPLIER = 2  # how many table chunks to pull before RRF
-class SchemaRetriever(BaseRetriever):
-    def __init__(self):
-        self.vector_store = get_vector_store()
-    # ------------------------------------------------------------------
-    # Internal helpers
-    # ------------------------------------------------------------------
-    async def _embed_query(self, query: str) -> list[float]:
-        return await asyncio.to_thread(self.vector_store.embeddings.embed_query, query)
-    async def _search_db(
-        self, embedding: list[float], user_id: str, k: int
-    ) -> list[RetrievalResult]:
-        """Cosine vector search over database chunks."""
-        emb_str = "[" + ",".join(str(x) for x in embedding) + "]"
-        sql = text(f"""
-            SELECT lpe.document, lpe.cmetadata,
-                   1.0 - (lpe.embedding <=> '{emb_str}'::vector) AS score
-            FROM langchain_pg_embedding lpe
-            JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
-            WHERE lpc.name = 'document_embeddings'
-              AND lpe.cmetadata->>'user_id' = :user_id
-              AND lpe.cmetadata->>'source_type' = 'database'
-              AND lpe.cmetadata->>'chunk_level' = 'column'
-            ORDER BY lpe.embedding <=> '{emb_str}'::vector ASC
-            LIMIT :k
-        """)
-        async with _pgvector_engine.connect() as conn:
-            result = await conn.execute(sql, {"user_id": user_id, "k": k * 4})
-            rows = result.fetchall()
-        return [
-            RetrievalResult(
-                content=row.document,
-                metadata=row.cmetadata,
-                score=float(row.score),
-                source_type="database",
-            )
-            for row in rows
-        ]
-    async def _search_db_tables(
-        self, embedding: list[float], user_id: str, k: int
-    ) -> list[RetrievalResult]:
-        """Cosine vector search over database TABLE-level chunks.
-        Recall channel for multi-table questions. The chunk's content is
-        discarded downstream — db_executor only consumes its `data.table_name`
-        to seed full-schema fetch.
-        """
-        emb_str = "[" + ",".join(str(x) for x in embedding) + "]"
-        sql = text(f"""
-            SELECT lpe.document, lpe.cmetadata,
-                   1.0 - (lpe.embedding <=> '{emb_str}'::vector) AS score
-            FROM langchain_pg_embedding lpe
-            JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
-            WHERE lpc.name = 'document_embeddings'
-              AND lpe.cmetadata->>'user_id' = :user_id
-              AND lpe.cmetadata->>'source_type' = 'database'
-              AND lpe.cmetadata->>'chunk_level' = 'table'
-            ORDER BY lpe.embedding <=> '{emb_str}'::vector ASC
-            LIMIT :k
-        """)
-        async with _pgvector_engine.connect() as conn:
-            result = await conn.execute(
-                sql, {"user_id": user_id, "k": k * _TABLE_CHUNK_K_MULTIPLIER}
-            )
-            rows = result.fetchall()
-        return [
-            RetrievalResult(
-                content=row.document,
-                metadata=row.cmetadata,
-                score=float(row.score),
-                source_type="database",
-            )
-            for row in rows
-        ]
-    async def _search_tabular(
-        self, embedding: list[float], user_id: str, k: int
-    ) -> list[RetrievalResult]:
-        """Cosine vector search over tabular document chunks (csv/xlsx)."""
-        emb_str = "[" + ",".join(str(x) for x in embedding) + "]"
-        sql = text(f"""
-            SELECT lpe.document, lpe.cmetadata,
-                   1.0 - (lpe.embedding <=> '{emb_str}'::vector) AS score
-            FROM langchain_pg_embedding lpe
-            JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
-            WHERE lpc.name = 'document_embeddings'
-              AND lpe.cmetadata->>'user_id' = :user_id
-              AND lpe.cmetadata->>'source_type' = 'document'
-              AND lpe.cmetadata->>'chunk_level' = 'column'
-              AND (lpe.cmetadata->'data'->>'file_type' = 'csv'
-                OR lpe.cmetadata->'data'->>'file_type' = 'xlsx')
-            ORDER BY lpe.embedding <=> '{emb_str}'::vector ASC
-            LIMIT :k
-        """)
-        async with _pgvector_engine.connect() as conn:
-            result = await conn.execute(sql, {"user_id": user_id, "k": k * 4})
-            rows = result.fetchall()
-        return [
-            RetrievalResult(
-                content=row.document,
-                metadata=row.cmetadata,
-                score=float(row.score),
-                source_type="document",
-            )
-            for row in rows
-        ]
-    async def _search_tabular_sheets(
-        self, embedding: list[float], user_id: str, k: int
-    ) -> list[RetrievalResult]:
-        """Leg 5: sheet-level summary chunks from CSV/XLSX files."""
-        emb_str = "[" + ",".join(str(x) for x in embedding) + "]"
-        sql = text(f"""
-            SELECT lpe.document, lpe.cmetadata,
-                   1.0 - (lpe.embedding <=> '{emb_str}'::vector) AS score
-            FROM langchain_pg_embedding lpe
-            JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
-            WHERE lpc.name = 'document_embeddings'
-              AND lpe.cmetadata->>'user_id' = :user_id
-              AND lpe.cmetadata->>'source_type' = 'document'
-              AND lpe.cmetadata->>'chunk_level' = 'sheet'
-              AND (lpe.cmetadata->'data'->>'file_type' = 'csv'
-                OR lpe.cmetadata->'data'->>'file_type' = 'xlsx')
-            ORDER BY lpe.embedding <=> '{emb_str}'::vector ASC
-            LIMIT :k
-        """)
-        async with _pgvector_engine.connect() as conn:
-            result = await conn.execute(sql, {"user_id": user_id, "k": k})
-            rows = result.fetchall()
-        return [
-            RetrievalResult(
-                content=row.document,
-                metadata=row.cmetadata,
-                score=float(row.score),
-                source_type="document",
-            )
-            for row in rows
-        ]
-    async def _search_fts_db(self, query: str, user_id: str, k: int) -> list[RetrievalResult]:
-        """Full-text search over DB schema chunks using PostgreSQL tsvector."""
-        sql = text("""
-            SELECT lpe.document, lpe.cmetadata,
-                   ts_rank(to_tsvector('english', lpe.document),
-                           plainto_tsquery('english', :query)) AS rank
-            FROM langchain_pg_embedding lpe
-            JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
-            WHERE lpc.name = 'document_embeddings'
-              AND lpe.cmetadata->>'user_id' = :user_id
-              AND lpe.cmetadata->>'source_type' = 'database'
-              AND lpe.cmetadata->>'chunk_level' = 'column'
-              AND to_tsvector('english', lpe.document) @@ plainto_tsquery('english', :query)
-            ORDER BY rank DESC
-            LIMIT :k
-        """)
-        async with _pgvector_engine.connect() as conn:
-            result = await conn.execute(sql, {"query": query, "user_id": user_id, "k": k})
-            rows = result.fetchall()
-        return [
-            RetrievalResult(
-                content=row.document,
-                metadata=row.cmetadata,
-                score=float(row.rank),
-                source_type="database",
-            )
-            for row in rows
-        ]
-    def _rank_tabular_sheets(
-        self,
-        sheet_results: list[RetrievalResult],
-        column_results: list[RetrievalResult],
-        top_k: int,
-        k_rrf: int = 60,
-    ) -> list[RetrievalResult]:
-        """Rank tabular sheets by RRF across two voting legs:
-          L1 (primary): sheet-chunk cosine score
-          L2 (vote):    best column-chunk position per (doc_id, sheet_name)
-        Returns top-k sheet-level RetrievalResults. The full column list of
-        each sheet is already in the sheet chunk's data.column_names from
-        ingestion, so downstream tabular_executor can read full sheet context.
-        For sheets surfaced by column votes but missing a sheet chunk (rare —
-        ingestion always creates one), a minimal stub is returned and
-        tabular_executor falls back to reading columns from the parquet.
-        """
-        # L1: sheets indexed by (doc_id, sheet_name) from sheet chunks
-        sheet_index: dict[tuple, RetrievalResult] = {}
-        sheet_ranked: list[tuple] = []
-        for r in sheet_results:
-            d = r.metadata.get("data", {})
-            key = (d.get("document_id"), d.get("sheet_name"))
-            if key[0] and key not in sheet_index:
-                sheet_index[key] = r
-                sheet_ranked.append(key)
-        # L2: sheets ranked by first-appearance in column-chunk results
-        col_sheet_ranked: list[tuple] = []
-        seen: set[tuple] = set()
-        for r in column_results:
-            d = r.metadata.get("data", {})
-            key = (d.get("document_id"), d.get("sheet_name"))
-            if key[0] and key not in seen:
-                col_sheet_ranked.append(key)
-                seen.add(key)
-        # RRF over (doc_id, sheet_name) across the two legs
-        rrf_scores: dict[tuple, float] = {}
-        for ranked_list in [sheet_ranked, col_sheet_ranked]:
-            for rank, key in enumerate(ranked_list):
-                rrf_scores[key] = rrf_scores.get(key, 0.0) + 1.0 / (k_rrf + rank + 1)
-        top_sheets = sorted(rrf_scores, key=lambda k: rrf_scores[k], reverse=True)[:top_k]
-        results: list[RetrievalResult] = []
-        for key in top_sheets:
-            if key in sheet_index:
-                r = sheet_index[key]
-                r.score = rrf_scores[key]
-                results.append(r)
-            else:
-                # Surfaced by column votes only — build stub from a representative
-                # column result so tabular_executor can group correctly.
-                doc_id, sheet_name = key
-                rep = next(
-                    (r for r in column_results
-                     if r.metadata.get("data", {}).get("document_id") == doc_id
-                     and r.metadata.get("data", {}).get("sheet_name") == sheet_name),
-                    None,
-                )
-                if rep is None:
-                    continue
-                stub_data = dict(rep.metadata.get("data", {}))
-                stub_data.pop("column_name", None)
-                stub_data.pop("column_type", None)
-                results.append(RetrievalResult(
-                    content=f"Sheet: {stub_data.get('filename', '')}"
-                            + (f" / sheet: {sheet_name}" if sheet_name else ""),
-                    metadata={**rep.metadata, "data": stub_data, "chunk_level": "sheet"},
-                    score=rrf_scores[key],
-                    source_type="document",
-                ))
-        return results
-    def _rank_db_tables(
-        self,
-        tbl_results: list[RetrievalResult],
-        col_results: list[RetrievalResult],
-        fts_results: list[RetrievalResult],
-        top_k: int,
-        k_rrf: int = 60,
-    ) -> list[RetrievalResult]:
-        """Rank DB tables by RRF across three legs:
-          L1 (primary): table-summary chunk similarity
-          L2 (vote):    best column-chunk position per table
-          L3 (vote):    best FTS position per table
-        Returns top-k table-chunk RetrievalResults. For tables surfaced by
-        L2/L3 but missing a table chunk, a minimal stub is returned so that
-        db_executor._fetch_full_schema can seed off data.table_name.
-        """
-        # L1: tables ranked by table-chunk cosine score
-        tbl_index: dict[str, RetrievalResult] = {}
-        tbl_ranked: list[str] = []
-        for r in tbl_results:
-            tname = r.metadata.get("data", {}).get("table_name")
-            if tname and tname not in tbl_index:
-                tbl_index[tname] = r
-                tbl_ranked.append(tname)
-        # L2: tables ranked by first-appearance in column-chunk list (best col score)
-        col_table_ranked: list[str] = []
-        seen: set[str] = set()
-        for r in col_results:
-            tname = r.metadata.get("data", {}).get("table_name")
-            if tname and tname not in seen:
-                col_table_ranked.append(tname)
-                seen.add(tname)
-        # L3: tables ranked by first-appearance in FTS list
-        fts_table_ranked: list[str] = []
-        seen = set()
-        for r in fts_results:
-            tname = r.metadata.get("data", {}).get("table_name")
-            if tname and tname not in seen:
-                fts_table_ranked.append(tname)
-                seen.add(tname)
-        # RRF over table names across the three legs
-        rrf_scores: dict[str, float] = {}
-        for ranked_list in [tbl_ranked, col_table_ranked, fts_table_ranked]:
-            for rank, tname in enumerate(ranked_list):
-                rrf_scores[tname] = rrf_scores.get(tname, 0.0) + 1.0 / (k_rrf + rank + 1)
-        top_tables = sorted(rrf_scores, key=lambda t: rrf_scores[t], reverse=True)[:top_k]
-        results: list[RetrievalResult] = []
-        for tname in top_tables:
-            if tname in tbl_index:
-                r = tbl_index[tname]
-                r.score = rrf_scores[tname]
-                results.append(r)
-            else:
-                # Surfaced by column/FTS votes with no table chunk — minimal stub
-                results.append(RetrievalResult(
-                    content=f"Table: {tname}",
-                    metadata={"data": {"table_name": tname}, "source_type": "database"},
-                    score=rrf_scores[tname],
-                    source_type="database",
-                ))
-        return results
-    # ------------------------------------------------------------------
-    # Public interface — called by the router
-    # ------------------------------------------------------------------
-    async def retrieve(self, query: str, user_id: str, k: int = 5) -> list[RetrievalResult]:
-        """Table-first retrieval for DB sources; chunk-level for tabular.
-        DB tables are ranked via RRF across three legs:
-          L1 (primary): table-summary chunk similarity
-          L2 (vote): top-K column-chunk cosine, grouped by table
-          L3 (vote): top-K FTS column hits, grouped by table
-        db_executor downstream fetches the full per-column schema for the
-        ranked table set via _fetch_full_schema — the column chunks returned
-        here are intentionally NOT used as the schema source, only for voting.
-        Tabular (CSV/XLSX) sheets are ranked via RRF across two legs:
-          L1: sheet-chunk cosine
-          L2: column-chunk votes (best position per sheet)
-        Returns sheet-level RetrievalResults so tabular_executor receives
-        full sheet context (all columns) rather than fragmented column hits.
-        """
-        embedding = await self._embed_query(query)
-        db_col_results, db_tbl_results, tabular_results, fts_results, sheet_results = await asyncio.gather(
-            self._search_db(embedding, user_id, k),
-            self._search_db_tables(embedding, user_id, k),
-            self._search_tabular(embedding, user_id, k),
-            self._search_fts_db(query, user_id, k * 4),
-            self._search_tabular_sheets(embedding, user_id, k),
-        )
-        db_ranked = self._rank_db_tables(db_tbl_results, db_col_results, fts_results, top_k=k)
-        tabular_ranked = self._rank_tabular_sheets(sheet_results, tabular_results, top_k=k)
-        results = sorted(db_ranked + tabular_ranked, key=lambda r: r.score, reverse=True)
-        logger.info(
-            "schema retrieval",
-            count=len(results),
-            db_tables_ranked=len(db_ranked),
-            db_cols=len(db_col_results),
-            db_tables=len(db_tbl_results),
-            tabular_cols=len(tabular_results),
-            tabular_sheets=len(sheet_results),
-            tabular_ranked=len(tabular_ranked),
-            fts=len(fts_results),
-        )
-        return results
-schema_retriever = SchemaRetriever()

src/rag/router.py DELETED Viewed

@@ -1,179 +0,0 @@
-"""Routes retrieval requests to the appropriate retriever based on source_hint.
-Cross-retriever merging uses Reciprocal Rank Fusion (RRF) on per-retriever
-ranked lists — score scales differ across retrievers (RRF, cosine, distance)
-and aren't directly comparable, so we rank-merge instead of score-merge.
-"""
-import asyncio
-import hashlib
-import json
-from dataclasses import asdict
-from typing import Literal
-from src.db.redis.connection import get_redis
-from src.middlewares.logging import get_logger
-from src.rag.base import BaseRetriever, RetrievalResult
-logger = get_logger("retrieval_router")
-_CACHE_TTL = 3600  # 1 hour
-_CACHE_KEY_PREFIX = "retrieval"
-_RRF_K = 60  # standard RRF constant
-SourceHint = Literal["document", "schema", "both"]
-def _result_dedup_key(r: RetrievalResult) -> tuple:
-    """Cross-retriever dedup key — distinguishes DB columns vs DB tables vs
-    tabular columns vs prose chunks vs sheet-level chunks."""
-    data = r.metadata.get("data", {})
-    return (
-        r.source_type,
-        data.get("table_name"),
-        data.get("column_name"),
-        data.get("filename"),
-        data.get("sheet_name"),
-        data.get("chunk_index"),  # disambiguates multiple prose chunks per doc
-        r.metadata.get("chunk_level"),  # distinguishes sheet vs column chunks
-    )
-def _rrf_merge(
-    ranked_lists: list[list[RetrievalResult]],
-    top_k: int,
-    k_rrf: int = _RRF_K,
-) -> list[RetrievalResult]:
-    """Reciprocal Rank Fusion across retriever batches.
-    Each input list is treated as already best-first ordered. Items are
-    deduped via _result_dedup_key and re-ranked by aggregated reciprocal
-    rank across all lists. Score on the returned RetrievalResult is the
-    aggregated RRF score (uniform scale across legs).
-    """
-    scores: dict[tuple, float] = {}
-    index: dict[tuple, RetrievalResult] = {}
-    for ranked in ranked_lists:
-        for rank, result in enumerate(ranked):
-            key = _result_dedup_key(result)
-            scores[key] = scores.get(key, 0.0) + 1.0 / (k_rrf + rank + 1)
-            # Keep the first occurrence; metadata is identical for the same
-            # key across lists, so any copy is fine.
-            if key not in index:
-                index[key] = result
-    merged = sorted(index.values(), key=lambda r: scores[_result_dedup_key(r)], reverse=True)
-    # Overwrite score with RRF score so downstream consumers see a uniform scale.
-    for r in merged:
-        r.score = scores[_result_dedup_key(r)]
-    return merged[:top_k]
-async def invalidate_retrieval_cache(user_id: str) -> int:
-    """Delete every cached retrieval entry for `user_id`.
-    Called by ingest/upload/delete API handlers after a successful write so
-    the next retrieval picks up the new data instead of stale cached top-k.
-    Returns the number of keys removed.
-    """
-    redis = await get_redis()
-    pattern = f"{_CACHE_KEY_PREFIX}:{user_id}:*"
-    keys = [key async for key in redis.scan_iter(match=pattern)]
-    if not keys:
-        return 0
-    deleted = await redis.delete(*keys)
-    logger.info("retrieval cache invalidated", user_id=user_id, deleted=deleted)
-    return int(deleted)
-class RetrievalRouter:
-    def __init__(
-        self,
-        schema_retriever: BaseRetriever,
-        document_retriever: BaseRetriever,
-    ):
-        self._retrievers: dict[str, BaseRetriever] = {
-            "schema": schema_retriever,
-            "document": document_retriever,
-        }
-    def _route(self, source_hint: SourceHint) -> list[tuple[str, BaseRetriever]]:
-        if source_hint == "schema":
-            return [("schema", self._retrievers["schema"])]
-        if source_hint == "document":
-            return [("document", self._retrievers["document"])]
-        return list(self._retrievers.items())
-    async def retrieve(
-        self,
-        query: str,
-        user_id: str,
-        source_hint: SourceHint = "both",
-        k: int = 10,
-    ) -> list[RetrievalResult]:
-        redis = await get_redis()
-        query_hash = hashlib.md5(query.encode()).hexdigest()
-        cache_key = f"{_CACHE_KEY_PREFIX}:{user_id}:{source_hint}:{query_hash}:{k}"
-        cached = await redis.get(cache_key)
-        if cached:
-            try:
-                raw = json.loads(cached)
-                logger.info("returning cached retrieval results", source_hint=source_hint)
-                return [RetrievalResult(**r) for r in raw]
-            except Exception:
-                logger.warning("corrupted retrieval cache, fetching fresh", cache_key=cache_key)
-        results = await self._retrieve_uncached(query, user_id, source_hint, k)
-        # Empty-result fallback: orchestrator may have misclassified intent.
-        # Retry once with "both" before giving up. No-op when source_hint is
-        # already "both".
-        if not results and source_hint != "both":
-            logger.warning(
-                "empty retrieval, falling back to source_hint='both'",
-                original_source_hint=source_hint,
-            )
-            results = await self._retrieve_uncached(query, user_id, "both", k)
-        await redis.setex(
-            cache_key,
-            _CACHE_TTL,
-            json.dumps([asdict(r) for r in results]),
-        )
-        return results
-    async def _retrieve_uncached(
-        self,
-        query: str,
-        user_id: str,
-        source_hint: SourceHint,
-        k: int,
-    ) -> list[RetrievalResult]:
-        routed = self._route(source_hint)
-        batches = await asyncio.gather(
-            *[r.retrieve(query, user_id, k) for _, r in routed],
-            return_exceptions=True,
-        )
-        valid_lists: list[list[RetrievalResult]] = []
-        per_retriever: dict[str, int | str] = {}
-        for (name, _), batch in zip(routed, batches):
-            if isinstance(batch, Exception):
-                logger.error("retriever failed", retriever=name, error=str(batch))
-                per_retriever[name] = "error"
-                continue
-            valid_lists.append(batch)
-            per_retriever[name] = len(batch)
-        results = _rrf_merge(valid_lists, top_k=k)
-        logger.info(
-            "router result",
-            source_hint=source_hint,
-            per_retriever=per_retriever,
-            final_count=len(results),
-            top_score=results[0].score if results else None,
-            bottom_score=results[-1].score if results else None,
-        )
-        return results

src/{rag → retrieval}/base.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Shared contract for all retriever implementations."""
 from abc import ABC, abstractmethod
 from dataclasses import dataclass

+"""Shared types for the retrieval layer."""
 from abc import ABC, abstractmethod
 from dataclasses import dataclass

src/retrieval/document.py CHANGED Viewed

@@ -2,14 +2,161 @@
 For unstructured sources only (PDF / DOCX / TXT). Backed by PGVector with
 collection `document_embeddings`. Methods: MMR, cosine, euclidean, etc.
-Receives the working implementation from the previous src/rag/retrievers/document.py
-during the cleanup phase; for now this is a placeholder.
 """
-class DocumentRetriever:
-    """Dense retrieval over PGVector chunks for unstructured sources."""
-    async def retrieve(self, query: str, user_id: str, k: int = 5) -> list:
-        raise NotImplementedError

 For unstructured sources only (PDF / DOCX / TXT). Backed by PGVector with
 collection `document_embeddings`. Methods: MMR, cosine, euclidean, etc.
 """
+import math
+from langchain_postgres import PGVector
+from langchain_postgres.vectorstores import DistanceStrategy
+from langchain_openai import AzureOpenAIEmbeddings
+from sqlalchemy import text
+from src.config.settings import settings
+from src.db.postgres.connection import _pgvector_engine
+from src.db.postgres.vector_store import get_vector_store
+from src.middlewares.logging import get_logger
+from src.retrieval.base import BaseRetriever, RetrievalResult
+logger = get_logger("document_retriever")
+# Change this one line to switch retrieval method
+# Options: "mmr" | "cosine" | "euclidean" | "inner_product" | "manhattan"
+# Any value not listed above falls through to the cosine branch of
+# DocumentRetriever.retrieve.
+_RETRIEVAL_METHOD = "mmr"
+# File types treated as tabular: chunks with one of these `file_type` values
+# are dropped from results, and the set size is used as the over-fetch margin.
+_TABULAR_TYPES = {"csv", "xlsx"}
+# Candidate pool size handed to the MMR search as `fetch_k`.
+_FETCH_K = 20
+# Passed to the MMR search as `lambda_mult` (relevance vs. diversity trade-off).
+_LAMBDA_MULT = 0.5
+# PGVector collection queried by every retrieval method in this module.
+_COLLECTION_NAME = "document_embeddings"
+# Embedding client shared by the two PGVector stores below and by the
+# manhattan path. NOTE: constructed at import time from `settings`.
+_embeddings = AzureOpenAIEmbeddings(
+    azure_deployment=settings.azureai_deployment_name_embedding,
+    openai_api_version=settings.azureai_api_version_embedding,
+    azure_endpoint=settings.azureai_endpoint_url_embedding,
+    api_key=settings.azureai_api_key_embedding,
+)
+# Store handle on the same collection using the EUCLIDEAN distance strategy;
+# only used when _RETRIEVAL_METHOD == "euclidean". Built eagerly at import.
+_euclidean_store = PGVector(
+    embeddings=_embeddings,
+    connection=_pgvector_engine,
+    collection_name=_COLLECTION_NAME,
+    distance_strategy=DistanceStrategy.EUCLIDEAN,
+    use_jsonb=True,
+    async_mode=True,
+    create_extension=False,
+)
+# Store handle on the same collection using the MAX_INNER_PRODUCT strategy;
+# only used when _RETRIEVAL_METHOD == "inner_product". Built eagerly at import.
+_ip_store = PGVector(
+    embeddings=_embeddings,
+    connection=_pgvector_engine,
+    collection_name=_COLLECTION_NAME,
+    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
+    use_jsonb=True,
+    async_mode=True,
+    create_extension=False,
+)
+# Raw query for the manhattan method, which is not served through a PGVector
+# store object here. `<+>` is pgvector's L1 (taxicab) distance operator.
+# Rows are limited to one collection, one user and source_type 'document',
+# ordered by ascending distance. Bind params: embedding, collection, user_id, k.
+_MANHATTAN_SQL = text("""
+    SELECT
+        lpe.document,
+        lpe.cmetadata,
+        lpe.embedding <+> CAST(:embedding AS vector) AS distance
+    FROM langchain_pg_embedding lpe
+    JOIN langchain_pg_collection lpc ON lpe.collection_id = lpc.uuid
+    WHERE lpc.name = :collection
+      AND lpe.cmetadata->>'user_id' = :user_id
+      AND lpe.cmetadata->>'source_type' = 'document'
+    ORDER BY distance ASC
+    LIMIT :k
+""")
+class DocumentRetriever(BaseRetriever):
+    """Dense retriever for prose chunks in the `document_embeddings` collection.
+
+    The search strategy is selected by the module-level `_RETRIEVAL_METHOD`
+    switch. Every strategy restricts hits to the caller's `user_id` and
+    `source_type == "document"`, then drops chunks whose `file_type` is
+    tabular (see `_TABULAR_TYPES`).
+
+    NOTE(review): `score` is passed through from the backing store unchanged.
+    For the manhattan path it is a distance (lower is closer); presumably the
+    PGVector strategies report distances as well — confirm before comparing
+    scores across methods.
+    """
+    def __init__(self) -> None:
+        # Shared store used by the "mmr" and "cosine" methods.
+        self.vector_store = get_vector_store()
+    @staticmethod
+    def _collect_documents(
+        scored: list[tuple[str, dict, float]], k: int
+    ) -> list[RetrievalResult]:
+        """Build at most `k` results from (content, metadata, score) triples, skipping tabular chunks."""
+        if k <= 0:
+            return []
+        results: list[RetrievalResult] = []
+        for content, metadata, score in scored:
+            # `data` may be absent or null in the stored metadata.
+            file_type = (metadata.get("data") or {}).get("file_type", "")
+            if file_type in _TABULAR_TYPES:
+                continue
+            results.append(RetrievalResult(
+                content=content,
+                metadata=metadata,
+                score=score,
+                source_type="document",
+            ))
+            if len(results) >= k:
+                break
+        return results
+    async def retrieve(
+        self, query: str, user_id: str, k: int = 5
+    ) -> list[RetrievalResult]:
+        """Return up to `k` non-tabular document chunks relevant to `query`.
+
+        Args:
+            query: Natural-language search text.
+            user_id: Owner whose chunks may be returned (metadata filter).
+            k: Maximum number of results; values <= 0 yield an empty list.
+
+        Returns:
+            Results in the order produced by the selected search method.
+        """
+        if k <= 0:
+            return []
+        filter_ = {"user_id": user_id, "source_type": "document"}
+        # Over-fetch by a small margin because tabular chunks are dropped below.
+        fetch_k = k + len(_TABULAR_TYPES)
+        if _RETRIEVAL_METHOD == "manhattan":
+            return await self._retrieve_manhattan(query, user_id, k, fetch_k)
+        if _RETRIEVAL_METHOD == "mmr":
+            # The MMR candidate pool must never be smaller than the number of
+            # documents requested, otherwise large `k` is silently truncated.
+            pool_size = max(_FETCH_K, fetch_k)
+            docs = await self.vector_store.amax_marginal_relevance_search(
+                query=query,
+                k=fetch_k,
+                fetch_k=pool_size,
+                lambda_mult=_LAMBDA_MULT,
+                filter=filter_,
+            )
+            # MMR returns documents without scores. Score the *whole* candidate
+            # pool so every MMR pick (which may rank below fetch_k) has a real
+            # score instead of the 0.0 fallback.
+            scored_pool = await self.vector_store.asimilarity_search_with_score(
+                query=query, k=pool_size, filter=filter_,
+            )
+            score_map = {doc.page_content: score for doc, score in scored_pool}
+            docs_with_scores = [(doc, score_map.get(doc.page_content, 0.0)) for doc in docs]
+        elif _RETRIEVAL_METHOD == "euclidean":
+            docs_with_scores = await _euclidean_store.asimilarity_search_with_score(
+                query=query, k=fetch_k, filter=filter_,
+            )
+        elif _RETRIEVAL_METHOD == "inner_product":
+            docs_with_scores = await _ip_store.asimilarity_search_with_score(
+                query=query, k=fetch_k, filter=filter_,
+            )
+        else:  # cosine
+            docs_with_scores = await self.vector_store.asimilarity_search_with_score(
+                query=query, k=fetch_k, filter=filter_,
+            )
+        results = self._collect_documents(
+            [(doc.page_content, doc.metadata, score) for doc, score in docs_with_scores],
+            k,
+        )
+        logger.info("retrieved chunks", method=_RETRIEVAL_METHOD, count=len(results))
+        return results
+    async def _retrieve_manhattan(
+        self, query: str, user_id: str, k: int, fetch_k: int
+    ) -> list[RetrievalResult]:
+        """Run the raw L1-distance query and return up to `k` non-tabular chunks.
+
+        Args:
+            query: Natural-language search text; embedded with `_embeddings`.
+            user_id: Owner whose chunks may be returned.
+            k: Maximum number of results to return.
+            fetch_k: Row limit sent to the database (before tabular filtering).
+
+        Raises:
+            ValueError: If the query embedding contains NaN or Infinity.
+        """
+        query_vector = await _embeddings.aembed_query(query)
+        # Reject non-finite components before serialising the vector literal.
+        if not all(math.isfinite(v) for v in query_vector):
+            raise ValueError("Embedding vector contains NaN or Infinity values.")
+        vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"
+        async with _pgvector_engine.connect() as conn:
+            result = await conn.execute(_MANHATTAN_SQL, {
+                "embedding": vector_str,
+                "collection": _COLLECTION_NAME,
+                "user_id": user_id,
+                "k": fetch_k,
+            })
+            rows = result.fetchall()
+        results = self._collect_documents(
+            [(row.document, row.cmetadata, float(row.distance)) for row in rows],
+            k,
+        )
+        logger.info("retrieved chunks", method="manhattan", count=len(results))
+        return results
+# Module-level singleton imported by the retrieval router.
+document_retriever = DocumentRetriever()

src/retrieval/router.py CHANGED Viewed

@@ -1,11 +1,83 @@
-"""Retrieval-side router.
-Currently dispatches only the `unstructured` route to DocumentRetriever.
-The `structured` route is owned by query/service.py — not by retrieval.
-The `chat` route bypasses retrieval entirely.
 """
 class RetrievalRouter:
-    async def dispatch(self, query: str, user_id: str, source_hint: str) -> list:
-        raise NotImplementedError

+"""Retrieval router — dispatches to DocumentRetriever for unstructured sources.
+Routing rules:
+  - unstructured / document / both → DocumentRetriever (PGVector, PDF/DOCX/TXT)
+  - structured / schema            → empty list; handled by the query layer (src/query/), not by retrieval
+  - chat                           → empty list; bypasses retrieval entirely
+Exposes the same interface as the old src/rag/retriever.py so call sites in
+chat.py require no changes beyond the import path.
 """
+import hashlib
+import json
+from dataclasses import asdict
+from sqlalchemy.ext.asyncio import AsyncSession
+from src.db.redis.connection import get_redis
+from src.middlewares.logging import get_logger
+from src.retrieval.base import RetrievalResult
+from src.retrieval.document import document_retriever
+logger = get_logger("retrieval_router")
+# Lifetime of a cached retrieval result, in seconds (1 hour); passed to redis.setex.
+_CACHE_TTL = 3600
+# First segment of every cache key; also the prefix matched when invalidating a user's cache.
+_CACHE_KEY_PREFIX = "retrieval"
+# source_hint values served by the document retriever; any other hint returns an empty list.
+_UNSTRUCTURED_HINTS = frozenset({"document", "unstructured", "both"})
 class RetrievalRouter:
+    """Dispatch retrieval requests to the document retriever, with a Redis result cache.
+
+    The cache is best-effort: a Redis failure on lookup or store is logged
+    and retrieval continues uncached rather than failing the request.
+    """
+    async def retrieve(
+        self,
+        query: str,
+        user_id: str,
+        db: AsyncSession,
+        k: int = 5,
+        source_hint: str = "both",
+    ) -> list[RetrievalResult]:
+        """Return up to `k` document chunks for `query`, served from cache when possible.
+
+        Args:
+            query: Natural-language search text.
+            user_id: Owner whose documents are searched; part of the cache key.
+            db: Not used by this method; accepted so call sites that pass a
+                session keep working.
+            k: Maximum number of results.
+            source_hint: Routing hint. Hints outside `_UNSTRUCTURED_HINTS`
+                return an empty list without touching cache or retriever.
+
+        Returns:
+            Retrieved results, or an empty list when the hint is not routed
+            here or the retriever raises.
+        """
+        if source_hint not in _UNSTRUCTURED_HINTS:
+            return []
+        # MD5 is used only to derive a compact cache key, not for security.
+        query_hash = hashlib.md5(query.encode()).hexdigest()
+        cache_key = f"{_CACHE_KEY_PREFIX}:{user_id}:{source_hint}:{query_hash}:{k}"
+        try:
+            redis = await get_redis()
+            cached = await redis.get(cache_key)
+        except Exception as e:
+            # Cache is an optimisation; an unreachable Redis must not block retrieval.
+            logger.warning("retrieval cache unavailable, fetching fresh", error=str(e))
+            redis = None
+            cached = None
+        if cached:
+            try:
+                raw = json.loads(cached)
+                logger.info("returning cached retrieval results", source_hint=source_hint)
+                return [RetrievalResult(**r) for r in raw]
+            except Exception:
+                logger.warning("corrupted retrieval cache, fetching fresh")
+        try:
+            results = await document_retriever.retrieve(query, user_id, k)
+        except Exception as e:
+            # Failures are not cached, so the next call retries the retriever.
+            logger.error("retrieval failed", error=str(e))
+            return []
+        if not results and source_hint == "both":
+            logger.warning("empty retrieval result for source_hint='both'")
+        if redis is not None:
+            try:
+                await redis.setex(
+                    cache_key,
+                    _CACHE_TTL,
+                    json.dumps([asdict(r) for r in results]),
+                )
+            except Exception as e:
+                # Do not discard a successful retrieval because caching failed.
+                logger.warning("failed to cache retrieval results", error=str(e))
+        return results
+    async def invalidate_cache(self, user_id: str) -> int:
+        """Delete all cached retrieval entries for a user. Call after upload/delete.
+
+        Returns:
+            Number of keys removed (0 when the user has no cached entries).
+        """
+        redis = await get_redis()
+        pattern = f"{_CACHE_KEY_PREFIX}:{user_id}:*"
+        keys = [key async for key in redis.scan_iter(match=pattern)]
+        if not keys:
+            return 0
+        deleted = await redis.delete(*keys)
+        logger.info("retrieval cache invalidated", user_id=user_id, deleted=deleted)
+        return int(deleted)
+# Module-level singleton; chat.py imports this under the alias `retriever`.
+retrieval_router = RetrievalRouter()

src/tools/__init__.py DELETED Viewed

File without changes

src/tools/search.py DELETED Viewed

@@ -1,46 +0,0 @@
-"""Search tool for agent."""
-from langchain_core.tools import tool
-from src.rag.retriever import retriever
-from sqlalchemy.ext.asyncio import AsyncSession
-from src.middlewares.logging import get_logger
-logger = get_logger("search_tool")
-@tool
-async def search_documents(
-    query: str,
-    user_id: str,
-    db: AsyncSession,
-    num_results: int = 5
-) -> str:
-    """Search user's uploaded documents for relevant information.
-    Args:
-        query: The search query or question
-        user_id: The user's ID
-        db: Database session
-        num_results: Number of results to return (default: 5)
-    Returns:
-        Relevant document excerpts with source and page information
-    """
-    try:
-        results = await retriever.retrieve(query, user_id, db, num_results)
-        if not results:
-            return "No relevant information found in the documents."
-        formatted_results = []
-        for result in results:
-            filename = result.metadata.get("filename", "Unknown")
-            page = result.metadata.get("page_label")
-            source_label = f"{filename}, p.{page}" if page else filename
-            formatted_results.append(f"[Source: {source_label}]\n{result.content}\n")
-        return "\n".join(formatted_results)
-    except Exception as e:
-        logger.error("Search failed", error=str(e))
-        return "Sorry, I encountered an error while searching the documents."