# researchradar/src/api/routes_search.py
# unknown
# Add entity list API, author fetching, and search source decoupling
# 854be79
"""Search endpoint — RAG-powered question answering."""
import logging
from fastapi import APIRouter, Depends, Request
from src.api.deps import get_db, get_rag_engine
from src.api.models import SearchRequest, SearchResponse, SourcePaper
from src.api.rate_limit import search_limiter
from src.generation.rag_engine import RAGEngine
from src.storage.sqlite_db import SQLiteDB
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the search endpoint; its routes live under the "/search" prefix.
router = APIRouter(prefix="/search", tags=["search"])
def _build_where(req: SearchRequest) -> dict | None:
    """Build the metadata ``where`` filter from the optional request constraints.

    One clause is produced per supplied constraint (``year_min``,
    ``year_max``, ``venue``). A single clause is returned as-is; several
    clauses are combined under a top-level ``$and`` so that every constraint
    is applied and the filter never has more than one top-level key.

    Returns:
        The filter dict, or ``None`` when the request carries no constraints.
    """
    clauses: list[dict] = []
    if req.year_min is not None:
        clauses.append({"year": {"$gte": req.year_min}})
    if req.year_max is not None:
        clauses.append({"year": {"$lte": req.year_max}})
    if req.venue:
        clauses.append({"venue": req.venue})

    if not clauses:
        return None
    if len(clauses) == 1:
        return clauses[0]
    # ChromaDB expects exactly one top-level key in ``where``; multiple
    # conditions (including two bounds on the same field) must go under $and.
    return {"$and": clauses}


@router.post("", response_model=SearchResponse)
def search(
    request: Request,
    req: SearchRequest,
    engine: RAGEngine = Depends(get_rag_engine),
    db: SQLiteDB = Depends(get_db),
):
    """Answer a research question using RAG over the paper corpus.

    Args:
        request: Incoming HTTP request, handed to the search rate limiter.
        req: Search payload: the query plus optional ``year_min``,
            ``year_max`` and ``venue`` constraints.
        engine: RAG engine that runs the query (injected dependency).
        db: Database used to look up authors for the source papers
            (injected dependency).

    Returns:
        A ``SearchResponse`` with the generated answer, the source papers
        (each enriched with its authors), the model and the usage data.
    """
    # NOTE(review): presumably raises when the caller is over the limit —
    # confirm against search_limiter.
    search_limiter.check(request)

    response = engine.query(
        question=req.query, top_k=5, source_top_k=20, where=_build_where(req),
    )

    # Batch-fetch authors for all source papers; skip the lookup entirely
    # when the engine returned no sources.
    paper_ids = [s["paper_id"] for s in response.sources]
    authors_map = db.get_authors_for_papers(paper_ids) if paper_ids else {}

    # Build fresh dicts rather than mutating the engine's source entries.
    sources = [
        SourcePaper(**{**s, "authors": authors_map.get(s["paper_id"], [])})
        for s in response.sources
    ]

    return SearchResponse(
        answer=response.answer,
        sources=sources,
        model=response.model,
        usage=response.usage,
    )