"""HTTP API exposing semantic search over table metadata."""

from typing import List, Optional

from fastapi import FastAPI, Query
from pydantic import BaseModel, Field

from utils import (
    get_corpus_size,
    get_model_id,
    get_schemas,
    semantic_search,
)

app = FastAPI(title="Semantic Table Search API", version="1.0.0")


class SearchRequest(BaseModel):
    """Request body accepted by the ``/search`` endpoint.

    Attributes:
    - query: Natural language search text; must be non-empty.
    - limit: Optional cap (1-50) on the number of results. When omitted,
      the endpoint falls back to its ``limit`` query parameter. It is
      accepted in the body so that JSON-only clients need not use query
      parameters.
    """

    query: str = Field(..., min_length=1)
    limit: Optional[int] = Field(None, ge=1, le=50)


class Match(BaseModel):
    """One ranked hit returned by a search."""

    score: float = Field(..., description="Cosine similarity score (-1 to 1)")
    text: str = Field(..., description="Matched table metadata text")
    index: int = Field(..., description="Stable index of the matched corpus entry")


class SearchResponse(BaseModel):
    """Response envelope for ``/search``.

    Echoes the query and the limit that was applied, alongside the matches
    and how many of them were returned.
    """

    query: str
    results: List[Match]
    count: int
    limit: int


@app.get("/health")
def health() -> dict:
    """Report service liveness with a few diagnostic fields.

    Returns a JSON object with a fixed ``status`` of ``"ok"``, the corpus
    size reported by ``get_corpus_size()`` and the model identifier reported
    by ``get_model_id()``.
    """
    corpus_size = get_corpus_size()
    model_id = get_model_id()
    return {
        "status": "ok",
        "corpus_size": corpus_size,
        "model": model_id,
    }


@app.get("/schemas")
def schemas(
    include_columns: bool = Query(False, description="Include column metadata"),
) -> List[dict]:
    """Return the available table schemas.

    Parameters:
    - include_columns: Forwarded to ``get_schemas``. When true, full schema
      definitions are requested; otherwise a compact view with table names
      and descriptions.
    """
    listing = get_schemas(include_columns=include_columns)
    return listing


@app.post("/search", response_model=SearchResponse)
def search(
    body: SearchRequest,
    limit: int = Query(5, ge=1, le=50, description="Max number of results"),
) -> SearchResponse:
    """Run a semantic search over table metadata and return ranked matches.

    Parameters:
    - body: JSON payload carrying the query text and an optional limit.
    - limit: Query-string limit, used only when the body does not set one.

    Returns:
    A ``SearchResponse`` echoing the query, the matches (score, text and
    corpus index for each), the match count and the limit that was applied.
    """
    # A limit supplied in the JSON body takes precedence over the query
    # parameter; the query parameter (default 5) is the fallback.
    effective_limit = limit
    if body.limit:
        effective_limit = body.limit

    hits = semantic_search(body.query, top_k=effective_limit)

    # Each hit is unpacked as a (score, text, index) triple; the dicts are
    # coerced into ``Match`` models when the response is constructed.
    matches = [
        {"score": similarity, "text": matched_text, "index": corpus_index}
        for similarity, matched_text, corpus_index in hits
    ]

    return SearchResponse(
        query=body.query,
        results=matches,
        count=len(matches),
        limit=effective_limit,
    )