text2sql / app.py
mabosaimi
feat: add routes with proper documentation
e0a827b
from typing import List, Optional
from fastapi import FastAPI, Query
from pydantic import BaseModel, Field
from utils import (
semantic_search,
get_schemas,
get_model_id,
get_corpus_size,
)
# ASGI application object; the route decorators below register against it.
app = FastAPI(
    title="Semantic Table Search API",
    version="1.0.0",
)
class SearchRequest(BaseModel):
    """JSON body accepted by the search endpoint.

    Attributes:
    - query: Natural language search text; must contain at least one
      character.
    - limit: Optional cap on the number of results (1 to 50). It is part of
      the body so JSON-only clients do not need a query parameter; when it is
      omitted the endpoint falls back to its ``limit`` query parameter.
    """

    query: str = Field(default=..., min_length=1)
    limit: Optional[int] = Field(default=None, ge=1, le=50)
class Match(BaseModel):
    """One entry in a search result list.

    All three fields are required; their descriptions are published with the
    model's schema.
    """

    score: float = Field(
        default=...,
        description="Cosine similarity score (-1 to 1)",
    )
    text: str = Field(
        default=...,
        description="Matched table metadata text",
    )
    index: int = Field(
        default=...,
        description="Stable index of the matched corpus entry",
    )
class SearchResponse(BaseModel):
    """Response body of the search endpoint: the matches plus request echo."""

    # The query text the search was run with.
    query: str
    # The matches returned for the query.
    results: List[Match]
    # Number of entries in ``results``.
    count: int
    # The result limit that was applied to this search.
    limit: int
@app.get("/health")
def health() -> dict:
    """Report that the service is up, with a few diagnostic fields.

    The JSON response carries a fixed ``status`` of ``"ok"``, the value of
    ``get_corpus_size()`` as ``corpus_size`` and the value of
    ``get_model_id()`` as ``model``.
    """
    payload = {"status": "ok"}
    payload["corpus_size"] = get_corpus_size()
    payload["model"] = get_model_id()
    return payload
@app.get("/schemas")
def schemas(
    include_columns: bool = Query(False, description="Include column metadata"),
) -> List[dict]:
    """List the available table schemas.

    Parameters:
    - include_columns: Query flag, off by default, that requests column
      metadata in the listing. It is forwarded unchanged to ``get_schemas``,
      which decides the shape of each returned entry.
    """
    listing = get_schemas(include_columns=include_columns)
    return listing
@app.post("/search", response_model=SearchResponse)
def search(
    body: SearchRequest,
    limit: int = Query(5, ge=1, le=50, description="Max number of results"),
) -> SearchResponse:
    """Search table metadata with a natural language query.

    The query text comes from the request body. The number of results is
    capped by the body's ``limit`` when one is supplied and by the ``limit``
    query parameter otherwise. Each match carries a similarity score, the
    matched text and the index of the corpus entry; the response also echoes
    the query, the match count and the limit that was applied.
    """
    # A limit sent in the JSON body takes precedence over the query parameter.
    if body.limit:
        top_k = body.limit
    else:
        top_k = limit

    hits = semantic_search(body.query, top_k=top_k)

    # Each hit is unpacked as a (score, text, index) triple.
    rows = [
        {"score": hit_score, "text": hit_text, "index": hit_index}
        for hit_score, hit_text, hit_index in hits
    ]

    return SearchResponse(
        query=body.query,
        results=rows,
        count=len(rows),
        limit=top_k,
    )