|
|
from typing import List, Optional |
|
|
|
|
|
from fastapi import FastAPI, Query |
|
|
from pydantic import BaseModel, Field |
|
|
|
|
|
from utils import ( |
|
|
semantic_search, |
|
|
get_schemas, |
|
|
get_model_id, |
|
|
get_corpus_size, |
|
|
) |
|
|
|
|
|
|
|
|
# Application object that the route decorators in this module register on.
app = FastAPI(
    title="Semantic Table Search API",
    version="1.0.0",
)
|
|
|
|
|
|
|
|
class SearchRequest(BaseModel):
    """JSON body accepted by the semantic search endpoint.

    Attributes:
    - query: Natural language search text; required, at least one character.
    - limit: Optional cap on the number of results (1 to 50 inclusive).
      Defaults to None so that clients may instead supply the limit as a
      query parameter; it lives in the body for clients that prefer
      JSON-only interactions.
    """

    # Ellipsis marks the field as required.
    query: str = Field(default=..., min_length=1)
    limit: Optional[int] = Field(default=None, ge=1, le=50)
|
|
|
|
|
|
|
|
class Match(BaseModel):
    """One entry in the list of search results.

    All three fields are required; their human-readable meaning is carried
    by the ``description`` given to each field.
    """

    score: float = Field(
        default=...,
        description="Cosine similarity score (-1 to 1)",
    )
    text: str = Field(
        default=...,
        description="Matched table metadata text",
    )
    index: int = Field(
        default=...,
        description="Stable index of the matched corpus entry",
    )
|
|
|
|
|
|
|
|
class SearchResponse(BaseModel):
    """Envelope returned by the search endpoint: the matches plus metadata.

    Attributes:
    - query: The search text the results belong to.
    - results: The individual matches.
    - count: Number of matches reported.
    - limit: The result limit associated with the response.
    """

    # Every field is required; none declares a default.
    query: str
    results: List[Match]
    count: int
    limit: int
|
|
|
|
|
|
|
|
@app.get("/health")
def health() -> dict:
    """Report that the service is up, along with a few diagnostic fields.

    Returns:
        A dict with a constant "status" of "ok", the value of
        get_corpus_size() under "corpus_size", and the value of
        get_model_id() under "model".
    """
    payload = {"status": "ok"}
    payload["corpus_size"] = get_corpus_size()
    payload["model"] = get_model_id()
    return payload
|
|
|
|
|
|
|
|
@app.get("/schemas")
def schemas(
    include_columns: bool = Query(False, description="Include column metadata"),
) -> List[dict]:
    """List the available table schemas.

    Parameters:
    - include_columns: Forwarded unchanged to get_schemas(). When true the
      full schema definitions are expected; otherwise a compact view with
      table names and descriptions (behaviour is implemented by
      get_schemas, not here).

    Returns:
        Whatever get_schemas() returns for the given flag.
    """
    schema_list = get_schemas(include_columns=include_columns)
    return schema_list
|
|
|
|
|
|
|
|
@app.post("/search", response_model=SearchResponse)
def search(
    body: SearchRequest,
    limit: int = Query(5, ge=1, le=50, description="Max number of results"),
) -> SearchResponse:
    """Run a semantic search over table metadata and return the matches.

    Clients send a natural language query and get back matching entries,
    each with a similarity score, the matched text and a corpus index.

    Parameters:
    - body: Request payload carrying the query and an optional limit.
    - limit: Query-string limit (default 5, range 1 to 50), used only when
      the body does not supply one.

    Returns:
        A SearchResponse echoing the query, the matches in the order
        semantic_search() produced them, their count, and the limit that
        was actually applied.
    """
    # A limit given in the body takes precedence over the query parameter.
    effective_limit = body.limit if body.limit else limit

    # semantic_search() is expected to yield (score, text, index) triples.
    matches = []
    for score, text, idx in semantic_search(body.query, top_k=effective_limit):
        matches.append({"score": score, "text": text, "index": idx})

    return SearchResponse(
        query=body.query,
        results=matches,
        count=len(matches),
        limit=effective_limit,
    )
|
|
|