File size: 2,679 Bytes
e0a827b
048f0fe
e0a827b
 
048f0fe
e0a827b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
048f0fe
 
e0a827b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
048f0fe
e0a827b
 
 
 
048f0fe
e0a827b
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from typing import List, Optional

from fastapi import FastAPI, Query
from pydantic import BaseModel, Field

from utils import (
    semantic_search,
    get_schemas,
    get_model_id,
    get_corpus_size,
)


# FastAPI application instance; the route handlers in this module are
# registered on it through its decorators.
app = FastAPI(
    title="Semantic Table Search API",
    version="1.0.0",
)


class SearchRequest(BaseModel):
    """JSON body accepted by the search endpoint.

    Attributes:
    - query: Natural language search text; required, at least one character.
    - limit: Optional cap on the number of results (1 to 50). Defaults to
      ``None``, in which case the search handler falls back to its ``limit``
      query parameter. It lives in the body so that JSON-only clients can
      set it without touching the query string.
    """

    query: str = Field(default=..., min_length=1)
    limit: Optional[int] = Field(default=None, ge=1, le=50)


class Match(BaseModel):
    """One ranked hit returned by a search; all three fields are required."""

    score: float = Field(
        default=...,
        description="Cosine similarity score (-1 to 1)",
    )
    text: str = Field(
        default=...,
        description="Matched table metadata text",
    )
    index: int = Field(
        default=...,
        description="Stable index of the matched corpus entry",
    )


class SearchResponse(BaseModel):
    """Response body of the search endpoint.

    Bundles the ranked matches with the query that produced them and the
    result limit that was actually applied.
    """

    # The query text taken from the request body.
    query: str
    # The matches, in the order the search backend returned them.
    results: List[Match]
    # Number of entries in ``results``.
    count: int
    # The limit applied to the search (body value if set, else query param).
    limit: int


@app.get("/health")
def health() -> dict:
    """Report that the service is up, plus a few diagnostic values.

    Returns:
        A dict with ``"status"`` fixed to ``"ok"``, ``"corpus_size"`` taken
        from ``get_corpus_size()`` and ``"model"`` taken from
        ``get_model_id()``.
    """
    # Query the helpers first (corpus size, then model id) and assemble the
    # payload afterwards.
    corpus_size = get_corpus_size()
    model_id = get_model_id()
    return dict(status="ok", corpus_size=corpus_size, model=model_id)


@app.get("/schemas")
def schemas(
    include_columns: bool = Query(False, description="Include column metadata"),
) -> List[dict]:
    """Return the available table schemas.

    Parameters:
    - include_columns: Forwarded unchanged to ``get_schemas``. As documented
        for this endpoint, a true value yields full schema definitions, and
        a false value (the default) yields a compact view with table names
        and descriptions.
    """
    # Pure pass-through: shaping of the listing happens inside get_schemas.
    listing = get_schemas(include_columns=include_columns)
    return listing


@app.post("/search", response_model=SearchResponse)
def search(
    body: SearchRequest,
    limit: int = Query(5, ge=1, le=50, description="Max number of results"),
) -> SearchResponse:
    """Run a semantic search over table metadata and return ranked matches.

    Parameters:
    - body: Request payload carrying the query text and an optional limit.
    - limit: Query-string result cap, used only when ``body.limit`` is unset.

    Returns:
        A ``SearchResponse`` holding the query, one match per
        ``(score, text, index)`` triple produced by ``semantic_search``,
        the number of matches, and the limit that was applied.
    """
    # A limit supplied in the JSON body takes precedence over the query
    # parameter.
    top_k = body.limit if body.limit else limit

    ranked = semantic_search(body.query, top_k=top_k)
    # Each result is unpacked as a (score, text, index) triple.
    hits = [dict(score=s, text=t, index=i) for s, t, i in ranked]

    return SearchResponse(
        query=body.query,
        results=hits,
        count=len(hits),
        limit=top_k,
    )