File size: 1,095 Bytes
abd4352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
schema/compressor.py
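Serializes a table schema dict into a compact JSON string for embedding.
The JSON carries a one-line text summary (table name, column names and
types, optional row count) alongside the original schema fields. Column
dicts are passed through unchanged, so any extra per-column fields (e.g.
sample values) are preserved in the JSON but are not part of the summary.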
"""

import json
from typing import Any, Dict


def compress_table_schema(table: Dict[str, Any]) -> str:
    """
    Convert a table schema dict into a compact JSON string suitable
    for embedding and semantic search.

    Input format:
      {
        "table": "orders",
        "columns": [{"name": "order_id", "type": "integer"}, ...],
        "row_count": 12345
      }

    Args:
        table: Schema dict. Every key is optional: a missing "table"
            becomes "unknown", a missing or null "columns" is treated as
            an empty list, and a missing "row_count" is reported as null.

    Returns:
        A JSON object string with the keys "table", "columns",
        "row_count" and "text". "text" is a one-line summary of the form
        "Table: <name> | Columns: <col> (<type>), ... | Rows: <n>"; the
        "| Rows: <n>" suffix is omitted when the row count is None.
    """
    tname = table.get("table", "unknown")
    # `or []` also covers an explicit `"columns": None`, which the plain
    # `.get(..., [])` default would let through and then fail to iterate.
    columns = table.get("columns") or []
    row_count = table.get("row_count")

    # Fall back to "unknown" for a missing column name, mirroring the
    # fallback already applied to the column type and the table name.
    col_text = ", ".join(
        f"{c.get('name', 'unknown')} ({c.get('type', 'unknown')})"
        for c in columns
    )

    text = f"Table: {tname} | Columns: {col_text}"
    # Compare against None so a legitimate row count of 0 is still shown.
    if row_count is not None:
        text += f" | Rows: {row_count}"

    # Also return as JSON for structured re-parsing
    return json.dumps({
        "table": tname,
        "columns": columns,
        "row_count": row_count,
        "text": text,
    })