File size: 1,095 Bytes
abd4352 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | """
schema/compressor.py
Compresses a table schema dict into a compact JSON string for embedding.
Extra per-column fields (e.g. sample values) are carried through unchanged in
the JSON "columns" entry; the embedding text lists only column names and types.
"""
import json
from typing import Any, Dict
def compress_table_schema(table: Dict[str, Any]) -> str:
    """
    Serialize a table schema dict into a JSON string for embedding and search.

    The returned JSON object repeats the schema fields and adds a ``text``
    entry: a one-line summary of the form
    ``Table: <name> | Columns: <col> (<type>), ... | Rows: <n>``.
    The ``| Rows: <n>`` part is omitted when no row count is given.

    Input format:
        {
            "table": "orders",
            "columns": [{"name": "order_id", "type": "integer"}, ...],
            "row_count": 12345
        }

    Args:
        table: Schema dict. A missing "table" key, column "name", or column
            "type" is rendered as "unknown"; a missing or null "columns"
            value is treated as an empty list.

    Returns:
        A JSON string with the keys "table", "columns", "row_count" and
        "text". Column dicts are passed through unchanged.
    """
    tname = table.get("table", "unknown")
    row_count = table.get("row_count")

    # .get()'s default only applies when the key is absent; an explicit
    # `"columns": None` must be normalised too or the iteration below fails.
    columns = table.get("columns")
    if columns is None:
        columns = []

    # Use the same 'unknown' fallback for a missing name as for a missing
    # type, so one incomplete column does not abort the whole table.
    col_text = ", ".join(
        f"{c.get('name', 'unknown')} ({c.get('type', 'unknown')})"
        for c in columns
    )

    text = f"Table: {tname} | Columns: {col_text}"
    if row_count is not None:
        text += f" | Rows: {row_count}"

    # Return the structured fields alongside the summary so callers can
    # re-parse the schema from the same string.
    return json.dumps({
        "table": tname,
        "columns": columns,
        "row_count": row_count,
        "text": text,
    })
|