File size: 2,456 Bytes
abd4352 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | """
tests/unit/test_schema_compressor.py
Tests for schema text compression used for vector embedding.
"""
import json
import pytest
from schema.compressor import compress_table_schema
@pytest.mark.unit
class TestCompressTableSchema:
    """Unit tests for the string returned by ``compress_table_schema``."""

    def test_basic_output_is_json(self):
        """The output decodes as JSON and exposes the table name."""
        schema = dict(
            table="orders",
            columns=[
                {"name": "id", "type": "integer"},
                {"name": "amount", "type": "numeric"},
            ],
            row_count=100,
        )
        decoded = json.loads(compress_table_schema(schema))
        assert decoded["table"] == "orders"

    def test_contains_table_name(self):
        """The table name appears somewhere in the raw output."""
        compressed = compress_table_schema(
            {"table": "customers", "columns": [], "row_count": 0}
        )
        assert "customers" in compressed

    def test_contains_column_names(self):
        """Every column name appears somewhere in the raw output."""
        schema = {
            "table": "orders",
            "columns": [
                {"name": "order_id", "type": "integer"},
                {"name": "total_amount", "type": "numeric"},
            ],
            "row_count": 500,
        }
        compressed = compress_table_schema(schema)
        # Checked in the same order as the columns were declared.
        for column_name in ("order_id", "total_amount"):
            assert column_name in compressed

    def test_contains_row_count(self):
        """The row count digits appear somewhere in the raw output."""
        compressed = compress_table_schema(
            {"table": "events", "columns": [], "row_count": 9999}
        )
        assert "9999" in compressed

    def test_no_row_count_ok(self):
        """A ``None`` row count still yields JSON carrying the table name."""
        schema = dict(table="logs", columns=[], row_count=None)
        compressed = compress_table_schema(schema)
        decoded = json.loads(compressed)
        assert decoded["table"] == "logs"

    def test_parsed_columns_round_trip(self):
        """The decoded ``columns`` entry equals the list that was passed in."""
        columns = [
            {"name": "user_id", "type": "uuid"},
            {"name": "email", "type": "text"},
        ]
        # The same list object is both the input and the expected value.
        schema = {"table": "users", "columns": columns, "row_count": 42}
        compressed = compress_table_schema(schema)
        decoded = json.loads(compressed)
        assert decoded["columns"] == columns

    def test_unknown_table_name(self):
        """With no ``table`` key, the output should still mention "unknown"."""
        schema = dict(columns=[], row_count=0)
        compressed = compress_table_schema(schema)
        assert "unknown" in compressed

    def test_text_field_present(self):
        """The decoded output has a ``text`` entry that mentions the table."""
        schema = {
            "table": "sales",
            "columns": [{"name": "amount", "type": "numeric"}],
            "row_count": 10,
        }
        compressed = compress_table_schema(schema)
        decoded = json.loads(compressed)
        assert "text" in decoded
        assert "sales" in decoded["text"]
|