""" tests/unit/test_schema_compressor.py Tests for schema text compression used for vector embedding. """ import json import pytest from schema.compressor import compress_table_schema @pytest.mark.unit class TestCompressTableSchema: def test_basic_output_is_json(self): table = { "table": "orders", "columns": [{"name": "id", "type": "integer"}, {"name": "amount", "type": "numeric"}], "row_count": 100, } result = compress_table_schema(table) parsed = json.loads(result) assert parsed["table"] == "orders" def test_contains_table_name(self): table = {"table": "customers", "columns": [], "row_count": 0} result = compress_table_schema(table) assert "customers" in result def test_contains_column_names(self): table = { "table": "orders", "columns": [ {"name": "order_id", "type": "integer"}, {"name": "total_amount", "type": "numeric"}, ], "row_count": 500, } result = compress_table_schema(table) assert "order_id" in result assert "total_amount" in result def test_contains_row_count(self): table = {"table": "events", "columns": [], "row_count": 9999} result = compress_table_schema(table) assert "9999" in result def test_no_row_count_ok(self): table = {"table": "logs", "columns": [], "row_count": None} result = compress_table_schema(table) parsed = json.loads(result) assert parsed["table"] == "logs" def test_parsed_columns_round_trip(self): columns = [ {"name": "user_id", "type": "uuid"}, {"name": "email", "type": "text"}, ] table = {"table": "users", "columns": columns, "row_count": 42} parsed = json.loads(compress_table_schema(table)) assert parsed["columns"] == columns def test_unknown_table_name(self): """Should not crash on missing table key.""" table = {"columns": [], "row_count": 0} result = compress_table_schema(table) assert "unknown" in result def test_text_field_present(self): table = {"table": "sales", "columns": [{"name": "amount", "type": "numeric"}], "row_count": 10} parsed = json.loads(compress_table_schema(table)) assert "text" in parsed assert "sales" in parsed["text"]