File size: 7,559 Bytes
"""Tests for the unified SchemaProvider (section 4.2 of the audit).

Covers both implementations: SpiderSchemaProvider (PAUQ/Spider structure)
and ConnectionSchemaProvider (an arbitrary connection to a SQLite file).
"""
import sqlite3
from pathlib import Path
import pytest
from src.data.schema_provider import (
ColumnSchema,
ConnectionSchemaProvider,
SpiderSchemaProvider,
TableSchema,
render_tables,
)
# ──────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def spider_dir(tmp_path: Path) -> Path:
    """Build a Spider-style database tree inside ``tmp_path``.

    Creates ``<tmp_path>/uni/uni.sqlite`` and ``<tmp_path>/sales/sales.sqlite``.
    Each file holds a single table named ``<db_id>_t`` with the columns
    ``id INTEGER PRIMARY KEY`` and ``name TEXT NOT NULL`` and one row,
    ``(1, '<db_id>-row')``.

    Returns:
        ``tmp_path`` itself, i.e. the directory that contains the
        per-database sub-directories.
    """
    for db_id in ("uni", "sales"):
        db_dir = tmp_path / db_id
        db_dir.mkdir()
        conn = sqlite3.connect(db_dir / f"{db_id}.sqlite")
        try:
            # The table name cannot be bound as a parameter, so it is
            # interpolated; db_id only ever comes from the literal tuple above.
            conn.execute(
                f"CREATE TABLE {db_id}_t (id INTEGER PRIMARY KEY, name TEXT NOT NULL)"
            )
            conn.execute(
                f"INSERT INTO {db_id}_t VALUES (?, ?)", (1, f"{db_id}-row")
            )
            conn.commit()
        finally:
            # Close even when a statement fails so the file handle is not
            # leaked for the remainder of the test session.
            conn.close()
    return tmp_path
@pytest.fixture
def tiny_db(tmp_path: Path) -> Path:
    """Create ``<tmp_path>/tiny.sqlite`` with a two-row ``users`` table.

    The table is ``users (id INTEGER PRIMARY KEY, name TEXT)`` and is
    populated with ids 1 and 2.

    Returns:
        Path to the created SQLite file.
    """
    db = tmp_path / "tiny.sqlite"
    conn = sqlite3.connect(db)
    try:
        conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)")
        # NOTE(review): the two name literals look mis-encoded (presumably
        # Cyrillic originally). They are kept verbatim because
        # test_connection_render_schema_with_samples asserts on the very
        # same literals - fix both places together.
        conn.executemany("INSERT INTO users VALUES (?, ?)", [(1, "ΠΠ²Π°Π½"), (2, "ΠΠ½Π½Π°")])
        conn.commit()
    finally:
        # Close even when a statement fails so the file handle is not leaked.
        conn.close()
    return db
# ──────────────────────────────────────────────────────────────────────
# TableSchema.to_ddl
# ──────────────────────────────────────────────────────────────────────
def test_table_schema_to_ddl_from_create_sql():
    """A table built from ``create_sql`` should render that SQL plus a trailing ';'."""
    table = TableSchema(
        name="t",
        create_sql="CREATE TABLE t (id INT PRIMARY KEY, name TEXT)",
    )
    expected_ddl = "CREATE TABLE t (id INT PRIMARY KEY, name TEXT);"
    rendered = table.to_ddl()
    assert rendered == expected_ddl
def test_table_schema_to_ddl_from_columns():
    """A table described only by columns should render DDL with per-column constraints."""
    id_column = ColumnSchema(name="id", type="INTEGER", primary_key=True, nullable=False)
    email_column = ColumnSchema(name="email", type="TEXT", nullable=False)
    table = TableSchema(name="users", columns=[id_column, email_column])

    rendered = table.to_ddl()

    assert rendered.startswith("CREATE TABLE users")
    # Each column definition must appear verbatim in the generated DDL.
    for fragment in ("id INTEGER PRIMARY KEY NOT NULL", "email TEXT NOT NULL"):
        assert fragment in rendered
# ──────────────────────────────────────────────────────────────────────
# SpiderSchemaProvider
# ──────────────────────────────────────────────────────────────────────
def test_spider_lists_databases(spider_dir: Path):
    """list_databases() should report both fixture databases, 'sales' before 'uni'."""
    provider = SpiderSchemaProvider(spider_dir)
    found = provider.list_databases()
    assert found == ["sales", "uni"]
def test_spider_db_path_resolves(spider_dir: Path):
    """db_path('uni') should point at an existing file named 'uni.sqlite'."""
    provider = SpiderSchemaProvider(spider_dir)
    resolved = provider.db_path("uni")
    assert resolved.exists()
    expected_name = "uni.sqlite"
    assert resolved.name == expected_name
def test_spider_db_path_raises_on_missing(spider_dir: Path):
    """db_path() should raise FileNotFoundError for a database id that is not on disk."""
    missing_id = "nonexistent"
    provider = SpiderSchemaProvider(spider_dir)
    with pytest.raises(FileNotFoundError):
        provider.db_path(missing_id)
def test_spider_get_tables_returns_tableschema(spider_dir: Path):
    """get_tables('uni') should yield exactly one TableSchema, named 'uni_t'."""
    found = SpiderSchemaProvider(spider_dir).get_tables("uni")
    assert len(found) == 1
    only_table = found[0]
    assert isinstance(only_table, TableSchema)
    assert only_table.name == "uni_t"
def test_spider_render_schema_has_create(spider_dir: Path):
    """render_schema('uni') should mention CREATE TABLE and the fixture table name."""
    provider = SpiderSchemaProvider(spider_dir)
    rendered = provider.render_schema("uni")
    for fragment in ("CREATE TABLE", "uni_t"):
        assert fragment in rendered
# ──────────────────────────────────────────────────────────────────────
# ConnectionSchemaProvider
# ──────────────────────────────────────────────────────────────────────
def test_connection_lists_tables(tiny_db: Path):
    """list_tables() should report the single 'users' table of the fixture file."""
    provider = ConnectionSchemaProvider(str(tiny_db))
    table_names = provider.list_tables()
    assert table_names == ["users"]
def test_connection_get_tables_columns(tiny_db: Path):
    """get_tables() should return one table whose column names are exactly id and name."""
    provider = ConnectionSchemaProvider(str(tiny_db))
    found = provider.get_tables()
    assert len(found) == 1
    # Compare as a set: only the collection of names matters here, not their order.
    column_names = set()
    for column in found[0].columns:
        column_names.add(column.name)
    assert column_names == {"id", "name"}
def test_connection_render_schema_with_samples(tiny_db: Path):
    """render_schema(include_samples=True) should show the DDL and at least one sample name."""
    provider = ConnectionSchemaProvider(str(tiny_db))
    rendered = provider.render_schema(include_samples=True)
    assert "CREATE TABLE users" in rendered
    # These literals are the same values the tiny_db fixture inserts.
    assert any(sample in rendered for sample in ("ΠΠ²Π°Π½", "ΠΠ½Π½Π°"))
def test_connection_test_connection(tiny_db: Path):
    """test_connection() should return exactly True for the fixture SQLite file."""
    provider = ConnectionSchemaProvider(str(tiny_db))
    outcome = provider.test_connection()
    assert outcome is True
# ──────────────────────────────────────────────────────────────────────
# Chain: SpiderSchemaProvider.for_database → ConnectionSchemaProvider
# ──────────────────────────────────────────────────────────────────────
def test_spider_for_database_returns_connection_provider(spider_dir: Path):
    """for_database('sales') should hand back a ConnectionSchemaProvider for that database."""
    spider_provider = SpiderSchemaProvider(spider_dir)
    sales_provider = spider_provider.for_database("sales")
    assert isinstance(sales_provider, ConnectionSchemaProvider)
    # The derived provider must see the table created for the 'sales' fixture database.
    rendered = sales_provider.render_schema()
    assert "sales_t" in rendered
# ──────────────────────────────────────────────────────────────────────
# render_tables — shared utility
# ──────────────────────────────────────────────────────────────────────
def test_render_tables_groups_ddl_and_samples():
    """render_tables(include_samples=True) should emit the DDL, the samples heading and each row."""
    tables = [
        TableSchema(
            name="x",
            columns=[ColumnSchema(name="id", type="INT")],
            sample_rows=[(1,), (2,)],
        ),
    ]
    text = render_tables(tables, include_samples=True)
    assert "CREATE TABLE x" in text
    # NOTE(review): this literal was mis-encoded (UTF-8 Cyrillic decoded with a
    # Greek code page) and could never match properly encoded output. It is
    # restored here to the Russian for "sample rows"; confirm it matches the
    # heading render_tables actually emits.
    assert "Примеры строк" in text
    assert "(1," in text and "(2," in text
def test_render_tables_no_samples():
    """render_tables(include_samples=False) should emit the DDL but no samples heading."""
    tables = [
        TableSchema(
            name="x",
            columns=[ColumnSchema(name="id", type="INT")],
            sample_rows=[(1,)],
        ),
    ]
    text = render_tables(tables, include_samples=False)
    assert "CREATE TABLE x" in text
    # NOTE(review): with the previous mis-encoded literal this negative check
    # passed vacuously, since that byte sequence never appears in correctly
    # encoded output. Restored to the Russian for "sample rows"; confirm it
    # matches the heading render_tables emits when samples are enabled.
    assert "Примеры строк" not in text
|