Spaces:
Sleeping
Sleeping
Muhammad Mustehson committed on
Commit ·
bcca921
1
Parent(s): 1a436de
download db
Browse files
- app.py +23 -16
- config.yaml +1 -2
- src/models.py +1 -3
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import sys
|
|
| 6 |
import tempfile
|
| 7 |
import uuid
|
| 8 |
from pathlib import Path
|
| 9 |
-
from typing import Tuple
|
| 10 |
|
| 11 |
import duckdb
|
| 12 |
import gradio as gr
|
|
@@ -32,29 +32,36 @@ if not Path("/tmp").exists():
|
|
| 32 |
os.mkdir("/tmp")
|
| 33 |
|
| 34 |
|
| 35 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
try:
|
| 37 |
response = requests.get(url, stream=True)
|
| 38 |
response.raise_for_status()
|
| 39 |
|
| 40 |
with open(save_path, "wb") as out_file:
|
| 41 |
shutil.copyfileobj(response.raw, out_file)
|
| 42 |
-
|
| 43 |
return duckdb.connect(database=save_path)
|
| 44 |
except Exception as e:
|
| 45 |
-
logger.
|
| 46 |
raise
|
| 47 |
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
)
|
| 54 |
-
|
| 55 |
pipe = Query2Schema(duckdb=conn, chain=LLMChain())
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def get_tables_names(schema_name):
|
| 59 |
tables = conn.execute("SELECT table_name FROM information_schema.tables").fetchall()
|
| 60 |
return [table[0] for table in tables]
|
|
@@ -160,7 +167,7 @@ import duckdb
|
|
| 160 |
@pytest.fixture(scope="session")
|
| 161 |
def mesh_context():
|
| 162 |
|
| 163 |
-
context = Context(paths=".", gateway="duckdb")
|
| 164 |
yield context
|
| 165 |
|
| 166 |
@pytest.fixture
|
|
@@ -171,8 +178,8 @@ def test_back_fill(mesh_context, today_str):
|
|
| 171 |
mesh_context.plan(skip_backfill=False, auto_apply=True)
|
| 172 |
mesh_context.run(start=today_str, end=today_str)
|
| 173 |
|
| 174 |
-
df = mesh_context.fetchdf("SELECT * FROM {model_name} LIMIT 10")
|
| 175 |
-
assert not df.empty
|
| 176 |
"""
|
| 177 |
try:
|
| 178 |
schema = pipe.generate_pandera_schema(
|
|
@@ -430,6 +437,9 @@ with gr.Blocks(
|
|
| 430 |
tables_dropdown.change(
|
| 431 |
update_column_names, inputs=tables_dropdown, outputs=columns_df
|
| 432 |
)
|
|
|
|
|
|
|
|
|
|
| 433 |
run_tests_btn.click(
|
| 434 |
run_tests,
|
| 435 |
inputs=[
|
|
@@ -440,9 +450,6 @@ with gr.Blocks(
|
|
| 440 |
],
|
| 441 |
outputs=[test_logs, result_data, sql_model, result_schema],
|
| 442 |
)
|
| 443 |
-
demo.load(
|
| 444 |
-
fn=update_table_names, inputs=schema_dropdown, outputs=tables_dropdown
|
| 445 |
-
)
|
| 446 |
|
| 447 |
if __name__ == "__main__":
|
| 448 |
demo.launch(debug=True)
|
|
|
|
| 6 |
import tempfile
|
| 7 |
import uuid
|
| 8 |
from pathlib import Path
|
| 9 |
+
from typing import List, Tuple
|
| 10 |
|
| 11 |
import duckdb
|
| 12 |
import gradio as gr
|
|
|
|
| 32 |
os.mkdir("/tmp")
|
| 33 |
|
| 34 |
|
| 35 |
+
def download_file(url: str, save_path: str):
|
| 36 |
+
if Path(save_path).exists():
|
| 37 |
+
print(f"File already exists at {save_path}. Skipping download.")
|
| 38 |
+
return duckdb.connect(database=save_path)
|
| 39 |
+
|
| 40 |
try:
|
| 41 |
response = requests.get(url, stream=True)
|
| 42 |
response.raise_for_status()
|
| 43 |
|
| 44 |
with open(save_path, "wb") as out_file:
|
| 45 |
shutil.copyfileobj(response.raw, out_file)
|
|
|
|
| 46 |
return duckdb.connect(database=save_path)
|
| 47 |
except Exception as e:
|
| 48 |
+
logger.info(f"Error Downloding Chinook DB: {e}")
|
| 49 |
raise
|
| 50 |
|
| 51 |
|
| 52 |
+
conn = download_file(
|
| 53 |
+
url="https://raw.githubusercontent.com/RandomFractals/duckdb-sql-tools/main/data/chinook/duckdb/chinook.duckdb",
|
| 54 |
+
save_path="database/chinook.duckdb",
|
| 55 |
+
)
|
|
|
|
|
|
|
| 56 |
pipe = Query2Schema(duckdb=conn, chain=LLMChain())
|
| 57 |
|
| 58 |
|
| 59 |
+
def get_test_databases() -> List[str]:
|
| 60 |
+
"""Scans the 'tests' directory for subdirectories (representing databases)."""
|
| 61 |
+
|
| 62 |
+
return ["All", "chinook", "Northwind"]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def get_tables_names(schema_name):
|
| 66 |
tables = conn.execute("SELECT table_name FROM information_schema.tables").fetchall()
|
| 67 |
return [table[0] for table in tables]
|
|
|
|
| 167 |
@pytest.fixture(scope="session")
|
| 168 |
def mesh_context():
|
| 169 |
|
| 170 |
+
context = Context(paths=".", gateway="duckdb", load=True)
|
| 171 |
yield context
|
| 172 |
|
| 173 |
@pytest.fixture
|
|
|
|
| 178 |
mesh_context.plan(skip_backfill=False, auto_apply=True)
|
| 179 |
mesh_context.run(start=today_str, end=today_str)
|
| 180 |
|
| 181 |
+
# df = mesh_context.fetchdf("SELECT * FROM {model_name} LIMIT 10")
|
| 182 |
+
# assert not df.empty
|
| 183 |
"""
|
| 184 |
try:
|
| 185 |
schema = pipe.generate_pandera_schema(
|
|
|
|
| 437 |
tables_dropdown.change(
|
| 438 |
update_column_names, inputs=tables_dropdown, outputs=columns_df
|
| 439 |
)
|
| 440 |
+
demo.load(
|
| 441 |
+
fn=update_table_names, inputs=schema_dropdown, outputs=tables_dropdown
|
| 442 |
+
)
|
| 443 |
run_tests_btn.click(
|
| 444 |
run_tests,
|
| 445 |
inputs=[
|
|
|
|
| 450 |
],
|
| 451 |
outputs=[test_logs, result_data, sql_model, result_schema],
|
| 452 |
)
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
if __name__ == "__main__":
|
| 455 |
demo.launch(debug=True)
|
config.yaml
CHANGED
|
@@ -2,8 +2,7 @@ gateways:
|
|
| 2 |
duckdb:
|
| 3 |
connection:
|
| 4 |
type: duckdb
|
| 5 |
-
|
| 6 |
-
local: 'database/chinook.duckdb'
|
| 7 |
|
| 8 |
default_gateway: duckdb
|
| 9 |
|
|
|
|
| 2 |
duckdb:
|
| 3 |
connection:
|
| 4 |
type: duckdb
|
| 5 |
+
database: database/chinook.duckdb
|
|
|
|
| 6 |
|
| 7 |
default_gateway: duckdb
|
| 8 |
|
src/models.py
CHANGED
|
@@ -7,6 +7,4 @@ class SQLQueryModel(BaseModel):
|
|
| 7 |
|
| 8 |
|
| 9 |
class PanderaSchemaModel(BaseModel):
|
| 10 |
-
|
| 11 |
-
..., description="Only Pandera schema to validate the data."
|
| 12 |
-
)
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class PanderaSchemaModel(BaseModel):
|
| 10 |
+
schema: str = Field(..., description="Only Pandera schema to validate the data.")
|
|
|
|
|
|