Muhammad Mustehson committed on
Commit
bcca921
·
1 Parent(s): 1a436de

download db

Browse files
Files changed (3) hide show
  1. app.py +23 -16
  2. config.yaml +1 -2
  3. src/models.py +1 -3
app.py CHANGED
@@ -6,7 +6,7 @@ import sys
6
  import tempfile
7
  import uuid
8
  from pathlib import Path
9
- from typing import Tuple
10
 
11
  import duckdb
12
  import gradio as gr
@@ -32,29 +32,36 @@ if not Path("/tmp").exists():
32
  os.mkdir("/tmp")
33
 
34
 
35
- def create_conn(url: str, save_path: str):
 
 
 
 
36
  try:
37
  response = requests.get(url, stream=True)
38
  response.raise_for_status()
39
 
40
  with open(save_path, "wb") as out_file:
41
  shutil.copyfileobj(response.raw, out_file)
42
-
43
  return duckdb.connect(database=save_path)
44
  except Exception as e:
45
- logger.error(f"Error downloading database: {e}")
46
  raise
47
 
48
 
49
- if not Path("database/chinook.duckdb").exists():
50
- conn = create_conn(
51
- url="https://raw.githubusercontent.com/RandomFractals/duckdb-sql-tools/main/data/chinook/duckdb/chinook.duckdb",
52
- save_path="database/chinook.duckdb",
53
- )
54
-
55
  pipe = Query2Schema(duckdb=conn, chain=LLMChain())
56
 
57
 
 
 
 
 
 
 
58
  def get_tables_names(schema_name):
59
  tables = conn.execute("SELECT table_name FROM information_schema.tables").fetchall()
60
  return [table[0] for table in tables]
@@ -160,7 +167,7 @@ import duckdb
160
  @pytest.fixture(scope="session")
161
  def mesh_context():
162
 
163
- context = Context(paths=".", gateway="duckdb")
164
  yield context
165
 
166
  @pytest.fixture
@@ -171,8 +178,8 @@ def test_back_fill(mesh_context, today_str):
171
  mesh_context.plan(skip_backfill=False, auto_apply=True)
172
  mesh_context.run(start=today_str, end=today_str)
173
 
174
- df = mesh_context.fetchdf("SELECT * FROM {model_name} LIMIT 10")
175
- assert not df.empty
176
  """
177
  try:
178
  schema = pipe.generate_pandera_schema(
@@ -430,6 +437,9 @@ with gr.Blocks(
430
  tables_dropdown.change(
431
  update_column_names, inputs=tables_dropdown, outputs=columns_df
432
  )
 
 
 
433
  run_tests_btn.click(
434
  run_tests,
435
  inputs=[
@@ -440,9 +450,6 @@ with gr.Blocks(
440
  ],
441
  outputs=[test_logs, result_data, sql_model, result_schema],
442
  )
443
- demo.load(
444
- fn=update_table_names, inputs=schema_dropdown, outputs=tables_dropdown
445
- )
446
 
447
  if __name__ == "__main__":
448
  demo.launch(debug=True)
 
6
  import tempfile
7
  import uuid
8
  from pathlib import Path
9
+ from typing import List, Tuple
10
 
11
  import duckdb
12
  import gradio as gr
 
32
  os.mkdir("/tmp")
33
 
34
 
35
+ def download_file(url: str, save_path: str):
36
+ if Path(save_path).exists():
37
+ print(f"File already exists at {save_path}. Skipping download.")
38
+ return duckdb.connect(database=save_path)
39
+
40
  try:
41
  response = requests.get(url, stream=True)
42
  response.raise_for_status()
43
 
44
  with open(save_path, "wb") as out_file:
45
  shutil.copyfileobj(response.raw, out_file)
 
46
  return duckdb.connect(database=save_path)
47
  except Exception as e:
48
+ logger.info(f"Error Downloding Chinook DB: {e}")
49
  raise
50
 
51
 
52
+ conn = download_file(
53
+ url="https://raw.githubusercontent.com/RandomFractals/duckdb-sql-tools/main/data/chinook/duckdb/chinook.duckdb",
54
+ save_path="database/chinook.duckdb",
55
+ )
 
 
56
  pipe = Query2Schema(duckdb=conn, chain=LLMChain())
57
 
58
 
59
+ def get_test_databases() -> List[str]:
60
+ """Scans the 'tests' directory for subdirectories (representing databases)."""
61
+
62
+ return ["All", "chinook", "Northwind"]
63
+
64
+
65
  def get_tables_names(schema_name):
66
  tables = conn.execute("SELECT table_name FROM information_schema.tables").fetchall()
67
  return [table[0] for table in tables]
 
167
  @pytest.fixture(scope="session")
168
  def mesh_context():
169
 
170
+ context = Context(paths=".", gateway="duckdb", load=True)
171
  yield context
172
 
173
  @pytest.fixture
 
178
  mesh_context.plan(skip_backfill=False, auto_apply=True)
179
  mesh_context.run(start=today_str, end=today_str)
180
 
181
+ # df = mesh_context.fetchdf("SELECT * FROM {model_name} LIMIT 10")
182
+ # assert not df.empty
183
  """
184
  try:
185
  schema = pipe.generate_pandera_schema(
 
437
  tables_dropdown.change(
438
  update_column_names, inputs=tables_dropdown, outputs=columns_df
439
  )
440
+ demo.load(
441
+ fn=update_table_names, inputs=schema_dropdown, outputs=tables_dropdown
442
+ )
443
  run_tests_btn.click(
444
  run_tests,
445
  inputs=[
 
450
  ],
451
  outputs=[test_logs, result_data, sql_model, result_schema],
452
  )
 
 
 
453
 
454
  if __name__ == "__main__":
455
  demo.launch(debug=True)
config.yaml CHANGED
@@ -2,8 +2,7 @@ gateways:
2
  duckdb:
3
  connection:
4
  type: duckdb
5
- catalogs:
6
- local: 'database/chinook.duckdb'
7
 
8
  default_gateway: duckdb
9
 
 
2
  duckdb:
3
  connection:
4
  type: duckdb
5
+ database: database/chinook.duckdb
 
6
 
7
  default_gateway: duckdb
8
 
src/models.py CHANGED
@@ -7,6 +7,4 @@ class SQLQueryModel(BaseModel):
7
 
8
 
9
  class PanderaSchemaModel(BaseModel):
10
- schema_name: str = Field(
11
- ..., description="Only Pandera schema to validate the data."
12
- )
 
7
 
8
 
9
  class PanderaSchemaModel(BaseModel):
10
+ schema: str = Field(..., description="Only Pandera schema to validate the data.")