Spaces:

datajoi
/

Dataset-Test-Workflow

Sleeping

App Files Files Community

Mustehson committed on Nov 14, 2024

Commit

666a38f

1 Parent(s): 5f9d608

Close Connection

Browse files

Files changed (1) hide show

app.py +7 -5

app.py CHANGED Viewed

@@ -101,7 +101,7 @@ def create_pipeline(schema):
 def load_pipeline(table_name):
     _conn = duckdb.connect("duckdb_pipeline.duckdb")
-    return _conn.sql(f"SELECT * FROM {table_name} LIMIT 1000").df()
 def df_summary(df):
     summary = []
@@ -147,6 +147,7 @@ def process_inputs(inputs) :
 def run_llm(messages):
   try:
     response = llm.invoke(messages)
     tests = json.loads(response.content)
   except Exception as e:
       return e
@@ -246,7 +247,7 @@ def main(table):
     table_name = create_pipeline(schema)
     # Load dlt pipeline
-    df = load_pipeline(table_name)
     # df = get_data_df(schema)
     df_statistics, df_alerts = statistics(df)
@@ -254,7 +255,6 @@ def main(table):
     messages = format_prompt(df=df)
     tests = run_llm(messages)
-    print(tests)
     if isinstance(tests, Exception):
         tests = pd.DataFrame([{"error": f"❌ Unable to generate tests. {tests}"}])
@@ -264,6 +264,7 @@ def main(table):
     tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
     pandera_results = validate_pandera(tests, df)
     return df.head(10), df_statistics, df_alerts, describe_cat, describe_num, tests_df, pandera_results
 def user_results(table, text_query):
@@ -274,10 +275,9 @@ def user_results(table, text_query):
     table_name = create_pipeline(schema)
     # Load dlt pipeline
-    df = load_pipeline(table_name)
     messages = format_user_prompt(df=df, user_description=text_query)
-    tests = run_llm(messages)
     print(f'Generated Tests from user input: {tests}')
@@ -289,6 +289,8 @@ def user_results(table, text_query):
     tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
     pandera_results = validate_pandera(tests, df)
     return tests_df, pandera_results
 # Custom CSS styling

 def load_pipeline(table_name):
     _conn = duckdb.connect("duckdb_pipeline.duckdb")
+    return _conn, _conn.sql(f"SELECT * FROM {table_name} LIMIT 1000").df()
 def df_summary(df):
     summary = []
 def run_llm(messages):
   try:
     response = llm.invoke(messages)
+    print(response.content)
     tests = json.loads(response.content)
   except Exception as e:
       return e
     table_name = create_pipeline(schema)
     # Load dlt pipeline
+    connection, df = load_pipeline(table_name)
     # df = get_data_df(schema)
     df_statistics, df_alerts = statistics(df)
     messages = format_prompt(df=df)
     tests = run_llm(messages)
     if isinstance(tests, Exception):
         tests = pd.DataFrame([{"error": f"❌ Unable to generate tests. {tests}"}])
     tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
     pandera_results = validate_pandera(tests, df)
+    connection.close()
     return df.head(10), df_statistics, df_alerts, describe_cat, describe_num, tests_df, pandera_results
 def user_results(table, text_query):
     table_name = create_pipeline(schema)
     # Load dlt pipeline
+    connection, df = load_pipeline(table_name)
     messages = format_user_prompt(df=df, user_description=text_query)
     print(f'Generated Tests from user input: {tests}')
     tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
     pandera_results = validate_pandera(tests, df)
+    connection.close()
     return tests_df, pandera_results
 # Custom CSS styling