Spaces:
Sleeping
Sleeping
Mustehson committed on
Commit ·
666a38f
1
Parent(s): 5f9d608
Close Connection
Browse files
app.py
CHANGED
|
@@ -101,7 +101,7 @@ def create_pipeline(schema):
|
|
| 101 |
|
| 102 |
def load_pipeline(table_name):
|
| 103 |
_conn = duckdb.connect("duckdb_pipeline.duckdb")
|
| 104 |
-
return _conn.sql(f"SELECT * FROM {table_name} LIMIT 1000").df()
|
| 105 |
|
| 106 |
def df_summary(df):
|
| 107 |
summary = []
|
|
@@ -147,6 +147,7 @@ def process_inputs(inputs) :
|
|
| 147 |
def run_llm(messages):
|
| 148 |
try:
|
| 149 |
response = llm.invoke(messages)
|
|
|
|
| 150 |
tests = json.loads(response.content)
|
| 151 |
except Exception as e:
|
| 152 |
return e
|
|
@@ -246,7 +247,7 @@ def main(table):
|
|
| 246 |
table_name = create_pipeline(schema)
|
| 247 |
|
| 248 |
# Load dlt pipeline
|
| 249 |
-
df = load_pipeline(table_name)
|
| 250 |
|
| 251 |
# df = get_data_df(schema)
|
| 252 |
df_statistics, df_alerts = statistics(df)
|
|
@@ -254,7 +255,6 @@ def main(table):
|
|
| 254 |
|
| 255 |
messages = format_prompt(df=df)
|
| 256 |
tests = run_llm(messages)
|
| 257 |
-
print(tests)
|
| 258 |
|
| 259 |
if isinstance(tests, Exception):
|
| 260 |
tests = pd.DataFrame([{"error": f"❌ Unable to generate tests. {tests}"}])
|
|
@@ -264,6 +264,7 @@ def main(table):
|
|
| 264 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
| 265 |
pandera_results = validate_pandera(tests, df)
|
| 266 |
|
|
|
|
| 267 |
return df.head(10), df_statistics, df_alerts, describe_cat, describe_num, tests_df, pandera_results
|
| 268 |
|
| 269 |
def user_results(table, text_query):
|
|
@@ -274,10 +275,9 @@ def user_results(table, text_query):
|
|
| 274 |
table_name = create_pipeline(schema)
|
| 275 |
|
| 276 |
# Load dlt pipeline
|
| 277 |
-
df = load_pipeline(table_name)
|
| 278 |
|
| 279 |
messages = format_user_prompt(df=df, user_description=text_query)
|
| 280 |
-
tests = run_llm(messages)
|
| 281 |
|
| 282 |
print(f'Generated Tests from user input: {tests}')
|
| 283 |
|
|
@@ -289,6 +289,8 @@ def user_results(table, text_query):
|
|
| 289 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
| 290 |
pandera_results = validate_pandera(tests, df)
|
| 291 |
|
|
|
|
|
|
|
| 292 |
return tests_df, pandera_results
|
| 293 |
|
| 294 |
# Custom CSS styling
|
|
|
|
| 101 |
|
| 102 |
def load_pipeline(table_name):
|
| 103 |
_conn = duckdb.connect("duckdb_pipeline.duckdb")
|
| 104 |
+
return _conn, _conn.sql(f"SELECT * FROM {table_name} LIMIT 1000").df()
|
| 105 |
|
| 106 |
def df_summary(df):
|
| 107 |
summary = []
|
|
|
|
| 147 |
def run_llm(messages):
|
| 148 |
try:
|
| 149 |
response = llm.invoke(messages)
|
| 150 |
+
print(response.content)
|
| 151 |
tests = json.loads(response.content)
|
| 152 |
except Exception as e:
|
| 153 |
return e
|
|
|
|
| 247 |
table_name = create_pipeline(schema)
|
| 248 |
|
| 249 |
# Load dlt pipeline
|
| 250 |
+
connection, df = load_pipeline(table_name)
|
| 251 |
|
| 252 |
# df = get_data_df(schema)
|
| 253 |
df_statistics, df_alerts = statistics(df)
|
|
|
|
| 255 |
|
| 256 |
messages = format_prompt(df=df)
|
| 257 |
tests = run_llm(messages)
|
|
|
|
| 258 |
|
| 259 |
if isinstance(tests, Exception):
|
| 260 |
tests = pd.DataFrame([{"error": f"❌ Unable to generate tests. {tests}"}])
|
|
|
|
| 264 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
| 265 |
pandera_results = validate_pandera(tests, df)
|
| 266 |
|
| 267 |
+
connection.close()
|
| 268 |
return df.head(10), df_statistics, df_alerts, describe_cat, describe_num, tests_df, pandera_results
|
| 269 |
|
| 270 |
def user_results(table, text_query):
|
|
|
|
| 275 |
table_name = create_pipeline(schema)
|
| 276 |
|
| 277 |
# Load dlt pipeline
|
| 278 |
+
connection, df = load_pipeline(table_name)
|
| 279 |
|
| 280 |
messages = format_user_prompt(df=df, user_description=text_query)
|
|
|
|
| 281 |
|
| 282 |
print(f'Generated Tests from user input: {tests}')
|
| 283 |
|
|
|
|
| 289 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
| 290 |
pandera_results = validate_pandera(tests, df)
|
| 291 |
|
| 292 |
+
connection.close()
|
| 293 |
+
|
| 294 |
return tests_df, pandera_results
|
| 295 |
|
| 296 |
# Custom CSS styling
|