Spaces:
Sleeping
Sleeping
Mustehson committed on
Commit ·
7c2e7ac
1
Parent(s): 3905cae
Summary Added
Browse files
app.py
CHANGED
|
@@ -69,18 +69,40 @@ def get_data_df(schema):
|
|
| 69 |
print('Getting Dataframe from the Database')
|
| 70 |
return conn.sql(f"SELECT * FROM {schema} LIMIT 1000").df()
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
def format_prompt(df):
|
| 74 |
-
|
| 75 |
-
"max": df.max(),
|
| 76 |
-
"min": df.min(),
|
| 77 |
-
"top": df.mode().iloc[0],
|
| 78 |
-
"nunique": df.nunique(),
|
| 79 |
-
"count": df.count(),
|
| 80 |
-
"dtype": df.dtypes.astype(str)
|
| 81 |
-
}).reset_index().rename(columns={"index": "column"})
|
| 82 |
return prompt_autogenerate.format_prompt(data=df.head().to_json(orient='records'),
|
| 83 |
-
summary=
|
| 84 |
def format_user_prompt(df):
|
| 85 |
return prompt_user_input.format_prompt(data=df.head().to_json(orient='records'))
|
| 86 |
|
|
|
|
| 69 |
print('Getting Dataframe from the Database')
|
| 70 |
return conn.sql(f"SELECT * FROM {schema} LIMIT 1000").df()
|
| 71 |
|
| 72 |
+
def df_summary(df):
    """Build a one-row-per-column summary table for ``df``.

    Every column of ``df`` produces exactly one row with the fields
    ``column``, ``max``, ``min``, ``count``, ``nunique``, ``dtype`` and
    ``top``:

    * Numeric columns (per ``pd.api.types.is_numeric_dtype``) report
      ``max`` and ``min``; ``top`` is left as ``None``.
    * All other columns (object, categorical, string, datetime, ...)
      report their most frequent value in ``top``; ``max`` and ``min``
      are left as ``None``.

    Args:
        df: The DataFrame to summarise.

    Returns:
        A DataFrame with the seven fields above as columns and one row per
        column of ``df``, in the same order. A frame without columns yields
        an empty result that still carries the seven field names.
    """
    fields = ["column", "max", "min", "count", "nunique", "dtype", "top"]
    rows = []

    # items() yields (label, Series) pairs, so duplicated column labels are
    # summarised one by one instead of df[label] returning a whole DataFrame.
    for column, values in df.items():
        row = {
            "column": column,
            "max": None,
            "min": None,
            "count": values.count(),
            "nunique": values.nunique(),
            "dtype": str(values.dtype),
            "top": None,
        }

        if pd.api.types.is_numeric_dtype(values):
            row["max"] = values.max()
            row["min"] = values.min()
        else:
            # Plain else rather than an object/categorical check: datetime
            # and string-dtype columns would otherwise vanish from the
            # summary, and the deprecated is_categorical_dtype is avoided.
            modes = values.mode()
            if not modes.empty:
                row["top"] = modes.iloc[0]

        rows.append(row)

    # Passing the field names keeps the schema stable when rows is empty.
    return pd.DataFrame(rows, columns=fields)
|
| 101 |
|
| 102 |
def format_prompt(df):
    """Fill the ``prompt_autogenerate`` template from ``df``.

    The template receives two JSON strings, both serialised with
    ``orient='records'``: ``data`` holds the rows returned by ``df.head()``
    and ``summary`` holds the table produced by ``df_summary(df)``.

    Args:
        df: The DataFrame to describe in the prompt.

    Returns:
        Whatever ``prompt_autogenerate.format_prompt`` returns.
    """
    column_stats = df_summary(df)
    payload = {
        "data": df.head().to_json(orient='records'),
        "summary": column_stats.to_json(orient='records'),
    }
    return prompt_autogenerate.format_prompt(**payload)
|
| 106 |
def format_user_prompt(df):
    """Fill the ``prompt_user_input`` template with a sample of ``df``.

    Args:
        df: The DataFrame whose ``head()`` rows are embedded in the prompt.

    Returns:
        Whatever ``prompt_user_input.format_prompt`` returns when given the
        sample rows as a JSON string (``orient='records'``) in ``data``.
    """
    preview_rows = df.head()
    preview_json = preview_rows.to_json(orient='records')
    return prompt_user_input.format_prompt(data=preview_json)
|
| 108 |
|