Spaces:
Sleeping
Sleeping
profiler
Browse files- pages/__pycache__/solution.cpython-312.pyc +0 -0
- pages/profiler.py +2 -3
- pages/solution.py +200 -152
- requirements.txt +3 -0
- utils/menu.py +2 -2
pages/__pycache__/solution.cpython-312.pyc
CHANGED
|
Binary files a/pages/__pycache__/solution.cpython-312.pyc and b/pages/__pycache__/solution.cpython-312.pyc differ
|
|
|
pages/profiler.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
from utils.menu import menu_with_redirect
|
| 3 |
-
|
| 4 |
|
| 5 |
menu_with_redirect()
|
| 6 |
-
|
|
|
|
|
|
|
| 1 |
from utils.menu import menu_with_redirect
|
| 2 |
+
from pages.solution import data_profiler
|
| 3 |
|
| 4 |
menu_with_redirect()
|
| 5 |
+
data_profiler()
|
pages/solution.py
CHANGED
|
@@ -6,6 +6,8 @@ import boto3
|
|
| 6 |
import time
|
| 7 |
import pandas as pd
|
| 8 |
import duckdb
|
|
|
|
|
|
|
| 9 |
from pygwalker.api.streamlit import StreamlitRenderer
|
| 10 |
import streamlit.components.v1 as components
|
| 11 |
from openai import AzureOpenAI
|
|
@@ -1149,176 +1151,182 @@ def design_insight():
|
|
| 1149 |
st.session_state['data_prompt_value']= ''
|
| 1150 |
st.session_state['graph_prompt_value']= ''
|
| 1151 |
|
| 1152 |
-
col1, col2 = st.columns([1, 3])
|
| 1153 |
-
with col1:
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
| 1175 |
-
|
| 1176 |
|
|
|
|
|
|
|
|
|
|
| 1177 |
with st.expander("Integer Columns", icon=":material/looks_one:"):
|
| 1178 |
st.write("\n\n".join(list(int_cols.values)))
|
| 1179 |
|
| 1180 |
-
|
|
|
|
| 1181 |
st.write("\n\n".join(list(float_cols.values)))
|
| 1182 |
|
|
|
|
| 1183 |
with st.expander("String Columns", icon=":material/abc:"):
|
| 1184 |
st.write("\n\n".join(list(string_cols.values)))
|
| 1185 |
|
|
|
|
| 1186 |
with st.expander("Datetime Columns", icon=":material/calendar_month:"):
|
| 1187 |
st.write("\n\n".join(list(datetime_cols.values)))
|
| 1188 |
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
with col2:
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
st.session_state['data_prompt'] = data_prompt
|
| 1206 |
-
try:
|
| 1207 |
-
query, method_num = answer_guide_question(data_prompt, st.session_state['explore_df'], st.session_state['explore_dtype'], selected_db)
|
| 1208 |
-
if query:
|
| 1209 |
-
try:
|
| 1210 |
-
mydf = df
|
| 1211 |
-
st.session_state['query'] = query
|
| 1212 |
-
result_df = duckdb.query(query).to_df()
|
| 1213 |
-
st.session_state['data_obj'] = result_df
|
| 1214 |
-
logger.info("Insight generated and displayed using AG Grid.")
|
| 1215 |
-
# st.session_state['data_prompt'] = '' # Clear the input field
|
| 1216 |
-
except Exception as e:
|
| 1217 |
-
st.write('Error executing the query. Please try again.')
|
| 1218 |
-
logger.error("Error executing the query: %s", e)
|
| 1219 |
-
else:
|
| 1220 |
-
st.write('Please retry again.')
|
| 1221 |
-
del st.session_state['code_execution_error']
|
| 1222 |
-
except Exception as e:
|
| 1223 |
-
st.write("Please try again with another prompt")
|
| 1224 |
-
logger.error("Error generating insight: %s", e)
|
| 1225 |
-
if st.session_state['data_obj'] is not None:
|
| 1226 |
-
# st.text(st.session_state['data_prompt'])
|
| 1227 |
-
display_paginated_dataframe(st.session_state['data_obj'], "ag_grid_insight")
|
| 1228 |
st.session_state['data_prompt'] = data_prompt
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
-
# graph_prompt_value = st.session_state.get('graph_prompt', '')
|
| 1233 |
-
graph_prompt = st.text_area("What graph would you like to generate?")#, value=graph_prompt_value)
|
| 1234 |
-
if st.button('Generate Graph'):
|
| 1235 |
-
graph_obj = None
|
| 1236 |
-
if graph_prompt:
|
| 1237 |
-
logger.debug("Graph prompt: %s | Previous graph prompt: %s", st.session_state.get('graph_prompt'), graph_prompt)
|
| 1238 |
-
if st.session_state['graph_prompt'] != graph_prompt:
|
| 1239 |
try:
|
| 1240 |
-
|
| 1241 |
-
st.session_state['
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
logger.info("Graph generated and displayed using Plotly.")
|
| 1247 |
-
else:
|
| 1248 |
-
st.session_state['graph_obj'] = None
|
| 1249 |
-
st.text('Error in generating graph, please try again.')
|
| 1250 |
except Exception as e:
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
else:
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1261 |
try:
|
| 1262 |
st.plotly_chart(st.session_state['graph_obj'], use_container_width=True)
|
| 1263 |
except Exception as e:
|
| 1264 |
st.write("Error in displaying graph, please try again")
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
if
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1273 |
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
|
| 1277 |
-
|
| 1278 |
-
|
| 1279 |
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
-
|
| 1312 |
-
|
| 1313 |
-
|
| 1314 |
-
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
|
| 1320 |
-
|
| 1321 |
-
|
| 1322 |
|
| 1323 |
def get_insight_list(persona):
|
| 1324 |
try:
|
|
@@ -1427,7 +1435,7 @@ def data_visualize():
|
|
| 1427 |
st.image('logo.png')
|
| 1428 |
with col_bb:
|
| 1429 |
st.subheader("InsightLab - Data Visualize", divider='blue')
|
| 1430 |
-
st.markdown('**Select a dataset that you generated
|
| 1431 |
with col_cc:
|
| 1432 |
st.markdown(APP_TITLE , unsafe_allow_html=True)
|
| 1433 |
|
|
@@ -1445,15 +1453,55 @@ def data_visualize():
|
|
| 1445 |
content = json.loads(blob_content)
|
| 1446 |
sql_query = content['sql']
|
| 1447 |
selected_db = content['database']
|
| 1448 |
-
|
| 1449 |
# Create a StreamlitRenderer instance
|
| 1450 |
-
|
| 1451 |
-
|
| 1452 |
-
|
| 1453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1454 |
# pyg_html=pyg.walk(df).to_html()
|
| 1455 |
# components.html(pyg_html, height=1000, scrolling=True)
|
| 1456 |
except Exception as e:
|
| 1457 |
st.error(f"Error loading dataset: {e}")
|
| 1458 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1459 |
|
|
|
|
| 6 |
import time
|
| 7 |
import pandas as pd
|
| 8 |
import duckdb
|
| 9 |
+
import ydata_profiling
|
| 10 |
+
from streamlit_pandas_profiling import st_profile_report
|
| 11 |
from pygwalker.api.streamlit import StreamlitRenderer
|
| 12 |
import streamlit.components.v1 as components
|
| 13 |
from openai import AzureOpenAI
|
|
|
|
| 1151 |
st.session_state['data_prompt_value']= ''
|
| 1152 |
st.session_state['graph_prompt_value']= ''
|
| 1153 |
|
| 1154 |
+
# col1, col2 = st.columns([1, 3])
|
| 1155 |
+
# with col1:
|
| 1156 |
+
with st.container():
|
| 1157 |
+
st.subheader('Dataset Columns')
|
| 1158 |
+
s = selected_query[len("ID: "):]
|
| 1159 |
+
end_index = s.find(",")
|
| 1160 |
+
id = s[:end_index]
|
| 1161 |
+
try:
|
| 1162 |
+
blob_content = getBlobContent(f"{query_lib}{st.session_state.userId}/{id}.json")
|
| 1163 |
+
content = json.loads(blob_content)
|
| 1164 |
+
st.session_state['query_file_content'] = content
|
| 1165 |
+
sql_query = content['sql']
|
| 1166 |
+
selected_db = content['database']
|
| 1167 |
+
df = execute_sql(sql_query, selected_db)
|
| 1168 |
+
df = drop_duplicate_columns(df)
|
| 1169 |
+
df_dict = get_column_types(df)
|
| 1170 |
+
df_dtypes = pd.DataFrame.from_dict(df_dict, orient='index', columns=['Dtype'])
|
| 1171 |
+
df_dtypes.reset_index(inplace=True)
|
| 1172 |
+
df_dtypes.rename(columns={'index': 'Column'}, inplace=True)
|
| 1173 |
+
|
| 1174 |
+
int_cols = df_dtypes[df_dtypes['Dtype'] == 'int64']['Column'].reset_index(drop=True)
|
| 1175 |
+
float_cols = df_dtypes[df_dtypes['Dtype'] == 'float64']['Column'].reset_index(drop=True)
|
| 1176 |
+
string_cols = df_dtypes[df_dtypes['Dtype'] == 'string']['Column'].reset_index(drop=True)
|
| 1177 |
+
datetime_cols = df_dtypes[df_dtypes['Dtype'] == 'datetime']['Column'].reset_index(drop=True)
|
| 1178 |
|
| 1179 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 1180 |
+
|
| 1181 |
+
with col1:
|
| 1182 |
with st.expander("Integer Columns", icon=":material/looks_one:"):
|
| 1183 |
st.write("\n\n".join(list(int_cols.values)))
|
| 1184 |
|
| 1185 |
+
with col2:
|
| 1186 |
+
with st.expander("Decimal Columns", icon=":material/pin:"):
|
| 1187 |
st.write("\n\n".join(list(float_cols.values)))
|
| 1188 |
|
| 1189 |
+
with col3:
|
| 1190 |
with st.expander("String Columns", icon=":material/abc:"):
|
| 1191 |
st.write("\n\n".join(list(string_cols.values)))
|
| 1192 |
|
| 1193 |
+
with col4:
|
| 1194 |
with st.expander("Datetime Columns", icon=":material/calendar_month:"):
|
| 1195 |
st.write("\n\n".join(list(datetime_cols.values)))
|
| 1196 |
|
| 1197 |
+
st.session_state['explore_df'] = df
|
| 1198 |
+
st.session_state['explore_dtype'] = df_dtypes
|
| 1199 |
|
| 1200 |
+
logger.info("Dataset columns displayed using AG Grid.")
|
| 1201 |
+
except Exception as e:
|
| 1202 |
+
st.error("Error while loading the dataset")
|
| 1203 |
+
logger.error("Error loading dataset: {}", e)
|
| 1204 |
+
|
| 1205 |
+
# with col2:
|
| 1206 |
+
with st.container():
|
| 1207 |
+
st.subheader('Generate Insight')
|
| 1208 |
+
# data_prompt_value = st.session_state.get('data_prompt', '')
|
| 1209 |
+
data_prompt = st.text_area("What insight would you like to generate?")#, value=data_prompt_value)
|
| 1210 |
+
if st.button('Generate Insight'):
|
| 1211 |
+
st.session_state['data_obj'] = None
|
| 1212 |
+
if data_prompt:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1213 |
st.session_state['data_prompt'] = data_prompt
|
| 1214 |
+
try:
|
| 1215 |
+
query, method_num = answer_guide_question(data_prompt, st.session_state['explore_df'], st.session_state['explore_dtype'], selected_db)
|
| 1216 |
+
if query:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1217 |
try:
|
| 1218 |
+
mydf = df
|
| 1219 |
+
st.session_state['query'] = query
|
| 1220 |
+
result_df = duckdb.query(query).to_df()
|
| 1221 |
+
st.session_state['data_obj'] = result_df
|
| 1222 |
+
logger.info("Insight generated and displayed using AG Grid.")
|
| 1223 |
+
# st.session_state['data_prompt'] = '' # Clear the input field
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1224 |
except Exception as e:
|
| 1225 |
+
st.write('Error executing the query. Please try again.')
|
| 1226 |
+
logger.error("Error executing the query: %s", e)
|
| 1227 |
else:
|
| 1228 |
+
st.write('Please retry again.')
|
| 1229 |
+
del st.session_state['code_execution_error']
|
| 1230 |
+
except Exception as e:
|
| 1231 |
+
st.write("Please try again with another prompt")
|
| 1232 |
+
logger.error("Error generating insight: %s", e)
|
| 1233 |
+
if st.session_state['data_obj'] is not None:
|
| 1234 |
+
# st.text(st.session_state['data_prompt'])
|
| 1235 |
+
display_paginated_dataframe(st.session_state['data_obj'], "ag_grid_insight")
|
| 1236 |
+
st.session_state['data_prompt'] = data_prompt
|
| 1237 |
+
|
| 1238 |
+
with st.container():
|
| 1239 |
+
st.subheader('Generate Graph')
|
| 1240 |
+
# graph_prompt_value = st.session_state.get('graph_prompt', '')
|
| 1241 |
+
graph_prompt = st.text_area("What graph would you like to generate?")#, value=graph_prompt_value)
|
| 1242 |
+
if st.button('Generate Graph'):
|
| 1243 |
+
graph_obj = None
|
| 1244 |
+
if graph_prompt:
|
| 1245 |
+
logger.debug("Graph prompt: %s | Previous graph prompt: %s", st.session_state.get('graph_prompt'), graph_prompt)
|
| 1246 |
+
if st.session_state['graph_prompt'] != graph_prompt:
|
| 1247 |
+
try:
|
| 1248 |
+
graph_obj, st.session_state['graph_code'] = generate_graph(graph_prompt, st.session_state['explore_df'], st.session_state['explore_dtype'], selected_db)
|
| 1249 |
+
st.session_state['graph_obj'] = graph_obj
|
| 1250 |
+
|
| 1251 |
+
if graph_obj is not None:
|
| 1252 |
+
# st.text(st.session_state['graph_prompt'])
|
| 1253 |
+
st.plotly_chart(graph_obj, use_container_width=True)
|
| 1254 |
+
logger.info("Graph generated and displayed using Plotly.")
|
| 1255 |
+
else:
|
| 1256 |
+
st.session_state['graph_obj'] = None
|
| 1257 |
+
st.text('Error in generating graph, please try again.')
|
| 1258 |
+
except Exception as e:
|
| 1259 |
+
logger.error("Error in generating graph: %s", e)
|
| 1260 |
+
st.write("Error in generating graph, please try again")
|
| 1261 |
+
else:
|
| 1262 |
try:
|
| 1263 |
st.plotly_chart(st.session_state['graph_obj'], use_container_width=True)
|
| 1264 |
except Exception as e:
|
| 1265 |
st.write("Error in displaying graph, please try again")
|
| 1266 |
+
st.session_state['graph_prompt'] = graph_prompt
|
| 1267 |
+
else:
|
| 1268 |
+
if st.session_state['graph_obj'] is not None:
|
| 1269 |
+
try:
|
| 1270 |
+
st.plotly_chart(st.session_state['graph_obj'], use_container_width=True)
|
| 1271 |
+
except Exception as e:
|
| 1272 |
+
st.write("Error in displaying graph, please try again")
|
| 1273 |
+
logger.error("Error in displaying graph: %s", e)
|
| 1274 |
+
with st.container():
|
| 1275 |
+
if 'graph_obj' in st.session_state or 'data_obj' in st.session_state:
|
| 1276 |
+
user_persona = st.selectbox('Select a persona to save the result of your exploration', persona_list)
|
| 1277 |
+
insight_desc = st.text_area(label='Describe the purpose of this insight for your reference later')
|
| 1278 |
+
if st.button('Save in Library'):
|
| 1279 |
+
base_prompt = st.session_state['query_file_content']['prompt']
|
| 1280 |
+
base_code = st.session_state['query_file_content']['sql']
|
| 1281 |
|
| 1282 |
+
insight_prompt = st.session_state.get('data_prompt', '')
|
| 1283 |
+
insight_code = st.session_state.get('query', '')
|
| 1284 |
|
| 1285 |
+
chart_prompt = st.session_state.get('graph_prompt', '')
|
| 1286 |
+
chart_code = st.session_state.get('graph_code', '')
|
| 1287 |
|
| 1288 |
+
try:
|
| 1289 |
+
result = get_existing_insight(base_code, user_persona)
|
| 1290 |
+
if result:
|
| 1291 |
+
existing_insight, file_number = result
|
| 1292 |
+
if insight_prompt and insight_code is not None:
|
| 1293 |
+
existing_insight['prompt'][f'prompt_{len(existing_insight["prompt"]) + 1}'] = {
|
| 1294 |
+
'insight_prompt': insight_prompt,
|
| 1295 |
+
'insight_code': insight_code
|
| 1296 |
+
}
|
| 1297 |
+
if chart_prompt and chart_code is not None:
|
| 1298 |
+
existing_insight['chart'][f'chart_{len(existing_insight["chart"]) + 1}'] = {
|
| 1299 |
+
'chart_prompt': chart_prompt,
|
| 1300 |
+
'chart_code': chart_code
|
| 1301 |
+
}
|
| 1302 |
+
try:
|
| 1303 |
+
update_insight(existing_insight, user_persona, file_number)
|
| 1304 |
+
st.text('Insight updated with new Graph and/or Data.')
|
| 1305 |
+
logger.info("Insight updated successfully.")
|
| 1306 |
+
except Exception as e:
|
| 1307 |
+
st.write('Could not update the insight file. Please try again')
|
| 1308 |
+
logger.error("Error while updating insight file: {}", e)
|
| 1309 |
+
else:
|
| 1310 |
+
# Create a new insight entry
|
| 1311 |
+
if not check_blob_exists(f"insight_library/{user_persona}/{st.session_state.userId}"):
|
| 1312 |
+
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
|
| 1313 |
+
container_client = blob_service_client.get_container_client(container_name)
|
| 1314 |
+
logger.info("Creating a new folder in the blob storage:", f"insight_library/{user_persona}/{st.session_state.userId}")
|
| 1315 |
+
folder_path = f"insight_library/{user_persona}/{st.session_state.userId}/"
|
| 1316 |
+
container_client.upload_blob(folder_path, data=b'')
|
| 1317 |
+
next_file_number = get_max_blob_num(f"insight_library/{user_persona}/{st.session_state.userId}/") + 1
|
| 1318 |
+
# logger.info(f"Next file number: {next_file_number}")
|
| 1319 |
+
|
| 1320 |
+
try:
|
| 1321 |
+
save_insight(next_file_number, user_persona, insight_desc, base_prompt, base_code,selected_db, insight_prompt, insight_code, chart_prompt, chart_code)
|
| 1322 |
+
st.text(f'Insight #{next_file_number} with Graph and/or Data saved.')
|
| 1323 |
+
# logger.info(f'Insight #{next_file_number} with Graph and/or Data saved.')
|
| 1324 |
+
except Exception as e:
|
| 1325 |
+
st.write('Could not write the insight file.')
|
| 1326 |
+
logger.error(f"Error while writing insight file: {e}")
|
| 1327 |
+
except Exception as e:
|
| 1328 |
+
st.write(f"Please try again")
|
| 1329 |
+
logger.error(f"Error checking existing insights: {e}")
|
| 1330 |
|
| 1331 |
def get_insight_list(persona):
|
| 1332 |
try:
|
|
|
|
| 1435 |
st.image('logo.png')
|
| 1436 |
with col_bb:
|
| 1437 |
st.subheader("InsightLab - Data Visualize", divider='blue')
|
| 1438 |
+
st.markdown('**Select a dataset that you generated to visualize the dataset.**')
|
| 1439 |
with col_cc:
|
| 1440 |
st.markdown(APP_TITLE , unsafe_allow_html=True)
|
| 1441 |
|
|
|
|
| 1453 |
content = json.loads(blob_content)
|
| 1454 |
sql_query = content['sql']
|
| 1455 |
selected_db = content['database']
|
| 1456 |
+
st.session_state['visualize_df'] = execute_sql(sql_query, selected_db)
|
| 1457 |
# Create a StreamlitRenderer instance
|
| 1458 |
+
if st.session_state.get('visualize_df') is not None:
|
| 1459 |
+
with st.expander(label = '**Raw Dataset**'):
|
| 1460 |
+
display_paginated_dataframe(st.session_state['visualize_df'], "base_dataset_for_visualization")
|
| 1461 |
+
# st.write(st.session_state['visualize_df'])
|
| 1462 |
+
if st.button('Perform Visualizing'):
|
| 1463 |
+
pyg_app = StreamlitRenderer(st.session_state['visualize_df'])
|
| 1464 |
+
# Display the interactive visualization
|
| 1465 |
+
pyg_app.explorer()
|
| 1466 |
# pyg_html=pyg.walk(df).to_html()
|
| 1467 |
# components.html(pyg_html, height=1000, scrolling=True)
|
| 1468 |
except Exception as e:
|
| 1469 |
st.error(f"Error loading dataset: {e}")
|
| 1470 |
|
| 1471 |
+
def data_profiler():
|
| 1472 |
+
col_aa, col_bb, col_cc = st.columns([1, 4, 1], gap="small", vertical_alignment="center")
|
| 1473 |
+
with col_aa:
|
| 1474 |
+
st.image('logo.png')
|
| 1475 |
+
with col_bb:
|
| 1476 |
+
st.subheader("InsightLab - Data Profiler", divider='blue')
|
| 1477 |
+
st.markdown('**Select a dataset that you generated for detailed profiling report.**')
|
| 1478 |
+
with col_cc:
|
| 1479 |
+
st.markdown(APP_TITLE , unsafe_allow_html=True)
|
| 1480 |
+
|
| 1481 |
+
get_saved_query_blob_list()
|
| 1482 |
+
selected_query = st.selectbox('Select a saved query', [""] + list(st.session_state['query_display_dict'].keys()))
|
| 1483 |
+
|
| 1484 |
+
if len(selected_query) > 0:
|
| 1485 |
+
if 'selected_query' not in st.session_state or st.session_state['selected_query'] != selected_query:
|
| 1486 |
+
with st.container():
|
| 1487 |
+
s = selected_query[len("ID: "):]
|
| 1488 |
+
end_index = s.find(",")
|
| 1489 |
+
id = s[:end_index]
|
| 1490 |
+
try:
|
| 1491 |
+
blob_content = getBlobContent(f"{query_lib}{st.session_state.userId}/{id}.json")
|
| 1492 |
+
content = json.loads(blob_content)
|
| 1493 |
+
sql_query = content['sql']
|
| 1494 |
+
selected_db = content['database']
|
| 1495 |
+
st.session_state['profile_df'] = execute_sql(sql_query, selected_db)
|
| 1496 |
+
|
| 1497 |
+
if st.session_state.get('profile_df') is not None:
|
| 1498 |
+
with st.expander(label = '**Raw Dataset**'):
|
| 1499 |
+
display_paginated_dataframe(st.session_state['profile_df'], "base_dataset_for_profiling")
|
| 1500 |
+
# st.write(st.session_state['profile_df'])
|
| 1501 |
+
if st.button('Perform Profiling'):
|
| 1502 |
+
pr = st.session_state['profile_df'].profile_report()
|
| 1503 |
+
st_profile_report(pr)
|
| 1504 |
+
except Exception as e:
|
| 1505 |
+
st.error(f"Error loading dataset: {e}")
|
| 1506 |
+
|
| 1507 |
|
requirements.txt
CHANGED
|
@@ -9,6 +9,9 @@ altair_saver==0.5.0
|
|
| 9 |
httpx==0.27.2
|
| 10 |
duckdb
|
| 11 |
pygwalker
|
|
|
|
|
|
|
|
|
|
| 12 |
plotly
|
| 13 |
boto3
|
| 14 |
pyodbc
|
|
|
|
| 9 |
httpx==0.27.2
|
| 10 |
duckdb
|
| 11 |
pygwalker
|
| 12 |
+
ydata_profiling
|
| 13 |
+
pkg_resources
|
| 14 |
+
streamlit-pandas-profiling
|
| 15 |
plotly
|
| 16 |
boto3
|
| 17 |
pyodbc
|
utils/menu.py
CHANGED
|
@@ -90,8 +90,8 @@ def _authenticated_menu():
|
|
| 90 |
|
| 91 |
st.sidebar.page_link("pages/composer.py", label="Dataset Composer")
|
| 92 |
st.session_state.page = "composer"
|
| 93 |
-
|
| 94 |
-
|
| 95 |
st.sidebar.page_link("pages/visualize.py", label="Data Visualizer")
|
| 96 |
st.session_state.page = "visualize"
|
| 97 |
st.sidebar.page_link("pages/designer.py", label="Insight Designer")
|
|
|
|
| 90 |
|
| 91 |
st.sidebar.page_link("pages/composer.py", label="Dataset Composer")
|
| 92 |
st.session_state.page = "composer"
|
| 93 |
+
st.sidebar.page_link("pages/profiler.py",label="Data Profiler")
|
| 94 |
+
st.session_state.page = "profiler"
|
| 95 |
st.sidebar.page_link("pages/visualize.py", label="Data Visualizer")
|
| 96 |
st.session_state.page = "visualize"
|
| 97 |
st.sidebar.page_link("pages/designer.py", label="Insight Designer")
|