Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,261 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from streamlit_ace import st_ace
|
|
|
|
| 3 |
|
| 4 |
-
code= "def f1(x): return str(x * 3)"
|
| 5 |
-
exec(code)
|
| 6 |
-
st.write(f1(3))
|
| 7 |
|
| 8 |
-
content = st_ace(language="python")
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
st.
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
from streamlit_option_menu import option_menu
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import plotly.express as px
|
| 6 |
+
from plotly.subplots import make_subplots
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
from streamlit_ace import st_ace
|
| 9 |
+
from streamlit_pandas_profiling import st_profile_report
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
+
def set_data_files_session_object(file_name, file_path):
    """Record where an uploaded file was saved in the Streamlit session.

    The mapping is stored under ``st.session_state['data_files']`` and is
    keyed by file name; an existing entry for the same name is overwritten.

    Args:
        file_name: Key under which the file is registered.
        file_path: Location of the saved file.
    """
    registry = st.session_state['data_files'] if 'data_files' in st.session_state else {}
    registry[file_name] = file_path
    # Written back explicitly so the key also exists after the first upload.
    st.session_state['data_files'] = registry
|
| 22 |
+
|
| 23 |
+
def set_filtered_data_session_object(df, file_name):
    """Store the filtered view of a file's data in the Streamlit session.

    The mapping is stored under ``st.session_state['filtered_data']`` and is
    keyed by file name; an existing entry for the same name is overwritten.

    Args:
        df: The filtered data to remember.
        file_name: Key identifying which file the data belongs to.
    """
    filtered_by_file = (
        st.session_state['filtered_data'] if 'filtered_data' in st.session_state else {}
    )
    filtered_by_file[file_name] = df
    # Written back explicitly so the key also exists after the first filter.
    st.session_state['filtered_data'] = filtered_by_file
|
| 32 |
+
|
| 33 |
+
def set_dataframe_session_object(file_name, file_path):
    """Load a saved file into a DataFrame and cache it in the Streamlit session.

    The frame is stored in ``st.session_state['data_frames']`` under
    ``file_name``, replacing any frame previously cached for that name.

    Args:
        file_name: Key under which the loaded frame is cached.
        file_path: Path (or anything ``pandas.read_csv`` accepts) to load.

    Raises:
        ValueError: If the file has a ``.pkl`` or ``.pdf`` extension, which
            cannot be loaded as a table here.
    """
    # Only real paths carry an extension; other inputs (e.g. buffers) keep the
    # CSV behaviour.
    if isinstance(file_path, (str, os.PathLike)):
        extension = os.path.splitext(file_path)[1].lower()
    else:
        extension = ''

    if extension in ('.xls', '.xlsx'):
        data_frame = pd.read_excel(file_path)
    elif extension in ('.pkl', '.pdf'):
        # SECURITY: unpickling an uploaded file can execute arbitrary code, and
        # a PDF is not tabular data, so both are refused instead of being fed
        # to the CSV parser.
        raise ValueError(
            f"Cannot load '{file_name}' as a table: unsupported file type '{extension}'"
        )
    else:
        data_frame = pd.read_csv(file_path)

    if 'data_frames' not in st.session_state:
        st.session_state['data_frames'] = {}
    st.session_state['data_frames'][file_name] = data_frame
|
| 42 |
+
|
| 43 |
+
def save_file(file_object):
    """Write an uploaded file to ``uploaded_files/`` and register it.

    After the bytes are written, the file is registered through
    ``set_data_files_session_object`` and ``set_dataframe_session_object``.

    Args:
        file_object: Uploaded file exposing ``name`` and ``getbuffer()``.

    Raises:
        ValueError: If the upload's name has no usable file-name component.
    """
    # The name comes from the client, so keep only its last path component to
    # stop values such as "../../app.py" from escaping the upload directory.
    safe_name = os.path.basename(file_object.name.replace("\\", "/"))
    if not safe_name or safe_name in (".", ".."):
        raise ValueError(f"Invalid upload file name: {file_object.name!r}")

    upload_dir = os.path.join(os.getcwd(), "uploaded_files")
    # Created here as well so the function does not rely on its caller.
    os.makedirs(upload_dir, exist_ok=True)

    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(file_object.getbuffer())

    set_data_files_session_object(safe_name, file_path)
    set_dataframe_session_object(safe_name, file_path)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def create_upload_file_component():
    """Render the file uploader and save every file the user provides.

    When at least one file is uploaded, the ``uploaded_files`` directory is
    created under the current working directory and each file is handed to
    ``save_file``.
    """
    accepted_types = ['csv', 'xls', 'xlsx', 'pkl', 'pdf']
    uploads = st.file_uploader("Upload one file at a time.", type=accepted_types,
                               accept_multiple_files=True)
    if not uploads:
        return

    target_dir = os.path.join(os.getcwd(), "uploaded_files")
    os.makedirs(target_dir, mode=0o777, exist_ok=True)
    for upload in uploads:
        save_file(upload)
|
| 63 |
+
|
| 64 |
+
def create_component_to_add_target_func(selected_files, dfs, i):
    """Render the prototype widget for defining a target column.

    Shows a text input for the target variable name, then runs a fixed demo
    snippet and writes its result to the page.

    Args:
        selected_files: Names of the files chosen for analysis (unused here).
        dfs: Mapping of file name to data (unused here).
        i: Position of the file, used to make the widget key unique.
    """
    # Rendered for its UI effect; the entered value is not consumed yet.
    st.text_input("Name of the target variable", key="target_var" + str(i))

    # NOTE: placeholder snippet standing in for user-supplied code.
    code = "def f1(x): return str(x * 3)"
    # exec() inside a function cannot create names the function body can see,
    # so run the snippet in an explicit namespace and read the result from it.
    namespace = {}
    exec(code, namespace)
    st.write(namespace["f1"](3))
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _to_python_scalar(value):
    """Return the built-in Python equivalent of a NumPy scalar."""
    return value.item() if hasattr(value, "item") else value


def _parse_filter_value(series, raw_value):
    """Turn text typed by the user into a value comparable with *series*.

    Returns ``None`` (after showing a warning) when the column is numeric but
    the text is not a number; non-numeric columns are compared as text.
    """
    if not pd.api.types.is_numeric_dtype(series):
        return raw_value
    try:
        return float(raw_value)
    except ValueError:
        st.warning(f"'{raw_value}' is not a numeric value")
        return None


def _render_filter_controls(df, i):
    """Render the filter widgets for one file and return the filtered frame.

    Returns ``None`` while the user has not supplied a usable filter value.
    """
    action = "data_filter"
    col_to_filter = st.selectbox("Select the field to Filter on ", df.columns.values,
                                 key=action + "_col_filter_" + str(i))
    filter_operation = st.selectbox("Operation ",
                                    ['Greater Than', 'Equals', 'Less Than', "In", "In Between"],
                                    key=action + "_col_filter_op_" + str(i))
    if col_to_filter is None or not filter_operation:
        return None

    value_key = action + "_col_filter_val_" + str(i)
    column = df[col_to_filter]

    if filter_operation == 'In':
        chosen = st.multiselect("Select Values to Filter on ", column.unique(), key=value_key)
        return df[column.isin(chosen)] if chosen else None

    if filter_operation in ('Equals', 'Greater Than', 'Less Than'):
        raw_value = st.text_input("Enter a numeric value", key=value_key)
        if not raw_value:
            return None
        # The text box yields a string; comparing it with a numeric column
        # would raise or never match, so convert it first.
        threshold = _parse_filter_value(column, raw_value)
        if threshold is None:
            return None
        if filter_operation == 'Equals':
            return df[column == threshold]
        if filter_operation == 'Greater Than':
            return df[column > threshold]
        return df[column < threshold]

    if filter_operation == 'In Between':
        if not pd.api.types.is_numeric_dtype(column) or pd.api.types.is_bool_dtype(column):
            st.warning("'In Between' is only available for numeric columns")
            return None
        present = column.dropna()
        if present.empty:
            st.warning("The selected column has no values to build a range from")
            return None
        lowest = _to_python_scalar(present.min())
        highest = _to_python_scalar(present.max())
        if lowest == highest:
            # A slider needs distinct bounds; with a single distinct value
            # every non-null row already lies inside the range.
            return df[column.notna()]
        # A tuple default makes the slider return (lower, upper) bounds.
        lower, upper = st.slider("Select range", min_value=lowest, max_value=highest,
                                 value=(lowest, highest), key=value_key)
        return df[column.between(lower, upper)]

    return None


def _render_univariate_analysis(df_for_analysis, i):
    """Render one chart per column the user picks for univariate analysis."""
    # The key keeps this widget distinct when several files are shown at once.
    cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",
                                       options=df_for_analysis.columns.values,
                                       key="univariate_cols_" + str(i))
    for col in cols_for_analysis:
        series = df_for_analysis[col]
        if pd.api.types.is_numeric_dtype(series) and series.nunique() > 10:
            fig = px.scatter(x=df_for_analysis.index, y=series, labels=dict(x="Index", y=col))
            st.plotly_chart(fig, use_container_width=True)

        elif str(series.dtype) in ['object', 'category'] or series.nunique() <= 10:
            # Share and count of the 20 most frequent values, joined on the value.
            value_dist_df = series.value_counts(normalize=True)[:20].reset_index()
            value_dist_df.columns = [col, '% Distribution']

            value_dist_df_counts = series.value_counts()[:20].reset_index()
            value_dist_df_counts.columns = [col, 'Count']
            value_dist_df = value_dist_df.merge(value_dist_df_counts, on=col)

            trace1 = go.Bar(x=value_dist_df[col], y=value_dist_df['Count'], name='Count',
                            marker=dict(color='rgb(34,163,192)'))
            trace2 = go.Scatter(x=value_dist_df[col], y=value_dist_df['% Distribution'],
                                name='% Distribution', yaxis='y2')

            # Counts on the primary axis, percentages on the secondary axis.
            fig = make_subplots(specs=[[{"secondary_y": True}]])
            fig.add_trace(trace1)
            fig.add_trace(trace2, secondary_y=True)
            fig['layout'].update(height=600, width=800, title=f"{col} data distribution",
                                 xaxis=dict(tickangle=-90))

            st.plotly_chart(fig, use_container_width=True)


def create_component_for_analysis_for_single_df(selected_files, dfs, i):
    """Render the analysis controls and results for one selected file.

    Args:
        selected_files: Names of the files chosen for analysis.
        dfs: Mapping of file name to its DataFrame.
        i: Index into ``selected_files`` of the file to render; also used to
            keep widget keys unique per file.
    """
    st.subheader(selected_files[i])
    df = dfs[selected_files[i]]

    filter_data = st.checkbox("Analyse on Filtered Data", key="filter_data_check" + str(i))

    # Fall back to the full data until a filter value has actually been given,
    # rather than reading a missing or stale entry from the session.
    df_for_analysis = df
    if filter_data:
        filtered_df = _render_filter_controls(df, i)
        if filtered_df is not None:
            set_filtered_data_session_object(filtered_df, selected_files[i])
            df_for_analysis = filtered_df

    analysis_actions = st.multiselect("What analysis do you wish to do?",
                                      ['Summary of Data', 'Sample Data', 'Get Profile',
                                       'Univariate Analysis', 'Bivariate Analysis',
                                       'Add a Target Column'],
                                      key='analysis_action_' + str(i))

    # NOTE: 'Bivariate Analysis' is offered in the list but has no handler.
    for action in analysis_actions:
        if action == 'Sample Data':
            # sample() raises when asked for more rows than the frame holds.
            st.write(df_for_analysis.sample(min(10, len(df_for_analysis))))

        elif action == 'Get Profile':
            pr = df_for_analysis.profile_report()
            st_profile_report(pr)

        elif action == 'Summary of Data':
            st.write(df_for_analysis.describe())

        elif action == 'Univariate Analysis':
            _render_univariate_analysis(df_for_analysis, i)

        elif action == "Add a Target Column":
            # NOTE: placeholder snippet standing in for user-supplied code.
            code = "def f1(x): return str(x * 3)"
            # exec() inside a function cannot create names the function body
            # can see, so run the snippet in an explicit namespace.
            namespace = {}
            exec(code, namespace)
            st.write(namespace["f1"](3))
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def create_component_for_data_analysis():
    """Render the analysis page: file picker plus one column per chosen file.

    Shows a prompt to upload data when no file has been registered in the
    session yet. Frames missing from the session cache are loaded with
    ``pandas.read_csv`` from the registered path and cached.
    """
    if 'data_files' not in st.session_state:
        st.write("Upload a file to start analysis")
        return

    data_files = st.session_state['data_files']
    selected_files = st.multiselect("Select the File(S) to analyze", list(data_files))
    if not selected_files:
        return

    cols = st.columns(len(selected_files))

    # A file can be registered without any frame having been cached (for
    # example when loading it failed), so make sure the cache exists first.
    if 'data_frames' not in st.session_state:
        st.session_state['data_frames'] = {}
    frames = st.session_state['data_frames']

    dfs = {}
    for selected_file in selected_files:
        if selected_file not in frames:
            frames[selected_file] = pd.read_csv(data_files[selected_file])
        dfs[selected_file] = frames[selected_file]

    for i, col in enumerate(cols):
        with col:
            create_component_for_analysis_for_single_df(selected_files, dfs, i)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# def build_interface_for_model_analysis():
|
| 214 |
+
# Page entry point: title, sidebar navigation, then the view for the chosen menu.
st.title("Model Results Analyzer")
with st.sidebar:
    selected_menu = option_menu(None, ["Home", "Upload Data", "Add Features", "Analyze Data"],
                                icons=['house', 'cloud-upload', "list-task", 'gear'],
                                menu_icon="cast", default_index=0, orientation="vertical",
                                styles={
                                    "container": {"padding": "0!important", "background-color": "#fafafa"},
                                    "icon": {"color": "orange", "font-size": "15px"},
                                    "nav-link": {"font-size": "15px", "text-align": "left", "margin": "0px",
                                                 "--hover-color": "#eee"},
                                    "nav-link-selected": {"background-color": "green"},
                                })

if selected_menu == "Home":
    st.markdown('**This is to analyse models performance.**')

elif selected_menu == "Upload Data":
    create_upload_file_component()

    if 'data_files' in st.session_state:
        # One row per registered file name.
        st.write(pd.DataFrame(data={"File Name": list(st.session_state['data_files'])}))

elif selected_menu == "Analyze Data":
    create_component_for_data_analysis()

elif selected_menu == "Add Features":
    if 'data_files' in st.session_state:
        selected_file = st.selectbox("Select the File(S) to analyze", st.session_state['data_files'].keys())

        # A file may be registered without a cached frame (e.g. a failed load).
        cached_frames = st.session_state['data_frames'] if 'data_frames' in st.session_state else {}

        if selected_file and selected_file not in cached_frames:
            st.warning(f"No data has been loaded for '{selected_file}'")

        elif selected_file:
            df = cached_frames[selected_file]
            st.header("Enter the function definiton to create a new feature")
            feature_name = st.text_input("Enter the New Feature Name")
            st.warning("please retain the function signature as 'add_feature(row)'")

            content = st_ace(language="python", value="def add_feature(row):")

            # Only act once the editor holds more than the untouched template.
            if content and content.strip() != 'def add_feature(row):':
                if not feature_name:
                    st.warning("Enter a name for the new feature before applying the function")
                else:
                    # SECURITY: this runs code typed into the editor with the
                    # app's own privileges; expose this page to trusted users only.
                    # A copy of the globals keeps names such as `pd` available to
                    # the snippet without letting it overwrite the app's own names.
                    namespace = dict(globals())
                    namespace.pop('add_feature', None)
                    try:
                        exec(content, namespace)
                        add_feature = namespace.get('add_feature')
                        if not callable(add_feature):
                            raise ValueError("the code must define add_feature(row)")
                        new_values = df.apply(add_feature, axis=1)
                    except Exception as error:
                        # Boundary around user-written code: report the problem
                        # on the page instead of crashing the whole app.
                        st.error(f"Could not create feature '{feature_name}': {error}")
                    else:
                        df[feature_name] = new_values
                        st.session_state['data_frames'][selected_file] = df

            st.write(df.columns.values)
|
| 261 |
+
|