Spaces:
Build error
Build error
| # -*- coding: utf-8 -*- | |
| #### Importing Modules #### | |
| import base64 | |
| import pandas as pd | |
| import streamlit as st | |
| from autoclean import data_clean | |
| from model_pipeline_steps import get_problem_type1, model_build | |
| from PIL import Image | |
| from DA_P1 import get_details, imbalnce_ratio, word_cloud, plotly_target, plot_ngram | |
| import pickle | |
| from NLP_text_classification import model_train, predict_text, predict_csv | |
| from kmeans import k_means | |
| from jinja2.ext import i18n | |
| info = {} | |
| #********* Handling return variable in cache memory to solve reloading issue in streamlit ******# | |
def get_details_local(data):
    """Return the result of ``get_details`` for *data*, unchanged.

    Local pass-through so the rest of this module calls the analysis step
    through a single name.
    """
    return get_details(data)
def clean(dataset, drop_features):
    """Run ``data_clean`` and return its two results as a tuple.

    Args:
        dataset: Data handed to ``data_clean`` as-is.
        drop_features: Features to drop, handed to ``data_clean`` as-is.

    Returns:
        ``(cleaned_data, steps_dict)`` exactly as produced by ``data_clean``.
    """
    # Unpack into exactly two names so an unexpected result arity fails here.
    cleaned, steps = data_clean(dataset, drop_features)
    return cleaned, steps
def get_problem_type_local(cleaned_data, target_data):
    """Return whatever ``get_problem_type1`` reports for the given data and target."""
    return get_problem_type1(cleaned_data, target_data)
def model_build_local(cleaned_data, target_data, p_type, balance_data, steps_dict):
    """Forward all arguments, in order, to ``model_build`` and return its result."""
    built = model_build(
        cleaned_data,
        target_data,
        p_type,
        balance_data,
        steps_dict,
    )
    return built
def model_train_local(dataset, input_feature, target_data, balance_data):
    """Forward all arguments, in order, to ``model_train`` and return its result."""
    return model_train(dataset, input_feature, target_data, balance_data)
def word_cloud_local(dataset, input_col):
    """Return the result of ``word_cloud`` for *input_col* of *dataset*."""
    return word_cloud(dataset, input_col)
def plotly_target_local(dataset, tg_col):
    """Return the result of ``plotly_target`` for *tg_col* of *dataset*."""
    return plotly_target(dataset, tg_col)
def plot_ngram_local(dataset, tg_col):
    """Return the result of ``plot_ngram`` for *tg_col* of *dataset*."""
    return plot_ngram(dataset, tg_col)
| #******************************************************************# | |
# ---------------------------------------------------------------------------
# Static markup used by the page chrome.
# ---------------------------------------------------------------------------

# CSS that hides Streamlit's own menu and footer.
_HIDE_STREAMLIT_STYLE = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""

# Bootstrap stylesheet required by the navbar markup below.
_BOOTSTRAP_LINK = (
    '<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">'
)

_NAVBAR_HTML = """
<nav class="navbar fixed-top navbar-expand-lg navbar-dark" style="background-color: #AED6F1;">
<a class="navbar-brand" href=""><b><font color = "#8b0000">Ai</font><i style="color:#1997E5 ;">Next</i></b></a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarNav">
<ul class="navbar-nav">
<li class="nav-item active">
<a class="nav-link disabled" href="#" style="color:black ;">Home <span class="sr-only">(current)</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="mailto: technology.coe@digital.datamatics.com" style="color:black ;" target="_blank">Contact Us</a>
</li>
</ul>
</div>
<div>
<a style="color:red;" href="https://www.datamatics.com/" target="_blank"><b>DATAMATICS</b></a>
</div>
</nav>
"""

_SIDEBAR_MAIL_LINK = (
    """<a class="nav-link" href="mailto: technology.coe@digital.datamatics.com" style="color:white ;" target="_blank">Mail us at - <u>technology.coe@digital.datamatics.com</u></a>"""
)

# Inline style of the sidebar badge announcing the detected problem type.
_BADGE_STYLE = (
    "border-radius: 12px;algin:center;background-color:#04AA6D;border: none;color: white;padding: 20px;text-align: center;text-decoration: none;display: inline-block;font-size: 16px;margin: 4px 2px;"
)

_PLOTLY_CONFIG = {'displaylogo': False}

_TARGET_DROPPED_MSG = "Selected Target column is also selected to drop.So Can't proceed"


def _render_page_chrome():
    """Set the tab title/icon and draw the navbar and the sidebar branding."""
    st.set_page_config(page_title="AiNext",
                       page_icon="image.png")
    st.markdown(_HIDE_STREAMLIT_STYLE, unsafe_allow_html=True)
    st.markdown(_BOOTSTRAP_LINK, unsafe_allow_html=True)
    st.markdown(_NAVBAR_HTML, unsafe_allow_html=True)
    # Image in sidebar and link to mail
    image_loan = Image.open("AI.jpg")
    st.sidebar.image(image_loan, use_column_width=True)
    st.sidebar.markdown(_SIDEBAR_MAIL_LINK, unsafe_allow_html=True)


def _write_pairs(target, mapping):
    """Write each ``key: value`` pair of *mapping* as one text line on *target*."""
    for key, value in mapping.items():
        target.text(f"{key!s}: {value!s}")


def _slot_for(position, slots, limits):
    """Return the first slot whose limit is >= *position*, or ``None`` if past all limits."""
    for slot, limit in zip(slots, limits):
        if position <= limit:
            return slot
    return None


def _render_variable_info(section, variable_info, heading, limits):
    """Render per-column details spread over four layout columns.

    For every column in *variable_info* the entries are visited in order with a
    1-based position. Positions up to ``limits[0]`` go to the first layout
    column, up to ``limits[1]`` to the second, and so on. Entries past the last
    limit are drawn as Plotly charts directly on *section*.
    """
    for column_name, details in variable_info.items():
        section.text(" ")
        section.write(heading + str(column_name))
        slots = section.beta_columns(4)
        for position, (label, entry) in enumerate(details.items(), start=1):
            slot = _slot_for(position, slots, limits)
            if slot is None:
                section.plotly_chart(entry, config=_PLOTLY_CONFIG)
            else:
                slot.text(f"{label!s}: {entry!s}")


def _render_data_analysis(dataset, final_output):
    """Draw the "Data Analysis and visualization Details" expander.

    Args:
        dataset: The uploaded data; its first ten rows are shown.
        final_output: Mapping returned by ``get_details_local``. The keys read
            here are ``overview``, ``reproduction``, ``numerical_variable_info``,
            ``categorical_variable_info``, ``scatter_chart_matrix``,
            ``correlation_matrix_info`` and ``missing_values_info``.
    """
    container = st.beta_expander("Data Analysis and visualization Details")
    container.subheader("First 10 Rows")
    container.write(dataset.head(10))
    container.text(" ")

    overview_con = container.beta_container()
    overview_con.subheader("Overview of Dataset")
    overview_con.text(" ")
    ov_c1, ov_c2, ov_c3 = overview_con.beta_columns(3)
    ov_c1.write("Statistics")
    _write_pairs(ov_c1, final_output['overview']['data_statistics'])
    ov_c2.write("Variable Info")
    _write_pairs(ov_c2, final_output['overview']['variable_type'])
    ov_c3.write("Reproduction")
    _write_pairs(ov_c3, final_output['reproduction'])
    container.text(" ")

    numeric_con = container.beta_container()
    numeric_con.subheader("Numeric Variable Information")
    numeric_con.text(" ")
    _render_variable_info(
        numeric_con,
        final_output['numerical_variable_info']['variable_info'],
        "Numeric Column:",
        (7, 14, 21, 24),
    )
    container.text(" ")

    categorical_con = container.beta_container()
    categorical_con.subheader("Categorical Variable Information")
    categorical_con.text(" ")
    _render_variable_info(
        categorical_con,
        final_output['categorical_variable_info']['variable_info'],
        "Categorical Column:",
        (5, 10, 15, 16),
    )
    container.text(" ")

    container.text("Scatter chart Matrix")
    container.plotly_chart(final_output['scatter_chart_matrix'], config=_PLOTLY_CONFIG)
    container.text(" ")
    container.text(" ")

    corr_con = container.beta_container()
    corr_con.subheader("Correlation Matrix Information")
    corr_con.text(" ")
    for title, figure in final_output['correlation_matrix_info'].items():
        corr_con.text(" ")
        corr_con.write(title)
        corr_con.plotly_chart(figure, config=_PLOTLY_CONFIG)
    container.text(" ")

    missing_con = container.beta_container()
    missing_con.subheader("Missing Values Information")
    missing_con.text(" ")
    # Two rows of two cells; the first row is created first.
    grid = [*missing_con.beta_columns(2), *missing_con.beta_columns(2)]
    for index, (title, figure) in enumerate(final_output['missing_values_info'].items()):
        # Spacer belongs to this section (it used to be written to the
        # correlation container by mistake).
        missing_con.text(" ")
        # Any entry beyond the fourth falls back to the first cell.
        cell = grid[index] if index < len(grid) else grid[0]
        cell.write(title)
        cell.pyplot(figure)


def _analysis_section(dataset):
    """Compute the dataset details and render them when the analyze button is pressed."""
    final_output = get_details_local(dataset)
    st.text(" ")
    if st.button("Click here to Analyze Data"):
        _render_data_analysis(dataset, final_output)


def _choose_balancing(dataset, target_data, key):
    """Show the imbalance ratio and return the selected balancing option."""
    st.text(" ")
    ir_res = imbalnce_ratio(dataset, target_data)
    st.subheader("Select below option to Handle Imbalanced Dataset (optional)")
    # f-string so a non-string ratio cannot raise a TypeError.
    st.text(f"Imbalance Ratio ({ir_res})")
    return st.selectbox(label=' ', options=["Auto", "False"], key=key)


def _render_model_report(results, chart_keys, hidden_keys):
    """Render every entry of a model-result mapping.

    Keys in *hidden_keys* are skipped, keys in *chart_keys* are drawn as Plotly
    charts, ``"Classification Report"`` is shown as preformatted text,
    ``"Handling Imbalanced Dataset"`` is expanded entry by entry, and anything
    else goes through ``st.write``.
    """
    for key, val in results.items():
        st.text(" ")
        if key in hidden_keys:
            continue
        st.write(key)
        if key in chart_keys:
            st.plotly_chart(val, config=_PLOTLY_CONFIG)
        elif key == "Classification Report":
            st.text(val)
        elif key == "Handling Imbalanced Dataset":
            for sub_key, sub_val in val.items():
                st.write(sub_key)
                st.text(sub_val)
        else:
            st.write(val)


def _render_download_links(target, frame, model):
    """Emit base64 download links for *frame* (as CSV) and the pickled *model* on *target*."""
    csv = frame.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="download.csv">Download Predicted file</a>'
    target.markdown(href, unsafe_allow_html=True)
    output_model = pickle.dumps(model)
    b64 = base64.b64encode(output_model).decode()
    href = f'<a href="data:file/output_model;base64,{b64}" download="Best_model.pkl">Download Best Model .pkl File</a> '
    target.markdown(href, unsafe_allow_html=True)


def _show_badge(badge_col, label):
    """Draw the green problem-type badge with *label* in *badge_col*."""
    html_string = f"<button style='{_BADGE_STYLE}'>{label}</button>"
    badge_col.markdown(html_string, unsafe_allow_html=True)


def _detect_problem_type(dataset, cols, drop_features, target_data, badge_col):
    """Decide between NLP text classification and predictive analytics.

    With exactly one input column left after dropping, an ``object`` dtype
    selects NLP text classification and any other dtype selects predictive
    analytics. With more than one input column left it is predictive analytics.

    Returns:
        ``(problem_statement, input_feature)``. ``problem_statement`` is
        ``"NLP text Classification"``, ``"Predictive Analytics"`` or ``""``.
        ``input_feature`` is the single remaining column for the NLP case and
        ``""`` otherwise.
    """
    if target_data in drop_features:
        st.error(_TARGET_DROPPED_MSG)
        return "", ""

    remaining = len(cols) - len(drop_features)
    if remaining > 2:
        _show_badge(badge_col, "Predictive Analytics")
        print("Predictive Analytics")
        return "Predictive Analytics", ""

    if remaining == 2:
        temp_data = dataset.drop(drop_features, axis=1)
        temp_data = temp_data.drop(target_data, axis=1)
        temp_col = temp_data.columns.tolist()
        print(temp_data.dtypes[temp_col[0]])
        if temp_data.dtypes[temp_col[0]] == "object":
            print("NLP text Classification")
            _show_badge(badge_col, "NLP Text Classification")
            return "NLP text Classification", temp_col[0]
        _show_badge(badge_col, "Predictive Analytics")
        print("Predictive Analytics")
        return "Predictive Analytics", ""

    # Fewer than one input column left: nothing to classify.
    return "", ""


def _iter_encodings(steps):
    """Yield ``(column_name, mapping)`` for each entry of ``steps['categorical_to_numeric']``.

    ``column_name`` is the part of the entry key that precedes ``_encoded``.
    """
    for entry in steps['categorical_to_numeric']:
        for encoded_name, mapping in entry.items():
            yield encoded_name.split('_encoded')[0], mapping


def _predict_uploaded_csv(uploaded):
    """Apply the recorded cleaning steps to *uploaded*, predict, and offer downloads.

    Reads the state stored in the module-level ``info`` dict by the build step.
    Any failure is reported to the user as "Uploaded wrong data for prediction".
    """
    try:
        steps = info['step_dict']
        target = info['target']
        test_data = pd.read_csv(uploaded)
        data = test_data.drop(steps['dropped_features'], axis=1)
        # Assign results back instead of calling inplace methods on
        # ``data[col]``: those act on a temporary Series and leave the frame
        # untouched under pandas Copy-on-Write.
        for col in data.columns:
            data[col] = data[col].fillna(steps['missing_values'][col])
        for col_name, mapping in _iter_encodings(steps):
            if col_name != target:
                data[col_name] = data[col_name].replace(mapping)
        if target in data.columns:
            data = data.drop([target], axis=1)

        final_model = info['model']['Best pipeline']
        predictions = final_model.predict(data)
        print(len(test_data))
        print(len(predictions))
        predict_column_name = f"{target}_prediction"
        test_data[predict_column_name] = predictions

        # Map encoded predictions back to the original target labels.
        for col_name, mapping in _iter_encodings(steps):
            if col_name == target:
                decode = {code: label for label, code in mapping.items()}
                test_data[predict_column_name] = test_data[predict_column_name].replace(decode)

        _render_download_links(st, test_data, final_model)
    except Exception as e:
        st.text(e)
        st.error("Uploaded wrong data for prediction")


def _run_predictive_section(dataset, drop_features, target_data):
    """Predictive-analytics flow: analysis, model build, report and CSV prediction."""
    _analysis_section(dataset)
    balance_data = _choose_balancing(dataset, target_data, key="predictive_balance")

    # ********* Data Cleaning and Model Building ********* #
    st.text(" ")
    target_dropped = target_data in drop_features
    if st.checkbox('Start build model') and not target_dropped:
        st.text(" ")
        cleaned_data, steps_dict = clean(dataset, drop_features)
        info['clean_data'] = cleaned_data.head()
        info['auto_drop'] = steps_dict['auto_drop']
        p_type = get_problem_type_local(cleaned_data, target_data)
        info['problem'] = f"Problem type :{p_type}"
        info['target_statement'] = f"Target column: {target_data}"
        info['target'] = target_data
        info['model'] = model_build_local(cleaned_data, target_data, p_type, balance_data, steps_dict)
        info['step_dict'] = steps_dict
    elif target_dropped:
        st.error(_TARGET_DROPPED_MSG)

    # Only report when a model was actually built. ``info`` is module-level and
    # may hold just the clustering keys, so testing its truthiness is not enough.
    if "model" not in info:
        return

    # ******************* Model Result *********************** #
    for column in info['auto_drop']:
        st.write(f"automatically dropped column: {column}")
    st.text(" ")
    st.subheader("After Cleaning data")
    st.write(info['clean_data'])
    st.write(info['problem'])
    st.write(info['target_statement'])
    _render_model_report(
        info['model'],
        chart_keys={"ROC Curve", "model_comparison", "Regression graph"},
        hidden_keys={"Best pipeline", "step_dict"},
    )
    st.text(" ")
    st.text(" ")

    # ************************** Prediction ************************** #
    st.subheader("Upload csv file for Predictions : ")
    # Explicit key: without it this uploader is identical to the input-file
    # uploader (same label and type) and Streamlit rejects the duplicate.
    file_upload1 = st.file_uploader(" ", type=["csv"], key="prediction_csv")
    print(file_upload1)
    if file_upload1 is not None:
        _predict_uploaded_csv(file_upload1)


def _run_nlp_section(dataset, cols, target_data, input_feature):
    """NLP text-classification flow: visualisation, training, report and prediction."""
    try:
        # ********* Data Analysis and visualization ********* #
        st.text(" ")
        vis_con = st.beta_expander("Data Visualization")
        st.text(" ")
        vis_con.subheader("Select Input Feature")
        t_cols = ["Select"] + cols
        # The two selectboxes below share label and options, so each needs
        # its own key to get a distinct widget ID.
        input_col = vis_con.selectbox(label=' ', options=t_cols, key="nlp_vis_input_feature")
        st.set_option('deprecation.showPyplotGlobalUse', False)
        res = word_cloud_local(dataset, input_col)
        if res is not None:
            vis_con.plotly_chart(res)
        true_bigrams = plot_ngram_local(dataset, input_col)
        if true_bigrams is not None:
            vis_con.plotly_chart(true_bigrams, config=_PLOTLY_CONFIG)
        st.text(" ")
        vis_con.subheader("Select target Feature")
        tg_col = vis_con.selectbox(label=' ', options=t_cols, key="nlp_vis_target_feature")
        plot_res = plotly_target_local(dataset, tg_col)
        if plot_res is not None:
            vis_con.plotly_chart(plot_res, config=_PLOTLY_CONFIG)

        balance_data = _choose_balancing(dataset, target_data, key="nlp_balance")

        # ********* Model Building ********* #
        st.text(" ")
        if st.checkbox("Start Build model") and input_feature != target_data:
            model_info = model_train_local(dataset, input_feature, target_data, balance_data)
            _render_model_report(
                model_info,
                chart_keys={"model_comparison", "ROC Curve"},
                hidden_keys={"Best pipeline", "tfidf_vector"},
            )

            # ****************** Prediction ******************* #
            c1, c2 = st.beta_columns(2)
            exp1 = c1.beta_expander("Prediction on text data")
            exp2 = c2.beta_expander("Prediction on csv data")
            best_pipeline = model_info["Best pipeline"]
            tfidf_vector = model_info["tfidf_vector"]

            form_predict = exp1.form("predict")
            text_val = form_predict.text_area("Enter text for prediction")
            if form_predict.form_submit_button("Predict") and text_val != "":
                prediction = predict_text(text_val, best_pipeline, tfidf_vector)
                form_predict.write("Result :" + str(prediction[0]))

            f_up = exp2.file_uploader("predict_csv", type=["csv"])
            if f_up and exp2.button("Predict"):
                df = pd.read_csv(f_up, encoding='ISO-8859-1')
                predictions = predict_csv(df.copy(), best_pipeline, tfidf_vector, input_feature)
                df[f"{target_data}_prediction"] = predictions
                _render_download_links(exp2, df, best_pipeline)
                print("completed")
        elif target_data == input_feature:
            st.error("Input feature and target data cannot be same")
    except Exception as e:
        st.error(e)
        st.error("Something went wrong")


def _run_supervised_section(file_upload):
    """Supervised flow: pick drop/target features, detect the problem type, dispatch."""
    st.subheader("Supervised")
    dataset = pd.read_csv(file_upload)
    cols = dataset.columns.tolist()
    st.text(" ")
    st.subheader("choose the features which you want to drop")
    drop_features = st.multiselect('', cols)
    st.text(" ")
    st.subheader("Pick Your Target feature")
    # The last column is preselected as the target.
    target_data = st.selectbox(label=' ', options=cols, index=len(cols) - 1,
                               key="supervised_target")

    st.sidebar.text(" ")
    # Three sidebar columns are created; only the middle one carries the badge.
    _, badge_col, _ = st.sidebar.beta_columns(3)
    problem_statement, input_feature = "", ""
    if st.checkbox("Check Problem Type"):
        problem_statement, input_feature = _detect_problem_type(
            dataset, cols, drop_features, target_data, badge_col)

    if problem_statement == "Predictive Analytics":
        _run_predictive_section(dataset, drop_features, target_data)
    elif problem_statement == "NLP text Classification":
        _run_nlp_section(dataset, cols, target_data, input_feature)


def _run_unsupervised_section(file_upload):
    """UnSupervised flow (in progress): analysis plus a KMeans elbow curve."""
    st.subheader("UnSupervised")
    dataset = pd.read_csv(file_upload)
    cols = dataset.columns.tolist()
    _analysis_section(dataset)

    # *********** Selecting Model for clustering *********** #
    st.subheader("Select the Model")
    model = st.selectbox(label=' ', options=['Select', 'KMeans'], key="unsupervised_model")
    if model != "KMeans":
        return

    st.text(" ")
    st.subheader("choose the features which you want to drop")
    drop_features = st.multiselect('', cols)
    st.text(" ")
    cleaned_data, steps_dict = clean(dataset, drop_features)
    sample_data = cleaned_data.head()
    info['clean_data'] = sample_data
    info['auto_drop'] = steps_dict['auto_drop']
    # NOTE(review): only the head of the cleaned data is handed to k_means as
    # its fourth argument; confirm that this is what k_means expects.
    val1 = k_means(dataset, cols, drop_features, sample_data)
    st.write("Elbow-Curve")
    st.plotly_chart(val1, config=_PLOTLY_CONFIG)


def main():
    """Run the AiNext Streamlit page.

    Draws the page chrome, asks for an input CSV and an experiment type, then
    hands over to the Supervised or UnSupervised flow. Results of a model build
    are stored in the module-level ``info`` dict. Any exception that escapes a
    flow is shown to the user as a page header instead of crashing the script.

    NOTE(review): layout calls use the ``beta_columns`` / ``beta_expander`` /
    ``beta_container`` names; confirm the pinned Streamlit release still
    provides them.
    """
    try:
        _render_page_chrome()

        # Upload CSV File
        st.header("Upload Input csv file")
        file_upload = st.file_uploader(" ", type=["csv"], key="input_csv")
        if file_upload is None:
            return

        # Selecting Experiment type (Supervised or UnSupervised)
        st.subheader("Select the Experiment type")
        exp_type = st.selectbox(label=' ', options=['Select', 'Supervised', 'UnSupervised'],
                                key="experiment_type")
        print(exp_type)

        if exp_type == "Supervised":
            _run_supervised_section(file_upload)
        elif exp_type == "UnSupervised":
            _run_unsupervised_section(file_upload)
    except Exception as e:
        # Top-level boundary: surface the error on the page.
        st.header(e)
# Script entry point: run the app only when this file is the main module.
if __name__ == "__main__":
    main()