"""Streamlit app that checks whether uploaded data looks normally distributed.

The user picks a file format (.csv or .txt), uploads a file, and the app
draws a density histogram of the values and reports the outcome of a
Shapiro-Wilk normality test.
"""

import os
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from scipy import stats
from scipy.stats import kstest, shapiro

# The data is reported as normal when the Shapiro-Wilk p-value exceeds this.
NORMALITY_P_THRESHOLD = 0.1
# Number of histogram bins used for the density plot.
HIST_BINS = 100
# scipy's shapiro() requires at least three observations.
MIN_SHAPIRO_SAMPLES = 3


def _parse_comma_separated(text):
    """Return the floats found in a comma-separated string.

    Blank tokens (for example the one produced by a trailing comma or a
    trailing newline) are skipped, so every real value is kept.

    Raises:
        ValueError: if a non-blank token cannot be converted to float.
    """
    return [float(token) for token in text.split(",") if token.strip()]


def _report_distribution(values):
    """Plot a density histogram of *values* and report the normality verdict.

    Non-finite entries (NaN / inf) are dropped before plotting and testing.
    Problems with the data are reported in the UI instead of raising.
    """
    try:
        sample = np.asarray(values, dtype=float)
    except (TypeError, ValueError):
        st.error("The selected data is not numeric.")
        return

    # NaN/inf would break the histogram range and make the test return NaN.
    sample = sample[np.isfinite(sample)]
    if sample.size < MIN_SHAPIRO_SAMPLES:
        st.error(
            f"At least {MIN_SHAPIRO_SAMPLES} numeric values are needed "
            "to test for normality."
        )
        return

    fig, ax = plt.subplots()
    ax.hist(sample, bins=HIST_BINS, density=True)
    _, p_value = shapiro(sample)

    st.divider()
    if p_value > NORMALITY_P_THRESHOLD:
        st.write("Data follows Normal Distribution")
    else:
        st.write("Data does not follow Normal Distribution")
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this the
    # figures pile up across Streamlit reruns.
    plt.close(fig)


def _run_csv_flow():
    """Ask for a column name and a CSV file, then analyse that column."""
    col_name = st.text_input('Column to get')
    st.divider()
    upload_file = st.file_uploader("Upload File")
    st.divider()
    if upload_file is None:
        return

    try:
        df = pd.read_csv(upload_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        st.error(f"Could not read the CSV file: {err}")
        return

    if not col_name:
        st.warning("Enter the name of the column to analyse.")
        return
    if col_name not in df.columns:
        available = ", ".join(str(col) for col in df.columns)
        st.error(
            f"Column '{col_name}' not found. Available columns: {available}"
        )
        return

    _report_distribution(df[col_name])


def _run_txt_flow():
    """Ask for a text file of comma-separated numbers and analyse it."""
    upload_file = st.file_uploader("Upload File")
    st.divider()
    if upload_file is None:
        return

    try:
        # "utf-8-sig" also accepts plain UTF-8 and strips a leading BOM,
        # which would otherwise make the first number unparseable.
        text = upload_file.getvalue().decode("utf-8-sig")
        values = _parse_comma_separated(text)
    except ValueError as err:  # includes UnicodeDecodeError
        st.error(f"Could not parse the file as comma-separated numbers: {err}")
        return

    _report_distribution(values)


st.title("Distribution Predictor")
st.divider()
st.subheader("Select the type of file needed to be Uploaded: ")
f_format = st.radio(
    "Select the data format",
    ['.csv', '.txt'],
    index=None,
)
st.divider()

if f_format == '.csv':
    _run_csv_flow()
elif f_format == '.txt':
    _run_txt_flow()