import streamlit as st
import pandas as pd
import os
from scipy.stats import shapiro, kstest
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO

# Page header.
st.title("Distribution Predictor")
st.divider()

# Ask which kind of file is going to be uploaded. index=None requests that no
# option is pre-selected; the branches further down only act once f_format
# equals one of the listed choices.
st.subheader("Select the type of file needed to be Uploaded: ")
format_choices = ['.csv', '.txt']
f_format = st.radio("Select the data format", format_choices, index=None)
st.divider()

# p-value above which the Shapiro-Wilk test is reported as "follows Normal".
NORMALITY_ALPHA = 0.1
# The Shapiro-Wilk test needs at least three observations.
MIN_SAMPLE_SIZE = 3


def _parse_numbers(text):
    """Return the numbers contained in *text* as a list of floats.

    Values may be separated by commas and/or whitespace (including newlines).
    Empty tokens, such as the one produced by a trailing comma, are ignored,
    so the last value is kept whether or not the file ends with a separator.

    Raises:
        ValueError: if a token cannot be converted to ``float``.
    """
    return [float(token) for token in text.replace(',', ' ').split()]


def _read_csv_column(uploaded, column):
    """Load *column* from the uploaded CSV as a float array.

    Cells that cannot be interpreted as numbers become NaN. Any problem
    (no column name entered, unreadable file, unknown column) is shown to the
    user and ``None`` is returned instead of raising.
    """
    if not column:
        st.warning("Enter the name of the column to analyse.")
        return None
    try:
        frame = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None
    if column not in frame.columns:
        available = ", ".join(str(name) for name in frame.columns)
        st.error(f"Column '{column}' not found. Available columns: {available}")
        return None
    numeric = pd.to_numeric(frame[column], errors="coerce")
    return numeric.to_numpy(dtype=float, na_value=np.nan)


def _read_txt_values(uploaded):
    """Decode the uploaded text file and parse it into a list of floats.

    Decoding or parsing problems are shown to the user and ``None`` is
    returned instead of raising.
    """
    try:
        # utf-8-sig also accepts plain UTF-8 but strips a leading BOM, which
        # would otherwise make float() fail on the first token.
        text = uploaded.getvalue().decode("utf-8-sig")
    except UnicodeDecodeError:
        st.error("The file is not valid UTF-8 text.")
        return None
    try:
        return _parse_numbers(text)
    except ValueError as exc:
        st.error(f"Could not parse the file as numbers: {exc}")
        return None


def _report_normality(values):
    """Plot a histogram of *values* and report the Shapiro-Wilk verdict.

    Non-finite entries (NaN, +/-inf) are dropped first because neither the
    histogram nor the test can handle them. If fewer than
    ``MIN_SAMPLE_SIZE`` values remain, an error is shown and nothing is
    plotted.
    """
    values = np.asarray(values, dtype=float)
    sample = values[np.isfinite(values)]

    dropped = values.size - sample.size
    if dropped:
        st.warning(
            f"Ignored {dropped} missing, non-numeric or infinite value(s)."
        )
    if sample.size < MIN_SAMPLE_SIZE:
        st.error(
            f"At least {MIN_SAMPLE_SIZE} numeric values are required, "
            f"got {sample.size}."
        )
        return

    fig, ax = plt.subplots()
    ax.hist(sample, bins=100, density=True)
    _, p_val_n = shapiro(sample)

    st.divider()
    if p_val_n > NORMALITY_ALPHA:
        st.write("Data follows Normal Distribution")
    else:
        st.write("Data does not follow Normal Distribution")

    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; the script
    # is re-executed on every interaction, so release the figure explicitly.
    plt.close(fig)


if f_format == '.csv':
    col_name = st.text_input('Column to get')
    st.divider()
    upload_file = st.file_uploader("Upload File")
    st.divider()

    if upload_file is not None:
        data = _read_csv_column(upload_file, col_name)
        if data is not None:
            _report_normality(data)

elif f_format == '.txt':
    upload_file = st.file_uploader("Upload File")
    st.divider()

    if upload_file is not None:
        data_f = _read_txt_values(upload_file)
        if data_f is not None:
            _report_normality(data_f)