# EXPLORATORY DATA ANALYSIS

import numpy as np
import streamlit as st
import pandas as pd
import os
from ui import *
from mLogsFunctions import *

# Streamlit dashboard -----------------------------------------------------------------------------------------
# Page setup and data loading. After this section `df` holds the data to explore,
# taken either from the uploader (option 1) or from the CSV files picked in
# 'data/merged/' (option 2). A non-empty selection in option 2 takes precedence.
pagetile = """<center><h1>EXPLORATORY DATA ANALYSIS</h1></center>"""
set_page_config(page='custom')
hide_menu_button()
condense_layout()

logo_site, info_site = st.columns([1.5, 8.5])
with logo_site:
    st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
with info_site:
    # Disabled settings, kept for reference:
    # st.set_option('deprecation.showfileUploaderEncoding', False)
    # st.set_option('maxUploadSize', 200*1024) # 200 MB
    st.markdown(pagetile, unsafe_allow_html=True)

    # Option 1: CSV File Loading
    st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')
    st.subheader("1. CSV File Loading")
    df = csv_uploader()
    df = tweak_data(df, resample=False, reindex=True)

    # Option 2: CSV from LAS Exploration
    st.subheader("2. CSV from LAS Exploration")
    dir_path = 'data/merged/'
    # The folder may not exist yet; os.listdir would raise FileNotFoundError and
    # take the whole page down, so fall back to an empty option list instead.
    if os.path.isdir(dir_path):
        # Sorted so the options appear in a stable order (os.listdir order is arbitrary).
        csv_files = sorted(filename for filename in os.listdir(dir_path) if filename.endswith('.csv'))
    else:
        csv_files = []
    selected_csv_file = st.multiselect('Select a CSV file', csv_files, key='st.session_state.selected_well_multi')

    if selected_csv_file:
        # The user picked one or more CSV files: read and stack them into one DataFrame.
        file_path = dir_path
        merged_data = pd.concat([pd.read_csv(os.path.join(file_path, f)) for f in selected_csv_file])
    else:
        # Nothing selected: keep whatever option 1 produced.
        # NOTE(review): this data has already been through tweak_data once above;
        # the second pass is preserved from the original code - confirm it is needed.
        merged_data = df
    df = tweak_data(merged_data, resample=False, reindex=True)
# Data-existence check ----------------------------------------------------------------------------------------
# Everything below is rendered only once a DataFrame is available.
if df is not None:
    curves = columns_list(df, no_depth=True, no_well=True)
    well_names = np.sort(df.WELL.unique())

    # Tabs ----------------------------------------------------------------------------------------------------
    tab_labels = [
        'DataFrame',
        'DataStatistics',
        '3D Scatter Points',
        'CurvesView',
        'OutliersRemoval',
    ]
    tab1, tab2, tab3, tab4, tab5 = st.tabs(tab_labels)
    st.write('---')

    # Tab 1: the raw table.
    with tab1:
        st.dataframe(df, width=1400, height=500)

    # Tab 2: one statistics/plot view at a time, chosen with a horizontal radio.
    with tab2:
        # Radio label -> renderer. Wrapped in lambdas so that only the chosen
        # view is actually executed.
        tab2_views = {
            'DataStatistics': lambda: subtab21(df, well_names),
            'Missing Statistic': lambda: subtab22(df),
            'Curve Distribution': lambda: subtab23(df, curves),
            'Histogram Overlay': lambda: subtab24(df, curves),
            'Cross-Plot': lambda: subtab25(df, curves),
            'PairPlot': lambda: subtab26(df, curves),
        }
        st.radio(
            'DataVisualizationMethod',
            key='displayTab2',
            options=list(tab2_views),
            horizontal=True,
        )
        chosen_view = st.session_state.displayTab2
        # Any unexpected value falls back to the general statistics view.
        render_view = tab2_views.get(chosen_view, tab2_views['DataStatistics'])
        render_view()

    # Tab 3: 3D scatter plot.
    with tab3:
        scatterPoint3D(df)

    # Tab 4: curve viewer.
    with tab4:
        stViewCurves(df)

    # Tab 5: outlier removal.
    with tab5:
        rmOutliers(df)