# Fracture_Webapp / pages/2_Exploratory_Data_Analysis.py
# Repository page residue: "Sonnt's picture" (author avatar alt text)
# Last commit message: "Update pages/2_Exploratory_Data_Analysis.py"
# Commit: dfa8b70 (verified)
import numpy as np
import streamlit as st
import pandas as pd
import os
from ui import *
from mLogsFunctions import *
# Streamlit dashboard ---------------------------------------------------------
# Builds the page header (logo + title) and loads the working DataFrame `df`
# either from an uploaded CSV or from CSV files found in `data/merged/`.
#
# NOTE(review): the indentation of this section was lost in the copy under
# review. Placing the loading widgets inside the `info_site` column is a
# reconstruction -- confirm against the intended page layout.
pagetile = """<center><h1>EXPLORATORY DATA ANALYSIS</h1></center>"""
set_page_config(page='custom')
hide_menu_button()
condense_layout()

logo_site, info_site = st.columns([1.5, 8.5])
with logo_site:
    st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
with info_site:
    # st.set_option('deprecation.showfileUploaderEncoding', False)
    # st.set_option('maxUploadSize', 200*1024) # 200 MB
    st.markdown(pagetile, unsafe_allow_html=True)

    # Option 1: CSV file loading
    st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')
    st.subheader("1. CSV File Loading")
    df = csv_uploader()
    df = tweak_data(df, resample=False, reindex=True)

    # Option 2: CSV from LAS Exploration
    st.subheader("2. CSV from LAS Exploration")
    dir_path = 'data/merged/'
    try:
        # os.listdir returns entries in arbitrary order; sort so the option
        # list is stable between reruns.
        csv_files = sorted(
            filename for filename in os.listdir(dir_path)
            if filename.endswith('.csv')
        )
    except FileNotFoundError:
        # A missing folder means there is nothing to offer; show an empty
        # selector instead of crashing the whole page.
        csv_files = []
    selected_csv_file = st.multiselect(
        'Select a CSV file',
        csv_files,
        key='st.session_state.selected_well_multi',
    )

    if selected_csv_file:
        # The user picked one or more CSV files: read each of them and stack
        # the results into a single DataFrame.
        merged_data = pd.concat(
            [pd.read_csv(os.path.join(dir_path, f)) for f in selected_csv_file]
        )
    else:
        # Nothing selected: fall back to the uploaded data (already passed
        # through tweak_data once above).
        merged_data = df
    # Both branches hand their data to tweak_data with the same options.
    df = tweak_data(merged_data, resample=False, reindex=True)
# | Check data existence ------------------------------------------------------
# Everything below needs a loaded DataFrame, so the whole tab layout is built
# only when `df` is available.
# NOTE(review): indentation was lost in the copy under review; the nesting
# under `if df is not None:` is reconstructed from the use of `df` and the
# tab handles in every statement below.
if df is not None:
    curves = columns_list(df, no_depth=True, no_well=True)
    well_names = np.sort(df.WELL.unique())

    # | Tabs ------------------------------------------------------------------
    tab_labels = [
        'DataFrame',
        'DataStatistics',
        '3D Scatter Points',
        'CurvesView',
        'OutliersRemoval',
    ]
    tab1, tab2, tab3, tab4, tab5 = st.tabs(tab_labels)
    st.write('---')

    # | Tab 1: raw table ------------------------------------------------------
    with tab1:
        st.dataframe(df, width=1400, height=500)

    # | Tab 2: statistics views -----------------------------------------------
    with tab2:
        # One renderer per radio option; insertion order is the display order.
        # Lambdas keep the calls lazy so only the chosen view is rendered.
        tab2_views = {
            'DataStatistics': lambda: subtab21(df, well_names),
            'Missing Statistic': lambda: subtab22(df),
            'Curve Distribution': lambda: subtab23(df, curves),
            'Histogram Overlay': lambda: subtab24(df, curves),
            'Cross-Plot': lambda: subtab25(df, curves),
            'PairPlot': lambda: subtab26(df, curves),
        }
        st.radio(
            'DataVisualizationMethod',
            key='displayTab2',
            options=list(tab2_views),
            horizontal=True,
        )
        # The selection is read back from session state under the radio's key;
        # any unexpected value falls back to the 'DataStatistics' view.
        render_view = tab2_views.get(
            st.session_state.displayTab2,
            tab2_views['DataStatistics'],
        )
        render_view()

    # | Tab 3: 3D scatter -----------------------------------------------------
    with tab3:
        scatterPoint3D(df)

    # | Tab 4: curve viewer ---------------------------------------------------
    with tab4:
        stViewCurves(df)

    # | Tab 5: outlier removal ------------------------------------------------
    with tab5:
        rmOutliers(df)