# Hosting-page residue kept for provenance (not part of the program): Sonnt's picture / Upload 44 files / 052f08d
import numpy as np
import pandas as pd
import streamlit as st
import altair as alt
from streamlit_vega_lite import altair_component
from .fx import *
from mLogsFunctions import *
_DEFAULT_WINDOW_ROWS = 5
_NOTHING_TO_EDIT_MESSAGE = "Select points on the crossplot and a curve before editing."


def _selection_query(selected_points):
    """Build the ``DataFrame.query`` expression for a brushed selection.

    Args:
        selected_points: Mapping of column label to a ``(low, high)`` pair.

    Returns:
        One inclusive range test per column, AND-ed together.
    """
    return ' & '.join(
        f'{bounds[0]} <= `{column}` <= {bounds[1]}'
        for column, bounds in selected_points.items()
    )


def _parse_window_size(raw_value):
    """Return *raw_value* as a positive ``int``, or ``None`` when it is not one."""
    try:
        size = int(raw_value)
    except (TypeError, ValueError):
        return None
    return size if size >= 1 else None


def _curve_column(frame, curve):
    """Return the label of the column in *frame* that *curve* refers to.

    The selected name is used as-is when it is a column; otherwise the
    upper-cased name is tried, which is what the original mean-replacement
    code always did.
    """
    return curve if curve in frame.columns else curve.upper()


def _normalize_outlier(frame, selected, curve, n_value):
    """Replace every selected sample of *curve* with a local mean.

    For each index label in *selected*, the mean is taken over the rows whose
    labels lie in ``[label - n_value // 2, label + n_value // 2]`` (the sample
    itself included). Rows are processed in order, so a later window can
    contain an already replaced value.

    Args:
        frame: Frame to edit; modified in place.
        selected: Frame whose index labels identify the samples to replace.
        curve: Name of the curve to edit.
        n_value: Requested number of rows for the mean.

    Returns:
        *frame*, for convenience.
    """
    column = _curve_column(frame, curve)
    half_window = n_value // 2
    for label in selected.index:
        window = frame.loc[label - half_window:label + half_window, column]
        frame.loc[[label], column] = window.mean()
    return frame


def _remove_data_point(frame, selected, curve):
    """Blank out every selected sample of *curve* by setting it to 0.

    The original wrote ``frame[i, curve] = 0``, which adds a column keyed by
    the tuple ``(i, curve)`` instead of touching the cell.

    Args:
        frame: Frame to edit; modified in place.
        selected: Frame whose index labels identify the samples to blank.
        curve: Name of the curve to edit.

    Returns:
        *frame*, for convenience.
    """
    # NOTE(review): 0 is the value the original code tried to write; confirm
    # it is the intended "removed" marker rather than NaN.
    frame.loc[selected.index, _curve_column(frame, curve)] = 0
    return frame


def _merge_edited_wells(base, edited):
    """Return *base* with the rows of every well found in *edited* replaced.

    Wells are matched with ``isin`` so that several edited wells are all
    replaced; joining their names into one string matched none of them and
    duplicated the data. The result gets a fresh unique index because the
    edit helpers address rows by index label.
    """
    untouched = base[~base["WELL"].isin(edited["WELL"].unique())]
    return pd.concat([untouched, edited], axis=0, ignore_index=True)


def _frame_for_method(df):
    """Return the rows to work on for the method chosen in the session state.

    For "Single Well" only the rows of the selected well are returned, sorted
    by ``DEPTH`` and re-indexed from 0; for any other method *df* is returned
    unchanged.
    """
    if st.session_state.method != "Single Well":
        return df
    single_well = df[df.WELL == st.session_state.option_w]
    return single_well.sort_values(by=['DEPTH']).reset_index(drop=True)


def _render_selection(data):
    """Draw the crossplot, the selected-rows table and the histograms.

    Returns:
        The rows of *data* inside the brushed selection, or ``None`` when
        nothing is selected.
    """
    interval = interval_define()
    chart_col, table_col = st.columns(2)
    with chart_col:
        selected_points = altair_component(
            make_selection(
                data,
                interval,
                st.session_state.option_x,
                st.session_state.option_y,
                st.session_state.option_c,
            )
        )
        # Drop the "name" entry so that only the per-column ranges remain;
        # pop() tolerates a payload that has no such key.
        selected_points.pop('name', None)

    # NOTE(review): the source lost its indentation; the histograms are
    # assumed to belong to the right-hand column, below the table - confirm.
    with table_col:
        selected_df = None
        if selected_points:
            selected_df = data.query(_selection_query(selected_points))
            st.write(f"Total selected points: {len(selected_df)}")
            st.dataframe(selected_df, width=800, height=260, use_container_width=False)
            st.write("Histogram of selected data:")
            histogram_source = selected_df
        else:
            st.write("No Selection")
            st.write("Histogram of entire data:")
            histogram_source = data
        histogram_x = bar_plot(histogram_source, st.session_state.option_x)
        histogram_y = bar_plot(histogram_source, st.session_state.option_y)
        st.write(alt.hconcat(histogram_x, histogram_y))
    return selected_df


def _commit_edit(edited, base):
    """Store an edited frame and the merged data set in the session state."""
    st.session_state.fdata = edited
    st.session_state.loc_data = _merge_edited_wells(base, edited)


def _render_editing(data, selected_df, base):
    """Draw the editing controls and apply the edit the user asked for.

    Args:
        data: Frame currently being worked on.
        selected_df: Selected rows of *data*, or ``None``.
        base: Full data set the edited rows are merged back into.

    Returns:
        *selected_df*, or ``None`` once an edit has been applied (the curve
        view then no longer highlights the old selection).
    """
    st.write('---')
    working_copy = data.copy()
    curve = st.selectbox(
        "Select curve to edit:",
        key="selected_curve",
        options=columns_list(data, no_depth=True, no_well=True),
    )
    n_value = _parse_window_size(
        st.text_input("Number of rows for Mean calculation ", str(_DEFAULT_WINDOW_ROWS))
    )
    if n_value is None:
        # A typo in the text box must not take the whole page down.
        st.warning(
            f"Number of rows must be a positive whole number; using {_DEFAULT_WINDOW_ROWS}."
        )
        n_value = _DEFAULT_WINDOW_ROWS

    if st.button("Outliers Processing"):
        if selected_df is None or curve is None:
            st.warning(_NOTHING_TO_EDIT_MESSAGE)
        else:
            _commit_edit(_normalize_outlier(working_copy, selected_df, curve, n_value), base)
            selected_df = None
    if st.button("Remove"):
        if selected_df is None or curve is None:
            st.warning(_NOTHING_TO_EDIT_MESSAGE)
        else:
            _commit_edit(_remove_data_point(working_copy, selected_df, curve), base)
            selected_df = None
    return selected_df


def _render_curves(data, selected_df):
    """Draw one curve plot per chosen curve for the selected well."""
    plotting_curves = st.multiselect(
        "Select curves to plot:",
        key="curvs_plt",
        options=columns_list(data, no_depth=True, no_well=True),
    )
    charts = {}
    if st.session_state.option_w is not None:
        # Prefer the edited data set once one exists in the session.
        source = st.session_state.loc_data if 'loc_data' in st.session_state else data
        well_rows = source[source["WELL"] == st.session_state.option_w]
        for position, curve in enumerate(plotting_curves):
            charts[position] = curve_plot(data=well_rows, filted_data=selected_df, x_column=curve)
    if charts:
        # Nothing is drawn while no curve is selected.
        st.write(alt.concat(*charts.values()).configure(autosize='fit'))


def rmOutliers(df):
    """Render the outlier-removal page for the well-log frame *df*.

    The page shows the selection widgets, a crossplot with a brush selection,
    the selected rows and histograms, controls that replace the selected
    samples of one curve by a local mean or blank them out, curve plots, and
    a CSV download of the current data.

    Edits are kept in ``st.session_state.fdata`` (the edited working frame)
    and ``st.session_state.loc_data`` (the full data set with the edited
    wells replaced). ``loc_data`` is used instead of *df* whenever it exists,
    so edits accumulate across wells and reruns.

    Args:
        df: Frame with at least the ``WELL`` and ``DEPTH`` columns.
    """
    selector_col, workspace_col = st.columns([1, 8])
    with selector_col:
        # NOTE(review): this rebinds ``st.session_state`` to whatever
        # selection_info returns - presumably the session state itself; confirm.
        st.session_state = selection_info(df, "method", "option_w", "option_x", "option_y", "option_c")

    with workspace_col:
        base = st.session_state.loc_data if 'loc_data' in st.session_state else df
        data = _frame_for_method(base)
        selected_df = _render_selection(data)
        selected_df = _render_editing(data, selected_df, base)
        _render_curves(data, selected_df)

    # NOTE(review): assumed to sit below both columns at full width - confirm.
    st.write('---')
    st.write("Download final result to csv file")
    saving_df = st.session_state.loc_data if "loc_data" in st.session_state else df
    st.download_button(
        label='Download',
        data=saving_df.to_csv(),
        file_name='Query_data.csv',
        mime='text/csv',
    )