import numpy as np
import pandas as pd
import streamlit as st
import altair as alt
from streamlit_vega_lite import altair_component
from .fx import *
from mLogsFunctions import *

# Window size used when the user's "rows for mean" input is not usable.
_DEFAULT_MEAN_WINDOW = 5


def _current_dataset(fallback: pd.DataFrame) -> pd.DataFrame:
    """Return ``st.session_state.loc_data`` when it exists, else *fallback*."""
    if 'loc_data' in st.session_state:
        return st.session_state.loc_data
    return fallback


def _resolve_curve_column(frame: pd.DataFrame, curve: str) -> str:
    """Return the column of *frame* to edit for the selected *curve*.

    The exact name is used when it is a column of *frame*; otherwise the
    upper-cased name is returned, which is what the mean-replacement code
    historically used.
    """
    if curve in frame.columns:
        return curve
    return curve.upper()


def _normalize_outlier(frame, selected, curve, n_value):
    """Replace each selected value of *curve* with a local mean, in place.

    For every index label in *selected*, the value is overwritten with the
    mean of the rows whose labels fall in ``[label - n, label + n]`` where
    ``n = n_value // 2``. The slice is label-based and inclusive, and it
    includes the selected row itself. Because edits are written straight
    into *frame*, a later window sees values already replaced by an earlier
    iteration.

    Returns the same *frame* object.
    """
    column = _resolve_curve_column(frame, curve)
    half_window = n_value // 2
    for label in selected.index:
        window = frame.loc[label - half_window:label + half_window, column]
        frame.loc[label, column] = window.mean()
    return frame


def _remove_data_point(frame, selected, curve):
    """Set *curve* to 0 on every selected row of *frame*, in place.

    The previous ``frame[i, curve] = 0`` did not touch the cell: it created
    a new column keyed by the tuple ``(i, curve)``. ``.loc`` addresses the
    intended row/column pair.

    Returns the same *frame* object.
    """
    column = _resolve_curve_column(frame, curve)
    # NOTE(review): 0 is the value the original code tried to write; confirm
    # whether a missing-value marker would suit downstream consumers better.
    frame.loc[selected.index, column] = 0
    return frame


def _merge_edited_wells(base: pd.DataFrame, edited: pd.DataFrame) -> pd.DataFrame:
    """Return *base* with the rows of every well present in *edited* replaced.

    Rows of *base* whose ``WELL`` appears in *edited* are dropped and
    *edited* is appended after the remaining rows. The result gets a fresh
    0..N-1 index so later label-based edits never hit duplicate labels.
    """
    edited_wells = edited["WELL"].unique()
    untouched = base[~base["WELL"].isin(edited_wells)]
    return pd.concat([untouched, edited], axis=0, ignore_index=True)


def _store_edit(df: pd.DataFrame, edited: pd.DataFrame) -> None:
    """Save *edited* as ``fdata`` and merge it into ``loc_data`` in the session."""
    # Resolve the base before loc_data is overwritten so edits made earlier
    # to other wells are carried forward.
    base = _current_dataset(df)
    st.session_state.fdata = edited
    st.session_state.loc_data = _merge_edited_wells(base, edited)


def _select_working_data(frame: pd.DataFrame) -> pd.DataFrame:
    """Return the rows to work on for the method chosen in the session state.

    For "Single Well" only the rows of ``st.session_state.option_w`` are
    kept, sorted by ``DEPTH`` and re-indexed from 0. For any other method
    *frame* is returned unchanged.
    """
    if st.session_state.method == "Single Well":
        well_rows = frame[frame.WELL == st.session_state.option_w]
        return well_rows.sort_values(by=['DEPTH']).reset_index(drop=True)
    return frame


def _read_window_size() -> int:
    """Render the window-size text input and return a usable positive int.

    Falls back to ``_DEFAULT_MEAN_WINDOW`` (with a warning) when the text is
    not an integer or is smaller than 1.
    """
    raw_value = st.text_input("Number of rows for Mean calculation ", "5")
    try:
        window = int(raw_value)
    except ValueError:
        window = 0
    if window < 1:
        st.warning(
            f"'{raw_value}' is not a positive whole number; "
            f"using {_DEFAULT_MEAN_WINDOW} rows instead."
        )
        return _DEFAULT_MEAN_WINDOW
    return window


def _ready_to_edit(selected, curve) -> bool:
    """Return True when there are selected rows and a curve to edit.

    Shows a warning and returns False otherwise, so a button click without a
    selection no longer raises.
    """
    if selected is None or selected.empty or curve is None:
        st.warning("Select points on the crossplot and a curve before editing.")
        return False
    return True


def _render_outlier_editor(df: pd.DataFrame, data: pd.DataFrame) -> None:
    """Render the crossplot selection, the edit controls and the curve view.

    *df* is the dataset passed to ``rmOutliers``; *data* is the subset being
    worked on. Edits are applied to a copy of *data* and stored in the
    session state.
    """
    # Crossplot and bar plot ------------------------------------------------
    interval = interval_define()
    col21, col22 = st.columns(2)
    with col21:
        selected_points = altair_component(
            make_selection(
                data,
                interval,
                st.session_state.option_x,
                st.session_state.option_y,
                st.session_state.option_c,
            )
        )
        if len(selected_points) > 0:
            # 'name' is dropped so only column -> range entries remain for
            # the query built below.
            selected_points.pop('name', None)

    with col22:
        selected_df = None
        if len(selected_points) != 0:
            query = ' & '.join(
                f'{crange[0]} <= `{col}` <= {crange[1]}'
                for col, crange in selected_points.items()
            )
            selected_df = data.query(query)
            st.write(f"Total selected points: {len(selected_df)}")
            st.dataframe(selected_df, width=800, height=260, use_container_width=False)
        else:
            st.write("No Selection")

        # NOTE(review): the flattened source does not show whether the
        # histograms were nested in this column; confirm the placement.
        if selected_df is not None:
            st.write("Histogram of selected data:")
            histogram_source = selected_df
        else:
            st.write("Histogram of entire data:")
            histogram_source = data
        histogram_x = bar_plot(histogram_source, st.session_state.option_x)
        histogram_y = bar_plot(histogram_source, st.session_state.option_y)
        st.write(alt.hconcat(histogram_x, histogram_y))

    # Outlier removal -------------------------------------------------------
    st.write('---')
    working_copy = data.copy()
    curve_to_edit = st.selectbox(
        "Select curve to edit:",
        key="selected_curve",
        options=columns_list(data, no_depth=True, no_well=True),
    )
    n_value = _read_window_size()

    if st.button("Outliers Processing"):
        if _ready_to_edit(selected_df, curve_to_edit):
            _store_edit(df, _normalize_outlier(working_copy, selected_df, curve_to_edit, n_value))
            # The selection is consumed; the curve view below gets None.
            selected_df = None

    if st.button("Remove"):
        if _ready_to_edit(selected_df, curve_to_edit):
            _store_edit(df, _remove_data_point(working_copy, selected_df, curve_to_edit))
            selected_df = None

    # Curve view ------------------------------------------------------------
    charts = []
    plotting_curves = st.multiselect(
        "Select curves to plot:",
        key="curvs_plt",
        options=columns_list(data, no_depth=True, no_well=True),
    )
    option_w = st.session_state.option_w
    if option_w is not None:
        # Plot the session's edited dataset when there is one, otherwise the
        # working copy of the current data.
        plot_source = _current_dataset(working_copy)
        well_rows = plot_source[plot_source["WELL"] == option_w]
        charts = [
            curve_plot(data=well_rows, filted_data=selected_df, x_column=curve)
            for curve in plotting_curves
        ]

    # Show curves -----------------------------------------------------------
    if charts:
        st.write(alt.concat(*charts).configure(autosize='fit'))


def rmOutliers(df: pd.DataFrame) -> None:
    """Render the outlier-editing page for *df*.

    A narrow column hosts the selection controls and a wide column hosts the
    crossplot, the edit buttons and the curve view. Edited data is kept in
    ``st.session_state.loc_data`` and offered as a CSV download.
    """
    _o1, _o2 = st.columns([1, 8])
    with _o1:
        # NOTE(review): this rebinds st.session_state to whatever
        # selection_info returns; kept exactly as the original does.
        st.session_state = selection_info(df, "method", "option_w", "option_x", "option_y", "option_c")

    with _o2:
        _render_outlier_editor(df, _select_working_data(_current_dataset(df)))

    # Download --------------------------------------------------------------
    # NOTE(review): the flattened source does not show whether this section
    # was nested inside the wide column; confirm the placement.
    st.write('---')
    st.write("Download final result to csv file")
    saving_df = _current_dataset(df)
    st.download_button(
        label='Download',
        data=saving_df.to_csv(),
        file_name='Query_data.csv',
        mime='text/csv',
    )