Sonnt commited on
Commit
052f08d
·
1 Parent(s): 62ee75e

Upload 44 files

Browse files
Files changed (45) hide show
  1. packages.txt +2 -0
  2. .gitattributes +3 -0
  3. Antuns/__pycache__/p1_import_csv.cpython-39.pyc +0 -0
  4. Antuns/__pycache__/page_setting.cpython-39.pyc +0 -0
  5. Antuns/__pycache__/prediction_LGBM.cpython-39.pyc +0 -0
  6. Antuns/page_setting.py +14 -0
  7. Antuns/prediction_LGBM.py +21 -0
  8. app.py +62 -0
  9. backup/081222/Antuns/p1_import_csv.py +13 -0
  10. backup/081222/Antuns/page_setting.py +14 -0
  11. backup/081222/Antuns/prediction_LGBM.py +21 -0
  12. backup/081222/Homepage.py +10 -0
  13. backup/081222/models/LGBM_20221125.json +3 -0
  14. backup/081222/models/LightGBM_0.45.pbz2 +3 -0
  15. backup/model.json +0 -0
  16. data/LogoVPI.png +0 -0
  17. data/all_file_merged_updated_with_All_geometry.csv +3 -0
  18. data/merged/Training_data.csv +0 -0
  19. data/merged/merged_df.csv +0 -0
  20. mLogsFunctions/__init__.py +31 -0
  21. mLogsFunctions/__pycache__/__init__.cpython-39.pyc +0 -0
  22. mLogsFunctions/__pycache__/dataloading.cpython-39.pyc +0 -0
  23. mLogsFunctions/__pycache__/dataprocessing.cpython-39.pyc +0 -0
  24. mLogsFunctions/__pycache__/fx.cpython-39.pyc +0 -0
  25. mLogsFunctions/__pycache__/lightGBMPred.cpython-39.pyc +0 -0
  26. mLogsFunctions/__pycache__/rmOutliers.cpython-39.pyc +0 -0
  27. mLogsFunctions/__pycache__/viewCurves.cpython-39.pyc +0 -0
  28. mLogsFunctions/dataloading.py +32 -0
  29. mLogsFunctions/dataprocessing.py +14 -0
  30. mLogsFunctions/fx.py +360 -0
  31. mLogsFunctions/lightGBMPred.py +21 -0
  32. mLogsFunctions/rmOutliers.py +135 -0
  33. mLogsFunctions/viewCurves.py +102 -0
  34. models/05_13_2023_11_50_38_model_LGBM.json +0 -0
  35. pages/1_LAS_Exploratory.py +289 -0
  36. pages/2_Exploratory_Data_Analysis.py +92 -0
  37. pages/3_Fracture_Training_Models.py +388 -0
  38. pages/4_Fracture_Prediction.py +190 -0
  39. requirements.txt +13 -0
  40. ui/PageComponents.py +176 -0
  41. ui/UIConfigs.py +41 -0
  42. ui/__init__.py +18 -0
  43. ui/__pycache__/PageComponents.cpython-39.pyc +0 -0
  44. ui/__pycache__/UIConfigs.cpython-39.pyc +0 -0
  45. ui/__pycache__/__init__.cpython-39.pyc +0 -0
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ freeglut3-dev
2
+ libgtk2.0-dev
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ backup/081222/models/LGBM_20221125.json filter=lfs diff=lfs merge=lfs -text
37
+ backup/081222/models/LightGBM_0.45.pbz2 filter=lfs diff=lfs merge=lfs -text
38
+ data/all_file_merged_updated_with_All_geometry.csv filter=lfs diff=lfs merge=lfs -text
Antuns/__pycache__/p1_import_csv.cpython-39.pyc ADDED
Binary file (616 Bytes). View file
 
Antuns/__pycache__/page_setting.cpython-39.pyc ADDED
Binary file (726 Bytes). View file
 
Antuns/__pycache__/prediction_LGBM.cpython-39.pyc ADDED
Binary file (678 Bytes). View file
 
Antuns/page_setting.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ img = Image.open("/work/2022_VPIMLogs_WebApp/data/LogoVPI.png")
4
+ def page_intro():
5
+ st.set_page_config(# Alternate names: setup_page, page, layout
6
+ layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
7
+ initial_sidebar_state="auto", # Can be "auto", "expanded", "collapsed"
8
+ page_title="VPI-MLogs", # String or None. Strings get appended with "• Streamlit".
9
+ page_icon=img, # String, anything supported by st.image, or None.
10
+ )
11
+ col_1, col_2, col_3, col_4, col_5, = st.columns(5)
12
+ with col_3:
13
+ st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", width=250)
14
+ st.header("Welcome to VPI-MLOGS!")
Antuns/prediction_LGBM.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ def Prediction_LGBM(trained_models:list=None, data:pd.DataFrame=None, feature_names:list=None):
4
+ """
5
+ mode: "predict", "predict_proba"
6
+ """
7
+ data_copy = data.copy()
8
+ # if mode == "predict":
9
+ # for i, model in enumerate(trained_models):
10
+ # y_preds = model.predict(data_copy[feature_names])
11
+ # data_copy[f"model_{i}"] = y_preds
12
+ #
13
+ # else:
14
+ for i, model in enumerate(trained_models):
15
+ y_preds = model.predict(data_copy[feature_names])
16
+ data_copy[f"model_{i}"] = y_preds
17
+
18
+ return data_copy
19
+
20
+ if __name__ == '__main__':
21
+ Prediction_LGBM()
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from ui.UIConfigs import *
3
+
4
+ #Streamlit dashboard------------------------------------------------------------------------------------------
5
+ set_page_config(page='home', logo_size=200)
6
+ hide_menu_button()
7
+ condense_layout()
8
+
9
+ st.sidebar.success("")
10
+ st.markdown("""
11
+ <style>
12
+ h2 {
13
+ text-align: left;
14
+ }
15
+ </style>
16
+ """, unsafe_allow_html=True)
17
+ # st.markdown("""
18
+ # <center><h2>VPI-MLOGs Web App</h2></center>
19
+ # <b>VPI-MLOGs</b> is a web app designed for log data analysis and visualization. It provides various functions to help users process, analyze, and visualize log data.<br>
20
+ # <h4>1. Read LAS Files and Convert to CSV Files</h4>
21
+
22
+ # <b>VPI-MLOGs</b> allows users to upload LAS files and convert them to CSV format. This feature makes it easy to work with log data in other programs, such as Excel or Python.
23
+
24
+ # <h4>2. Explore Data Analysis with Multiple Charts</h4>
25
+
26
+ # <b>VPI-MLOGs</b> provides several exploratory data analysis (EDA) functions, such as detecting missing data, generating histograms, and visualizing distribution. These functions help users understand the structure and characteristics of the log data.<br>
27
+
28
+ # <h4>3. Training LGBM Model</h4>
29
+
30
+ # <b>VPI-MLOGs</b> provides a machine learning feature that allows users to train a LGBM (Light Gradient Boosting Machine) model using their log data.
31
+
32
+ # <h4>4. Prediction</h4>
33
+
34
+ # <b>VPI-MLOGs</b> Users can also make predictions using the trained model.
35
+ # """
36
+ st.markdown("""
37
+ <center><h2>VPI-MLOGs Web App</h2></center>
38
+
39
+ **Read LAS Files and Convert to CSV Files**
40
+
41
+ VPI-MLOGs enables the reading of LAS files, a commonly used format for storing log data. Once uploaded, VPI-MLOGs can convert LAS files to the CSV format, which is more widely compatible with other programs like Excel or Python.
42
+
43
+ **Explore Data Analysis with Multiple Charts**
44
+
45
+ VPI-MLOGs offers various exploratory data analysis (EDA) functions to better understand the characteristics of log data. These EDA functions include:
46
+
47
+ - **Missing Data Detection**: Identifies any missing data points in the log data.
48
+ - **Histogram Generation**: Creates graphical representations of data value distributions.
49
+ - **Distribution Visualization**: Creates graphical representations showcasing the spread of data values.
50
+ - **Outliers Removal**: Identifies any data points that are significantly different from the rest of the data.
51
+
52
+ **Training LGBM Model**
53
+
54
+ VPI-MLOGs provides a machine learning feature that enables users to train a LGBM (Light Gradient Boosting Machine) model using their log data. LGBM is a versatile machine learning algorithm suitable for various tasks like classification and regression. Once trained, the LGBM model can be used to make predictions on new data.
55
+
56
+ **Prediction**
57
+
58
+ VPI-MLOGs allows users to make predictions using the trained model. These predictions can be applied to new data not present in the training set. This functionality proves beneficial for tasks like identifying potential drilling targets or predicting rock formation properties.
59
+
60
+ In summary, VPI-MLOGs is a powerful tool for processing, analyzing, and visualizing log data. It offers a diverse range of functions that aid users in gaining a deeper understanding of their data, enabling them to make more informed decisions.
61
+ """
62
+ ,unsafe_allow_html=True)
backup/081222/Antuns/p1_import_csv.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
+ def upload_csv():
5
+ df = None
6
+ uploaded_file = st.file_uploader(label='Upload *csv file from your drive! Choose a file:', type='csv')
7
+ if uploaded_file is not None:
8
+ df = pd.read_csv(uploaded_file, na_values=-9999)
9
+ st.success("Loading finished!")
10
+ st.dataframe(df, width=1400, height=300)
11
+ st.write('---')
12
+ return df
13
+
backup/081222/Antuns/page_setting.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ img = Image.open("data/LogoVPI.png")
4
+ def page_intro():
5
+ st.set_page_config(# Alternate names: setup_page, page, layout
6
+ layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
7
+ initial_sidebar_state="auto", # Can be "auto", "expanded", "collapsed"
8
+ page_title="VPI-MLogs", # String or None. Strings get appended with "• Streamlit".
9
+ page_icon=img, # String, anything supported by st.image, or None.
10
+ )
11
+ col_1, col_2, col_3, col_4, col_5, = st.columns(5)
12
+ with col_3:
13
+ st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", width=250)
14
+ st.header("Welcome to VPI-MLOGS!")
backup/081222/Antuns/prediction_LGBM.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ def Prediction_LGBM(trained_models:list=None, data:pd.DataFrame=None, feature_names:list=None):
4
+ """
5
+ mode: "predict", "predict_proba"
6
+ """
7
+ data_copy = data.copy()
8
+ # if mode == "predict":
9
+ # for i, model in enumerate(trained_models):
10
+ # y_preds = model.predict(data_copy[feature_names])
11
+ # data_copy[f"model_{i}"] = y_preds
12
+ #
13
+ # else:
14
+ for i, model in enumerate(trained_models):
15
+ y_preds = model.predict(data_copy[feature_names])
16
+ data_copy[f"model_{i}"] = y_preds
17
+
18
+ return data_copy
19
+
20
+ if __name__ == '__main__':
21
+ Prediction_LGBM()
backup/081222/Homepage.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from Antuns.page_setting import page_intro
3
+
4
+ #Streamlit dashboard------------------------------------------------------------------------------------------
5
+ page_intro()
6
+ st.sidebar.success("")
7
+ st.markdown("""
8
+ :flag-vn:
9
+ """
10
+ ,unsafe_allow_html=False)
backup/081222/models/LGBM_20221125.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b0cb118e7b9a39b3003abb0701c7be91f1acb1b0df30635a93a149a4968049
3
+ size 14032845
backup/081222/models/LightGBM_0.45.pbz2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e960d2e933f4884dc0df6a55c33819807ae4beb86a35489685dd209a7699aeab
3
+ size 1936897
backup/model.json ADDED
The diff for this file is too large to render. See raw diff
 
data/LogoVPI.png ADDED
data/all_file_merged_updated_with_All_geometry.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58c41e72f17af5c6235cbd79ec9700d539c96303d998b0f33018bfaaad4402b1
3
+ size 12897889
data/merged/Training_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/merged/merged_df.csv ADDED
The diff for this file is too large to render. See raw diff
 
mLogsFunctions/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .dataloading import *
2
+ from .dataprocessing import *
3
+ from .viewCurves import *
4
+ from .rmOutliers import *
5
+ from .lightGBMPred import *
6
+
7
+ __all__ = ['csv_uploader',
8
+ 'tweak_data',
9
+ 'columns_list',
10
+ 'well_filter',
11
+ 'view_curves',
12
+ 'rmOutliers',
13
+ 'Prediction_LGBM',
14
+
15
+ #FX
16
+ 'upload_csv',
17
+ 'selection_info',
18
+ 'interval_define',
19
+ 'make_selection',
20
+ 'bar_plot',
21
+ 'curve_plot',
22
+ 'missing_bar',
23
+ 'missing_box',
24
+ 'hist_line_plot',
25
+ 'crossplot',
26
+ 'pairplot',
27
+ 'heatmap',
28
+ 'plotly_3d',
29
+
30
+
31
+ ]
mLogsFunctions/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (541 Bytes). View file
 
mLogsFunctions/__pycache__/dataloading.cpython-39.pyc ADDED
Binary file (1.2 kB). View file
 
mLogsFunctions/__pycache__/dataprocessing.cpython-39.pyc ADDED
Binary file (647 Bytes). View file
 
mLogsFunctions/__pycache__/fx.cpython-39.pyc ADDED
Binary file (7.84 kB). View file
 
mLogsFunctions/__pycache__/lightGBMPred.cpython-39.pyc ADDED
Binary file (571 Bytes). View file
 
mLogsFunctions/__pycache__/rmOutliers.cpython-39.pyc ADDED
Binary file (4.17 kB). View file
 
mLogsFunctions/__pycache__/viewCurves.cpython-39.pyc ADDED
Binary file (11.3 kB). View file
 
mLogsFunctions/dataloading.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+
4
+ def csv_uploader(label:str='', type:str='csv'):
5
+ uploaded_file = st.file_uploader(label=label, type=type)
6
+ loading_data = None
7
+ if uploaded_file is not None:
8
+ if type == 'csv':
9
+ loading_data = pd.read_csv(uploaded_file, na_values=-9999)
10
+ return loading_data
11
+
12
+ @st.cache_data
13
+ def tweak_data(data, reindex:bool=True, resample:bool=False, max_number:int=5000):
14
+ if data is not None:
15
+ data = data.rename(str.upper, axis='columns')
16
+ if len(data) > max_number and resample:
17
+ data = data.sample(max_number, random_state=42)
18
+ if set(["DEPTH", 'WELL']).issubset(data.columns.unique()):
19
+ data.assign(
20
+ DEPTH=data.DEPTH.astype('float64'),
21
+ WELL=data.WELL.astype("string"),
22
+ )
23
+ if "FRACTURE_ZONE" in data.columns.unique():
24
+ data = data.rename(columns={'FRACTURE_ZONE':'FRACTUREZONE'})
25
+ if "FRACTURE INTENSITY" in data.columns.unique():
26
+ data = data.rename(columns={'FRACTURE INTENSITY':'FRACTURE_INTENSITY'})
27
+ # if "FRACTUREZONE" in data.columns.unique():
28
+ # data = data.reset_index(drop=True).drop(["FRACTUREZONE"], axis=1)
29
+ if reindex:
30
+ data = data.reindex(sorted(data.columns), axis=1) #Reindex and sort columns name
31
+ return data
32
+
mLogsFunctions/dataprocessing.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+
4
+ def columns_list(data:pd.DataFrame, no_depth:bool=None, no_well:bool=None):
5
+ columns_list = list(data.columns.unique())
6
+ columns_list.sort()
7
+ if "DEPTH" in columns_list and no_depth == True:
8
+ columns_list.remove("DEPTH")
9
+ if "WELL" in columns_list and no_well == True:
10
+ columns_list.remove("WELL")
11
+ return columns_list
12
+
13
+ def well_filter(df, well_name):
14
+ return df[df.WELL == well_name]
mLogsFunctions/fx.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+ # import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import plotly.express as px
7
+
8
+ import altair as alt
9
+
10
+ import streamlit as st
11
+ import streamlit_nested_layout
12
+ from streamlit_vega_lite import altair_component
13
+
14
+ from mLogsFunctions import *
15
+
16
+ #LOADING DATA------------------------------------------------------------------------------------------
17
+ def upload_csv():
18
+ df = None
19
+ uploaded_file = st.file_uploader(label='Upload *csv file from your drive! Choose a file:', type='csv')
20
+ if uploaded_file is not None:
21
+ df = pd.read_csv(uploaded_file, na_values=-9999)
22
+ st.success("Loading finished!")
23
+ st.write('---')
24
+ return df
25
+
26
+ #PLOTTING------------------------------------------------------------------------------------------
27
+ # Store the initial value of widgets in session state
28
+ def selection_info(df, method, option_w, option_x, option_y, option_c):
29
+ if "method" not in st.session_state:
30
+ st.session_state.method:str = "Single Well"
31
+ st.session_state.option_w:str = "15-1-SNN-3P"
32
+ st.session_state.option_x:str = "RHOB"
33
+ st.session_state.option_y:str = "DTC"
34
+ st.session_state.option_c:str = "WELL"
35
+ well_names = np.sort(df.WELL.unique())
36
+ st.radio("",
37
+ key=method,
38
+ options=["All Wells", "Single Well"],)
39
+ st.radio(
40
+ "WELL",
41
+ key=option_w,
42
+ options=well_names,)
43
+ st.selectbox(
44
+ "X Axis",
45
+ key=option_x,
46
+ options=(df.columns.sort_values().str.upper().drop(["WELL", "DEPTH"])),)
47
+ st.selectbox(
48
+ "Y Axis",
49
+ key=option_y,
50
+ options=(df.columns.sort_values().str.upper().drop(["WELL", "DEPTH"])),)
51
+ st.selectbox(
52
+ "Color Axis",
53
+ key=option_c,
54
+ options=df.columns.sort_values().str.upper())
55
+ return st.session_state
56
+
57
+ #Interactive Charts-----------------------------------------------------------------------
58
+ @st.cache_resource
59
+ def interval_define():
60
+ return alt.selection_interval()
61
+
62
+ @st.cache_resource
63
+ def make_selection(df, _interval, option_x, option_y, option_c):
64
+ def c_(df, _interval, option_x, option_y, x_log:str="linear", y_log:str="linear"):
65
+ return alt.Chart(df,
66
+ title="Crossplot "+option_x+" vs "+option_y+"",
67
+ ).mark_point().encode(
68
+ x = alt.X(option_x.upper(),
69
+ axis=alt.Axis(title=option_x),
70
+ scale= alt.Scale(zero=False, type=x_log
71
+ )
72
+ ),
73
+ y = alt.Y(option_y.upper(),
74
+ axis=alt.Axis(title=option_y),
75
+ scale=alt.Scale(zero=False,type=y_log
76
+ )
77
+ ),
78
+ color=alt.condition(_interval, option_c, alt.value('lightgray')),
79
+ ).properties(
80
+ selection=_interval,
81
+ height=570,
82
+ width=600)#.transform_regression(option_x.upper(), option_y.upper()).mark_line()
83
+
84
+ if option_x in ["LLD", "LLS"]:
85
+ x_log = "log"
86
+ else:
87
+ x_log = "linear"
88
+
89
+ if option_y in ["LLD", "LLS"]:
90
+ y_log = "log"
91
+ else:
92
+ y_log = "linear"
93
+ return c_(df, _interval, option_x, option_y, x_log, y_log)
94
+
95
+ #Histogram-----------------------------------------------------------------------
96
+ def bar_plot(data, option_x):
97
+ def c_(data, option_x, _log):
98
+ return alt.Chart(title="Histogram of "+option_x+"",
99
+ data=data
100
+ ).mark_bar().encode(
101
+ x = alt.X(option_x.upper(),
102
+ bin=alt.Bin(maxbins=30),
103
+ axis=alt.Axis(title=option_x),
104
+ scale=alt.Scale(zero=False)
105
+ ),
106
+ y = alt.Y('count()',
107
+ axis=alt.Axis(title='Number of Values'),
108
+ scale=alt.Scale(zero=False, type=_log),
109
+ ),
110
+ color = alt.Color('WELL', legend=None
111
+ )
112
+ ).properties(
113
+ height=250,
114
+ width=250
115
+ )
116
+ if option_x in ["LLD", "LLS"]:
117
+ return c_(data, option_x, "symlog")
118
+ else:
119
+ return c_(data, option_x, "linear")
120
+
121
+ #Curve View-----------------------------------------------------------------------
122
+ def curve_plot(data,filted_data, x_column):
123
+ def c_(data,filted_data, x_column, _log):
124
+ color_codes = {"GR":"lime",
125
+ "LLD":"red",
126
+ "LLS":"dodgerblue",
127
+ "NPHI":"blue",
128
+ "RHOB":"red",
129
+ "DTC":"red",
130
+ "DTS":"magenta",
131
+ "FRACTURE_ZONE":"lightcoral",
132
+ "FRACTURE_ZONE_PRED":"lightgreen"
133
+ }
134
+ if x_column in color_codes.keys():
135
+ color_ = color_codes[x_column]
136
+ else:
137
+ color_ = "blue"
138
+ return alt.Chart(data
139
+ ).mark_line(size=1,
140
+ orient='horizontal',
141
+ color=color_,
142
+ point=alt.OverlayMarkDef(color="", size=1) #Show raw points
143
+ ).encode(
144
+ x=alt.X(x_column.upper(),
145
+ scale=alt.Scale(zero=False, type=_log),
146
+ axis=alt.Axis(title=x_column.upper(),
147
+ titleAnchor='middle',
148
+ orient='top',
149
+ labelAngle=0,
150
+ titleColor=color_,
151
+ labelColor=color_,
152
+ tickColor=color_,
153
+ )
154
+ ),
155
+ y=alt.Y('DEPTH',
156
+ scale=alt.Scale(zero=False,
157
+ reverse=True,
158
+ ),
159
+ axis=alt.Axis(title=None,
160
+ labelColor=color_,
161
+ tickColor=color_,
162
+ )
163
+ )
164
+ ).properties(height=500,
165
+ width=129
166
+ )
167
+
168
+
169
+ if x_column in ["LLD", "LLS"]:
170
+ curve = c_(data,filted_data, x_column, "log")
171
+ else:
172
+ curve = c_(data,filted_data, x_column, "linear")
173
+
174
+ if filted_data is not None:
175
+ point_plot = alt.Chart(filted_data).mark_circle(size=20,
176
+ color='red',
177
+ opacity=1
178
+ ).encode(
179
+ x=x_column,
180
+ y='DEPTH'
181
+ )
182
+ return curve + point_plot
183
+ else:
184
+ return curve
185
+ # import altair as alt
186
+ # def curve_plot(data, filted_data, x_column):
187
+ # def c_(data, filted_data, x_column, _log):
188
+ # color_codes = {
189
+ # "GR": "lime",
190
+ # "LLD": "red",
191
+ # "LLS": "dodgerblue",
192
+ # "NPHI": "blue",
193
+ # "RHOB": "red",
194
+ # "DTC": "red",
195
+ # "DTS": "magenta",
196
+ # "FRACTURE_ZONE": "lightcoral",
197
+ # "FRACTURE_ZONE_PRED": "lightgreen"
198
+ # }
199
+ # if x_column in color_codes.keys():
200
+ # color_ = color_codes[x_column]
201
+ # else:
202
+ # color_ = "blue"
203
+ # return alt.Chart(data).mark_line(size=1, orient='horizontal', color=color_, point=alt.OverlayMarkDef(color="", size=1)).encode(
204
+ # y=alt.X(x_column.upper(),
205
+ # scale=alt.Scale(zero=False, type=_log),
206
+ # axis=alt.Axis(title=x_column.upper(),
207
+ # titleAnchor='middle',
208
+ # orient='top',
209
+ # labelAngle=0,
210
+ # titleColor=color_,
211
+ # labelColor=color_,
212
+ # tickColor=color_,
213
+ # )
214
+ # ),
215
+ # x=alt.Y('DEPTH',
216
+ # scale=alt.Scale(zero=False, reverse=True),
217
+ # axis=alt.Axis(title=None, labelColor=color_, tickColor=color_))
218
+ # ).properties(
219
+ # height=500,
220
+ # width=700
221
+ # )
222
+
223
+ # if x_column in ["LLD", "LLS"]:
224
+ # curve = c_(data, filted_data, x_column, "log")
225
+ # else:
226
+ # curve = c_(data, filted_data, x_column, "linear")
227
+
228
+ # if filted_data is not None:
229
+ # point_plot = alt.Chart(filted_data).mark_circle(size=20, color='red', opacity=1).encode(
230
+ # y=alt.X(x_column, scale=alt.Scale(zero=False)),
231
+ # x=alt.Y('DEPTH', scale=alt.Scale(zero=False, reverse=True))
232
+ # )
233
+ # return (curve + point_plot).resolve_scale(y='shared')
234
+ # else:
235
+ # return curve
236
+
237
+
238
+ #MissingBar-----------------------------------------------------------------------
239
+ def missing_bar(data, x_title):
240
+ return alt.Chart(data).mark_bar().encode(
241
+ x=alt.X('Columns', sort='-y', title=x_title),
242
+ y='Count missing (%)',
243
+ color=alt.condition(
244
+ alt.datum['Count missing (%)'] >10, # If count missing is > 10%, returns True,
245
+ alt.value('orange'), # which sets the bar orange.
246
+ alt.value('steelblue') # And if it's not true it sets the bar steelblue.
247
+ )
248
+ ).properties(
249
+ width=500,
250
+ height=250
251
+ ).configure_axis(
252
+ grid=False
253
+ )
254
+ #BoxPLot-----------------------------------------------------------------------
255
+ def missing_box(data, curve):
256
+ if curve in ["LLD", "LLS"]:
257
+ return alt.Chart(data).mark_boxplot(extent='min-max').encode(
258
+ x=alt.X('WELL:O', title=None,
259
+ ),
260
+ y=alt.Y(f'{curve}:Q', title=curve,scale=alt.Scale(zero=False, type="log")
261
+ ),
262
+ color='WELL:N'
263
+ ).properties(
264
+ width=500,
265
+ height=300
266
+ )
267
+ else:
268
+ return alt.Chart(data).mark_boxplot(extent='min-max').encode(
269
+ x=alt.X('WELL:O', title=None
270
+ ),
271
+ y=alt.Y(f'{curve}:Q', title=curve,scale=alt.Scale(zero=False)
272
+ ),
273
+ color='WELL:N'
274
+ ).properties(
275
+ width=500,
276
+ height=300
277
+ )
278
+ #Histogram Line-----------------------------------------------------------------------
279
+ def hist_line_plot(data, curve):
280
+ st.caption(f"Histogram of {curve}")
281
+ if curve in ["LLD", "LLS"]:
282
+ fig = sns.displot(data, x=curve, hue="WELL", kind="kde", height=5,aspect=1.2, log_scale=True)
283
+ fig.set(ylabel="Values")
284
+ st.pyplot(fig)
285
+ else:
286
+ fig = sns.displot(data, x=curve, hue="WELL", kind="kde", height=5,aspect=1.2)
287
+ fig.set(ylabel="Values")
288
+ st.pyplot(fig)
289
+ #CrossPlot-----------------------------------------------------------------------
290
+ def crossplot(data, x_curve, y_curve):
291
+ fig = sns.jointplot(data=data, x=x_curve, y=y_curve, hue="WELL")
292
+ if x_curve in ["LLD", "LLS"]:
293
+ fig.ax_joint.set_xscale('log')
294
+ fig.ax_marg_x.set_xscale('log')
295
+ if y_curve in ["LLD", "LLS"]:
296
+ fig.ax_joint.set_yscale('log')
297
+ fig.ax_marg_y.set_yscale('log')
298
+ st.pyplot(fig)
299
+ #PairPlot-----------------------------------------------------------------------
300
+ def pairplot(data, rows, cols,color_):
301
+ return alt.Chart(data).mark_circle().encode(
302
+ alt.X(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)),
303
+ alt.Y(alt.repeat("row"), type='quantitative', scale=alt.Scale(zero=False)),
304
+ color=color_
305
+ ).properties(
306
+ width=100,
307
+ height=100
308
+ ).repeat(
309
+ row = rows,
310
+ column = cols
311
+ ).configure_axis(
312
+ grid=False
313
+ )
314
+ #Heatmap----------------------------------------------------------------
315
+ def heatmap(df):
316
+ fig = sns.heatmap(df, annot=True)
317
+ st.pyplot(fig)
318
+ #Heatmap----------------------------------------------------------------
319
+ def plotly_3d(data, x, y, z, color, size, symbol, log_x, log_y, log_z):
320
+ #Data slicer
321
+ curvs_ = columns_list(data, no_well=True)
322
+ def slicer_(data, sli_key, val_key,):
323
+ slicer1_, slicer2_ = st.columns([4, 6])
324
+ # sli=curvs_[0]
325
+ with slicer1_:
326
+ sli = st.selectbox("Data slicer", key=sli_key, options=curvs_)
327
+ with slicer2_:
328
+ values = st.slider('Select a range of values',
329
+ min_value = float(data[sli].min()),
330
+ max_value = float(data[sli].max()),
331
+ value=(float(data[sli].min()), float(data[sli].max())),
332
+ key=val_key,
333
+ )
334
+ data = data.query(f"{sli} >= {values[0]} and {sli} <= {values[1]}")
335
+ return data
336
+ c1, c2, c3 = st.columns(3)
337
+ with c1:
338
+ data = slicer_(data, "slicer_1", "sli1_value")
339
+ with c2:
340
+ data = slicer_(data, "slicer_2", "sli2_value")
341
+ with c3:
342
+ data = slicer_(data, "slicer_3", "sli3_value")
343
+
344
+ fig = px.scatter_3d(data, x=x,
345
+ y=y,
346
+ z=z,
347
+ color=color,
348
+ size=size,
349
+ size_max=18,
350
+ symbol=symbol,
351
+ opacity=0.7,
352
+ log_x=log_x,
353
+ log_y=log_y,
354
+ log_z = log_z,
355
+ width=1000, height=700,
356
+ color_continuous_scale="blugrn")
357
+ fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), #tight layout
358
+ # paper_bgcolor="LightSteelBlue"
359
+ template="none")
360
+ st.plotly_chart(fig)
mLogsFunctions/lightGBMPred.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ def Prediction_LGBM(trained_models:list=None, data:pd.DataFrame=None, feature_names:list=None):
4
+ """
5
+ mode: "predict", "predict_proba"
6
+ """
7
+ data_copy = data.copy()
8
+ # if mode == "predict":
9
+ # for i, model in enumerate(trained_models):
10
+ # y_preds = model.predict(data_copy[feature_names])
11
+ # data_copy[f"model_{i}"] = y_preds
12
+ #
13
+ # else:
14
+ for i, model in enumerate(trained_models):
15
+ y_preds = model.predict(data_copy[feature_names])
16
+ data_copy[f"model_{i}"] = y_preds
17
+
18
+ return data_copy
19
+
20
+ if __name__ == '__main__':
21
+ Prediction_LGBM()
mLogsFunctions/rmOutliers.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import streamlit as st
4
+ import altair as alt
5
+ from streamlit_vega_lite import altair_component
6
+
7
+ from .fx import *
8
+ from mLogsFunctions import *
9
+
10
+ def rmOutliers(df):
11
+ _o1, _o2 = st.columns([1,8])
12
+ with _o1:
13
+ st.session_state = selection_info(df,"method", "option_w", "option_x", "option_y", "option_c")
14
+
15
+ #Crossplot and bar plot-----------------------------------------------------------------------
16
+ with _o2:
17
+ def rm_outliers(data):
18
+ interval = interval_define()
19
+ col21, col22 = st.columns(2)
20
+ with col21:
21
+ selected_points = altair_component(make_selection(data,
22
+ interval,
23
+ st.session_state.option_x,
24
+ st.session_state.option_y,
25
+ st.session_state.option_c,
26
+ )
27
+ )
28
+ if len(selected_points) > 0:
29
+ del[selected_points['name']]
30
+
31
+ with col22:
32
+ selected_df = None
33
+ if len(selected_points) != 0:
34
+ query = ' & '.join(
35
+ f'{crange[0]} <= `{col}` <= {crange[1]}'
36
+ for col, crange in selected_points.items())
37
+ selected_df = data.query(query)
38
+ st.write(f"Total selected points: {len(selected_df)}")
39
+ st.dataframe(selected_df, width=800, height=260,use_container_width=False)
40
+ else:
41
+ st.write("No Selection")
42
+
43
+ if selected_df is not None:
44
+ st.write("Histogram of selected data:")
45
+ histogram_x = bar_plot(selected_df, st.session_state.option_x)
46
+ histogram_y = bar_plot(selected_df, st.session_state.option_y)
47
+ st.write(alt.hconcat(histogram_x,histogram_y))
48
+ else:
49
+ st.write("Histogram of entire data:")
50
+ histogram_x = bar_plot(data, st.session_state.option_x)
51
+ histogram_y = bar_plot(data, st.session_state.option_y)
52
+ st.write(alt.hconcat(histogram_x,histogram_y))
53
+
54
+ #Outlier Removal-----------------------------------------------------------------------
55
+ st.write('---')
56
+ df_nomarlized = data.copy()
57
+ curve_editting = st.selectbox("Select curve to edit:",
58
+ key="selected_curve",
59
+ options=columns_list(data, no_depth=True, no_well=True),
60
+ )
61
+ n_value = int(st.text_input("Number of rows for Mean calculation ", "5"))
62
+
63
+ def normalize_outlier(df_nomarlized, selected_df, curve, n_value):
64
+ n=n_value//2
65
+ for i in selected_df.index:
66
+ df_nomarlized.loc[[i],curve.upper()] = df_nomarlized.loc[i-n:i+n,curve.upper()].mean()
67
+ return df_nomarlized
68
+ def remove_data_point(df_nomarlized, selected_df, curve):
69
+ for i in selected_df.index:
70
+ df_nomarlized[i, curve] = 0 #ERROR ALARM!!!!
71
+ # df_nomarlized = df_nomarlized.drop(index=i) #ERROR ALARM!!!!
72
+ return df_nomarlized
73
+
74
+ if st.button("Outliers Processing"):
75
+ st.session_state.fdata = normalize_outlier(df_nomarlized, selected_df, curve_editting, n_value)
76
+ _well = "".join((st.session_state.fdata.WELL.unique()).tolist())
77
+ st.session_state.loc_data = pd.concat([df[(df["WELL"] != _well)],st.session_state.fdata], axis=0)
78
+ selected_df = None
79
+ if st.button("Remove"):
80
+ st.session_state.fdata = remove_data_point(df_nomarlized, selected_df, curve_editting)
81
+ _well = "".join((st.session_state.fdata.WELL.unique()).tolist())
82
+ st.write(_well)
83
+ st.write(type(_well))
84
+ st.session_state.loc_data = pd.concat([df[(df["WELL"] != _well)],st.session_state.fdata], axis=0)
85
+ selected_df = None
86
+
87
+ #Curve View-----------------------------------------------------------------------
88
+ def plt_curs(data, option_w):
89
+ data_plt = data[data["WELL"] == option_w]
90
+ if plotting_curves != []:
91
+ for i, c in enumerate(plotting_curves):
92
+ charts_dict[i] = curve_plot(data=data_plt,filted_data=selected_df, x_column=c)
93
+
94
+ # with col2:
95
+ charts_dict={}
96
+ plotting_curves = st.multiselect("Select curves to plot:", key="curvs_plt", options=columns_list(data, no_depth=True, no_well=True))
97
+
98
+ if st.session_state.option_w is not None:
99
+ if 'loc_data' not in st.session_state:
100
+ plt_curs(df_nomarlized, st.session_state.option_w)
101
+ else:
102
+ plt_curs(st.session_state.loc_data, st.session_state.option_w)
103
+
104
+ #Show Curve-----------------------------------------------------------------------
105
+ st.write(alt.concat(*charts_dict.values()).configure(autosize='fit'))#.configure_concat(spacing=0))
106
+
107
+ #------------------------
108
+ def check_method(df):
109
+ if st.session_state.method == "Single Well":
110
+ data = df[df.WELL == st.session_state.option_w]
111
+ data = data.sort_values(by=['DEPTH'])
112
+ data = data.reset_index().drop(["index"], axis=1)
113
+ else:
114
+ data = df
115
+ return data
116
+ #------------------------
117
+
118
+ if 'loc_data' not in st.session_state:
119
+ data = check_method(df)
120
+ else:
121
+ data = check_method(st.session_state.loc_data)
122
+
123
+ rm_outliers(data)
124
+
125
+ # # Download --------------------------------------------------------------
126
+ st.write('---')
127
+ st.write("Download final result to csv file")
128
+ if "loc_data" not in st.session_state:
129
+ saving_df = df
130
+ else:
131
+ saving_df = st.session_state.loc_data
132
+ st.download_button(label='Download',
133
+ data = saving_df.to_csv(),
134
+ file_name='Query_data.csv',
135
+ mime='text/csv')
mLogsFunctions/viewCurves.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import plotly.graph_objects as go
4
+ from plotly.subplots import make_subplots
5
+ import json
6
+ #Curve View--------------------------------------------------------------------------------------------------------------
7
+ #OPERATIONS--------------------------------------------------------------------------------------------------------------
8
+ #Make subplots layout
9
+ def view_curves(data:pd.DataFrame=None, curves:list=[], log:list=[],
10
+ depth_col:str='DEPTH', new_color:dict={}, new_unit:dict={},
11
+ ):
12
+ #FUNCTIONS DESIGNING-------------------------------------------------------------------------------------------------
13
+ #Function of Plotting single curve
14
+ def add_trace(curv:str=None, color:str='#38a2e8', xaxis:str='x1', unit:str=None, id_subplt:int=1, log:list=None):
15
+ fig.add_trace(go.Scattergl(y=data[depth_col], x=data[curv], name=curv, mode="lines", line=dict(color=color, width=1.), xaxis=xaxis),1 ,id_subplt)\
16
+ .update_xaxes(title_text=f'{curv} ({unit})', row=1, col=id_subplt, color=color,
17
+ tickcolor=color, minor=dict(tickcolor=color), tickfont=dict(color=color))
18
+ if curv in log:
19
+ fig.update_xaxes(type='log', row=1, col=id_subplt)
20
+
21
+ #Function to update X-Axis parameters
22
+ def update_xaxes(type:str=None):
23
+ fig.update_xaxes(
24
+ title_font=dict(size=16, family='Arial'),
25
+ title_standoff = 0, side='top', nticks=5, fixedrange=True,
26
+ ticks="inside", tickwidth=1, ticklen=8, ticklabelstep=1,
27
+ tickfont=dict(family="Arial", size=12),
28
+ minor_ticks="inside", minor=dict(ticklen=4, showgrid=True),
29
+ showline=True, linewidth=1, linecolor='black', mirror=True,
30
+ showgrid=True, gridwidth=1, gridcolor='#d9d9d9',
31
+ showspikes=True, spikecolor="#8d9199", spikesnap="hovered data", spikemode="across", spikethickness=1, spikedash='solid',
32
+ )
33
+ if type is not None:
34
+ fig.update_xaxes(type=type)
35
+
36
+ #Function to update Y-Axis parameters
37
+ def update_yaxes(type:str=None):
38
+ fig.update_yaxes(
39
+ # title_font=dict(size=18, family='Arial', color='#393533'),
40
+ # matches='y'
41
+ title_standoff = 0,
42
+ autorange="reversed",
43
+ nticks=10,
44
+ ticks="outside", tickwidth=1, tickcolor='black', ticklen=8, ticklabelstep=1,
45
+ tickfont=dict(family="Arial", color='black', size=12),
46
+ minor_ticks="outside", minor=dict(ticklen=4, tickcolor="black", showgrid=False),
47
+ showline=True, linewidth=1, linecolor='black', mirror=True,
48
+ showgrid=True, gridwidth=1, gridcolor='#d9d9d9',
49
+ rangemode='nonnegative', #(normal or tozero or nonnegative)
50
+ showspikes=True, spikecolor="#8d9199", spikesnap="cursor", spikemode="across", spikethickness=1, spikedash='solid',
51
+ )
52
+ if type is not None:
53
+ fig.update_yaxes(type=type)
54
+
55
+ #Function to update FIGURE LAYOUT parameters
56
+ def update_layout(_size:list=[400,800]):
57
+ fig.update_layout(
58
+ width=_size[0], height=_size[1],
59
+ showlegend=False,
60
+ plot_bgcolor="white",
61
+ margin=dict(t=10,l=10,b=10,r=10),
62
+ hovermode="y", #"y unified", "y", "x", "closest"
63
+ )
64
+
65
+ #VARIABLES DEFINITION------------------------------------------------------------------------------------------------
66
+ #Sort dataframe by DEPTH
67
+ data = data.sort_values(by=depth_col)
68
+
69
+ #Default color codes
70
+ meta_info = '{"Unit":{"HCAL":"in","CALI":"in","CALS":"in","C1":"in","C2":"in","RHOB":"g\\/c3","RHO8":"g\\/c3","RHO8_filte":"g\\/c3","RHOZ":"g\\/c3","ZDEN":"g\\/c3","ZDNC":"g\\/c3","HDRA":"g\\/c3","DRHO":"g\\/c3","DSOZ":"g\\/c3","ECD_ARC":"g\\/c3","NPHI":"v\\/v","TNPH":"v\\/v","TNPH_CH":"v\\/v","HNPO":"v\\/v","HNPO_filte":"v\\/v","CNCF":"% (pu)","NPOR":"v\\/v","CH_NPHI":"v\\/v","CH_NPHIds":"v\\/v","CFTC":"Hz","CNTC":"Hz","DTC":"us\\/f","DTCO":"us\\/f","DT":"us\\/f","DT4P":"us\\/f","DT5":"us\\/f","DT_SPL":"us\\/f","DTS":"us\\/f","DTSM":"us\\/f","DT2":"us\\/f","DTSO":"us\\/f","DT4S":"us\\/f","GR":"GAPI","GR1":"GAPI","GR_ARC":"GAPI","CGR":"GAPI","EHGR":"GAPI","ECGR":"GAPI","SGR":"GAPI","GR_SPULSE_BHC":"GAPI","GR_IMP":"GAPI","GRM1":"GAPI","MWD_GR_BHC":"GAPI","GRAM":"GAPI","GR_SPL":"GAPI","GR_CDR":"GAPI","ARC_GR_RT":"GAPI","GR_LWD":"GAPI","POTA":"%","THOR":"ppm","URAN":"ppm","TPRA":"v\\/v","TURA":"v\\/v","UPRA":"v\\/v","LLD":"Ohmm","LLS":"Ohmm","DIL":"Ohmm","ILD":"Ohmm","ILM":"Ohmm","RILD":"Ohmm","RILM":"Ohmm","HRI":"Ohmm","RIDPH":"Ohmm","RIMPH":"Ohmm","RSFL":"Ohmm","HDRS":"Ohmm","HMRS":"Ohmm","LL3":"Ohmm","RIPD":"Ohmm","RIPM":"Ohmm","AT10":"Ohmm","AT20":"Ohmm","AT30":"Ohmm","AT60":"Ohmm","AT90":"Ohmm","AO10":"Ohmm","AO20":"Ohmm","AO30":"Ohmm","AO60":"Ohmm","AO90":"Ohmm","AF10":"Ohmm","AF20":"Ohmm","AF30":"Ohmm","AF60":"Ohmm","AF90":"Ohmm","AHO10":"Ohmm","AHO20":"Ohmm","AHO30":"Ohmm","AHO60":"Ohmm","AHO90":"Ohmm","M2RX":"Ohmm","M2R9":"Ohmm","M2R6":"Ohmm","M2R3":"Ohmm","M2R1":"Ohmm","P16H_UNC":"Ohmm","P22H_UNC":"Ohmm","P28H_UNC":"Ohmm","P34H_UNC":"Ohmm","P40H_UNC":"Ohmm","A28H_UNC":"Ohmm","A34H_UNC":"Ohmm","A40H_UNC":"Ohmm","P16L_UNC":"Ohmm","P22L_UNC":"Ohmm","P28L_UNC":"Ohmm","P34L_UNC":"Ohmm","P40L_UNC":"Ohmm","P22H_UNC_RT":"Ohmm","P40H_UNC_RT":"Ohmm","P40H_UNC_R":"Ohmm","P22H_UNC_R":"Ohmm","HLLD":"Ohmm","HLLS":"Ohmm","RES_DEEP":"Ohmm","RES_SHAL":"Ohmm","Rdeep":"Ohmm","Rshallow":"Ohmm","P33H_UNC":"Ohmm","P33L_UNC":"Ohmm","A33H_UNC":"Ohmm","A33L_UNC":"Ohmm","RACHM":"Ohm
m","RACLM":"Ohmm","RPCHM":"Ohmm","RPCLM":"Ohmm","RPCSHM":"Ohmm","RPCSLM":"Ohmm","RACSHM":"Ohmm","RACSLM":"Ohmm","RAC1HM":"Ohmm","RAC1LM":"Ohmm","RPC1HM":"Ohmm","RPC1LM":"Ohmm","RAC2HM":"Ohmm","RAC2LM":"Ohmm","RPC2HM":"Ohmm","RPC2LM":"Ohmm","RAC3HM":"Ohmm","RAC3LM":"Ohmm","RPC3HM":"Ohmm","RPC3LM":"Ohmm","RAC4HM":"Ohmm","RAC4LM":"Ohmm","RPC4HM":"Ohmm","RPC4LM":"Ohmm","MSFL":"Ohmm","RXO8":"Ohmm","ATR":"Ohmm","PSR":"Ohmm","RT":"Ohmm","TAB_CDR_RES":"hr","TAB_ARC_RES":"s","TAB_RES":"s","PEF":"_","PEF8":"_","PERF":"_","PEFZ":"_","ROP5_RM":"m\\/hr","ROP":"m\\/hr","ROP5":"m\\/hr","ROPS":"m\\/hr","RXOZ":"Ohmm","RSOZ":"Ohmm","ITT":"s","TENS":"lbf","TTEN":"lbf","HTEN":"lbf","CDF":"lbf","ATMP":"degC","TEMP_MCR":"_","TVDE":"m","SP":"mV","VPVS":"v\\/v","LSN":"cps","SSN":"cps","APRS_ARC":"psi","ARTM":"_","AGTK":"_","ARTK":"_","Temp":"degC","TEMP_LWD":"degC","WTBH":"degC","WTBH1":"degC","TEMP_DNI_RT":"degC","HSTEMP":"degC","TCDM":"degC","RPTHM":"mins","CHT":"lbf","DPHI":"v\\/v","SPHI":"v\\/v","PHIE":"v\\/v","PR":"_","ICV":"m3","IHV":"m3","GTEM":"degC","DHTEMP":"degC","TTEM_CDR":"degC","SVEL":"m\\/s","TTSL":"us","TT":"us","CBSL":"mV","CBL":"mV","WF2":"_","WF1":"_","CBLF":"mV","CCL":"_","BS":"_","TGAS":"ppm","Oilshow":"_"},"Color":{"HCAL":"#bf2273","CALI":"#bf2273","CALS":"#bf2273","C1":"#bf2273","C2":"#bf2273","RHOB":"#f20a0a","RHO8":"#f20a0a","RHO8_filte":"#f20a0a","RHOZ":"#f20a0a","ZDEN":"#f20a0a","ZDNC":"#f20a0a","HDRA":"#f20a0a","DRHO":"#f20a0a","DSOZ":"#f20a0a","ECD_ARC":"#f20a0a","NPHI":"#0a44f2","TNPH":"#0a44f2","TNPH_CH":"#0a44f2","HNPO":"#0a44f2","HNPO_filte":"#0a44f2","CNCF":"#0a44f2","NPOR":"#0a44f2","CH_NPHI":"#0a44f2","CH_NPHIds":"#0a44f2","CFTC":"#0a44f2","CNTC":"#0a44f2","DTC":"#ea0af2","DTCO":"#ea0af3","DT":"#ea0af4","DT4P":"#ea0af5","DT5":"#ea0af6","DT_SPL":"#ea0af7","DTS":"#630af2","DTSM":"#630af3","DT2":"#630af4","DTSO":"#630af6","DT4S":"#630af7","GR":"#40f20a","GR1":"#40f20a","GR_ARC":"#40f20a","CGR":"#40f20a","EHGR":"#40f20a","ECGR":"#40f20a","SGR":"#40f20a","GR_
SPULSE_BHC":"#40f20a","GR_IMP":"#40f20a","GRM1":"#40f20a","MWD_GR_BHC":"#40f20a","GRAM":"#40f20a","GR_SPL":"#40f20a","GR_CDR":"#40f20a","ARC_GR_RT":"#40f20a","GR_LWD":"#40f20a","POTA":"#0a0a0a","THOR":"#0a0a0a","URAN":"#0a0a0a","TPRA":"#0a0a0a","TURA":"#0a0a0a","UPRA":"#0a0a0a","LLD":"#f20a0a","LLS":"#0a44f2","DIL":"#0a44f2","ILD":"#f20a0a","ILM":"#0a44f2","RILD":"#f20a0a","RILM":"#0a44f2","HRI":"#f20a0a","RIDPH":"#0a44f2","RIMPH":"#eb0edc","RSFL":"#f20a0a","HDRS":"#0a44f2","HMRS":"#eb0edc","LL3":"#f20a0a","RIPD":"#0a44f2","RIPM":"#eb0edc","AT10":"#f20a0a","AT20":"#f20a0a","AT30":"#f20a0a","AT60":"#0a44f2","AT90":"#0a44f2","AO10":"#f20a0a","AO20":"#f20a0a","AO30":"#f20a0a","AO60":"#0a44f2","AO90":"#0a44f2","AF10":"#f20a0a","AF20":"#f20a0a","AF30":"#f20a0a","AF60":"#0a44f2","AF90":"#0a44f2","AHO10":"#f20a0a","AHO20":"#f20a0a","AHO30":"#f20a0a","AHO60":"#0a44f2","AHO90":"#0a44f2","M2RX":"#0a44f2","M2R9":"#0a44f2","M2R6":"#0a44f2","M2R3":"#f20a0a","M2R1":"#f20a0a","P16H_UNC":"#f20a0a","P22H_UNC":"#f20a0a","P28H_UNC":"#f20a0a","P34H_UNC":"#0a44f2","P40H_UNC":"#0a44f2","A28H_UNC":"#f20a0a","A34H_UNC":"#0a44f2","A40H_UNC":"#0a44f2","P16L_UNC":"#f20a0a","P22L_UNC":"#f20a0a","P28L_UNC":"#f20a0a","P34L_UNC":"#0a44f2","P40L_UNC":"#0a44f2","P22H_UNC_RT":"#f20a0a","P40H_UNC_RT":"#0a44f2","P40H_UNC_R":"#0a44f2","P22H_UNC_R":"#f20a0a","HLLD":"#f20a0a","HLLS":"#0a44f2","RES_DEEP":"#f20a0a","RES_SHAL":"#0a44f2","Rdeep":"#f20a0a","Rshallow":"#0a44f2","P33H_UNC":"#0a44f2","P33L_UNC":"#f20a0a","A33H_UNC":"#0a44f2","A33L_UNC":"#f20a0a","RACHM":"#0a44f2","RACLM":"#f20a0a","RPCHM":"#0a44f2","RPCLM":"#f20a0a","RPCSHM":"#0a44f2","RPCSLM":"#f20a0a","RACSHM":"#0a44f2","RACSLM":"#f20a0a","RAC1HM":"#0a44f2","RAC1LM":"#f20a0a","RPC1HM":"#0a44f2","RPC1LM":"#f20a0a","RAC2HM":"#0a44f2","RAC2LM":"#f20a0a","RPC2HM":"#0a44f2","RPC2LM":"#f20a0a","RAC3HM":"#0a44f2","RAC3LM":"#f20a0a","RPC3HM":"#0a44f2","RPC3LM":"#f20a0a","RAC4HM":"#0a44f2","RAC4LM":"#f20a0a","RPC4HM":"#0a44f2","RPC4LM":"#f20a0a","MSFL"
:"#0a44f2","RXO8":"#f20a0a","ATR":"#0a44f2","PSR":"#f20a0a","RT":"#f20a0a","TAB_CDR_RES":"#f20a0a","TAB_ARC_RES":"#f20a0a","TAB_RES":"#f20a0a","PEF":"#f70ad0","PEF8":"#f70ad1","PERF":"#f70ad2","PEFZ":"#f70ad3","ROP5_RM":"#f20a0a","ROP":"#f20a0a","ROP5":"#f20a0a","ROPS":"#f20a0a","RXOZ":"#0e33eb","RSOZ":"#0e33eb","ITT":"#2291f2","TENS":"#11f2f2","TTEN":"#11f2f3","HTEN":"#11f2f4","CDF":"#11f2f5","ATMP":"#fa0202","TEMP_MCR":"#fa0203","TVDE":"#0f1d29","SP":"#fa4402","VPVS":"#e102fa","LSN":"#ed0510","SSN":"#0533ed","APRS_ARC":"#ed0510","ARTM":"#f20a0a","AGTK":"#40f20a","ARTK":"#f20a0a","Temp":"#fa0202","TEMP_LWD":"#fa0203","WTBH":"#fa0204","WTBH1":"#fa0205","TEMP_DNI_RT":"#fa0206","HSTEMP":"#fa0207","TCDM":"#fa0208","RPTHM":"#454141","CHT":"#34fa02","DPHI":"#fa4402","SPHI":"#fa02e9","PHIE":"#fa0249","PR":"#fa02f6","ICV":"#9c959b","IHV":"#9c959b","GTEM":"#fa0202","DHTEMP":"#fa0203","TTEM_CDR":"#fa0205","SVEL":"#dd02fa","TTSL":"#dd02fa","TT":"#dd02fa","CBSL":"#fa0202","CBL":"#fa0203","WF2":"#630af2","WF1":"#630af2","CBLF":"#9c959b","CCL":"#9c959b","BS":"#9c959b","TGAS":"#fa0202","Oilshow":"#078238"}}'
71
+ curve_info = json.loads(meta_info)
72
+ curve_info['Unit'].update(new_unit) if len(new_unit) != 0 else curve_info['Unit']
73
+ curve_info['Color'].update(new_color) if len(new_color) != 0 else curve_info['Color']
74
+ log_type = ['LLD', 'LLS', 'DIL', 'ILD', 'ILM', 'RILD', 'RILM', 'HRI', 'RIDPH', 'RIMPH', 'RSFL', 'HDRS', 'HMRS', 'LL3', 'RIPD', 'RIPM', 'AT10', 'AT20', 'AT30', 'AT60', 'AT90', 'AO10', 'AO20', 'AO30', 'AO60', 'AO90', 'AF10', 'AF20', 'AF30', 'AF60', 'AF90', 'AHO10', 'AHO20', 'AHO30', 'AHO60', 'AHO90', 'M2RX', 'M2R9', 'M2R6', 'M2R3', 'M2R1', 'P16H_UNC', 'P22H_UNC', 'P28H_UNC', 'P34H_UNC', 'P40H_UNC', 'A28H_UNC', 'A34H_UNC', 'A40H_UNC', 'P16L_UNC', 'P22L_UNC', 'P28L_UNC', 'P34L_UNC', 'P40L_UNC', 'P22H_UNC_RT', 'P40H_UNC_RT', 'P40H_UNC_R', 'P22H_UNC_R', 'HLLD', 'HLLS', 'RES_DEEP', 'RES_SHAL', 'Rdeep', 'Rshallow', 'P33H_UNC', 'P33L_UNC', 'A33H_UNC', 'A33L_UNC', 'RACHM', 'RACLM', 'RPCHM', 'RPCLM', 'RPCSHM', 'RPCSLM', 'RACSHM', 'RACSLM', 'RAC1HM', 'RAC1LM', 'RPC1HM', 'RPC1LM', 'RAC2HM', 'RAC2LM', 'RPC2HM', 'RPC2LM', 'RAC3HM', 'RAC3LM', 'RPC3HM', 'RPC3LM', 'RAC4HM', 'RAC4LM', 'RPC4HM', 'RPC4LM', 'MSFL', 'RXO8', 'ATR', 'PSR', 'RT', 'RXOZ', 'RSOZ']
75
+ log_type_update = list(set(log_type + log))
76
+
77
+ #Define curves in columns list
78
+ if len(curves) != 0:
79
+ curves_list = curves
80
+ else:
81
+ curves_list = data.select_dtypes(include=['float64']).columns.drop(depth_col)
82
+
83
+ #Calculate numbers of plotting columns and size of whole figure
84
+ cols = len(curves_list); height= 800;
85
+ width = cols * 150 if cols*150 < 1300 else 1300
86
+ #Make subplots layout
87
+ fig = go.Figure()
88
+ fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, horizontal_spacing=0.01)
89
+
90
+ #Check selected curves for plotting
91
+ for i, curve in enumerate(curves_list):
92
+ #Assign color code for single curve
93
+ color = curve_info['Color'][curve] if curve in curve_info['Color'].keys() else '#38a2e8'
94
+ unit = curve_info['Unit'][curve] if curve in curve_info['Unit'].keys() else '_'
95
+ #Add trace to subplots
96
+ add_trace(curv=curve, color=color, xaxis=f'x{i+1}', unit=unit, id_subplt=i+1, log=log_type_update)
97
+
98
+ #Setup the Axes and Layout parameters
99
+ update_xaxes(); update_yaxes(); update_layout([width, height])
100
+ #Show the main figure
101
+ # fig.show(config=dict(displayModeBar=True))
102
+ st.plotly_chart(fig)
models/05_13_2023_11_50_38_model_LGBM.json ADDED
The diff for this file is too large to render. See raw diff
 
pages/1_LAS_Exploratory.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import lasio, os
4
+ import numpy as np
5
+ import os
6
+ import streamlit as st
7
+
8
+ import altair as alt
9
+ from streamlit_vega_lite import altair_component
10
+
11
+ from io import StringIO
12
+
13
+ from ui import *
14
+ from Antuns.page_setting import page_intro
15
+ from mLogsFunctions import *
16
+ from mLogsFunctions.fx import *
17
+ #-----------------------------------------------------------------------------
18
+ page_intro()
19
+
20
+ #1_LOADINGDATA-----------------------------------------------------------------
21
+ st.write("Log ASCII standard (LAS) is a standard file format common in the oil-and-gas and water well industries to store well log information. Well logging is used to investigate and characterize the subsurface stratigraphy in a well.")
22
+ # pagetile = """<center><h1>LAS EXPLORATORY</h1></center>"""
23
+ # st.markdown(pagetile, unsafe_allow_html=True)
24
+ st.subheader('To begin using the app, load your LAS file using the file upload option below.')
25
+ st.subheader("1. LAS File Loading:")
26
+ #------------------------------------------------------------------------------
27
+ #Streamlit Dashboard------------------------------------------------------------------------------------------
28
+
29
+ # set_page_config(page='custom')
30
+ hide_menu_button()
31
+ condense_layout()
32
+ #-----------------------------------------------------------------------------
33
@st.cache_data()  # memoize parsing across Streamlit reruns
def upload_las(uploaded_files):
    """Parse uploaded LAS files into per-file DataFrames.

    Parameters
    ----------
    uploaded_files : list of Streamlit UploadedFile, or None

    Returns
    -------
    tuple
        (dataframes, las_data_list, las_data) where ``dataframes`` maps file
        name -> DataFrame (with a WELL column and the depth index promoted to
        a column), ``las_data_list`` holds the lasio LASFile objects in upload
        order, and ``las_data`` is the last parsed LASFile ([] if none).
    """
    dataframes = {}
    las_data_list = []
    las_data = []
    if uploaded_files is not None:
        for file in uploaded_files:
            try:
                bytes_data = file.read()
                str_io = StringIO(bytes_data.decode('Windows-1252'))
                las_data = lasio.read(str_io)
                well_data = las_data.df()
                well_data['WELL'] = las_data.well.WELL.value
                # Normalise the depth index: files index on 'DEPT' or 'DEPTH'.
                # BUG FIX: the original else-branch wrote `index.name == 'DEPTH'`
                # (a comparison, i.e. a no-op) and then reset_index('DEPTH')
                # raised for any index not literally named 'DEPTH'.
                idx_name = 'DEPT' if well_data.index.name == 'DEPT' else 'DEPTH'
                well_data.index.name = idx_name
                well_data.reset_index(inplace=True)  # depth becomes a regular column
                well_data.index.name = idx_name  # preserve original naming of the new index
                if len(well_data) > 0:  # only keep files that actually contain rows
                    dataframes[file.name] = well_data
                    las_data_list.append(las_data)
                else:
                    st.warning(f"No data in file {file.name}")
            except Exception as e:
                # best-effort loading: report the bad file and continue with the rest
                st.error(f"Error loading {file.name}: {e}")
    return dataframes, las_data_list, las_data
64
+
65
+ # Sidebar Options & File Upload
66
+ uploaded_files = st.file_uploader(label='Upload LAS files:', accept_multiple_files=True, type='las')
67
+
68
+ dataframes, las_data_list, las_data = upload_las(uploaded_files)
69
+ # print("print las data", las_data_list)
70
+ well_names = {}
71
+ if dataframes:
72
+ merged_df = []
73
+ for file_name, df in dataframes.items():
74
+ well_name = file_name.split(".")[0]
75
+ # Lấy danh sách các tên giếng
76
+ well_names = list(dataframes.keys())
77
+
78
+ # Cho phép người dùng chọn giếng và hiển thị DataFrame tương ứng
79
+ selected_well = st.selectbox("Select Well", well_names, key = "selected_well_1")
80
+
81
+ # st.write(f"Data for {selected_well}:")
82
+ st.write(dataframes[selected_well])
83
+ # Tạo một danh sách các DataFrame
84
+ dfs = [df for _, df in dataframes.items()]
85
+ merged_df = pd.concat([df for df in dfs])
86
+ # Hiển thị DataFrame tổng thể
87
+ st.write("Merged DataFrame:")
88
+ st.write(merged_df)
89
+ else:
90
+ print ("Please select LAS files for uploading ")
91
+ st.warning('No valid LAS files were uploaded.')
92
+
93
+ curves = []
94
+ wellname = []
95
+ las = las_data
96
+ list_well = []
97
+ if las:
98
+
99
+ for las_data in las_data_list:
100
+ well_name = las_data.well['WELL'].value
101
+ list_well.append(well_name)
102
+ # print(list_well)
103
+
104
+ st.success('File Uploaded Successfully')
105
+ st.write(f'<b>Well Name</b>: {list_well}', unsafe_allow_html=True)
106
+ # 2_CURVES_INFOMATION-----------------------------------------------------------------
107
+ if las:
108
+
109
+ st.subheader("2. Curve logs details:")
110
+ selected_well = st.selectbox("Select Well", well_names, key = "selected_well_2")
111
+ st.caption("All curve logs in data:")
112
+
113
+ curves = []
114
+
115
+ for well, las_file in zip(well_names, las_data_list):
116
+ if well == selected_well:
117
+ las = las_file
118
+ break
119
+
120
+
121
+ # print("in ra las:", las)
122
+ for curve in las.curves:
123
+ st.write(curve.mnemonic)
124
+ curves.append(curve.mnemonic)
125
+
126
+ for count, curve in enumerate(las.curves):
127
+ st.write("---")
128
+ st.write(f"Curve: {curve.mnemonic}, \t Units: {curve.unit}, \t Description: {curve.descr}")
129
+ st.write(f"There are a total of: {count+1} curves present within this file")
130
+
131
+ #3_DATAFRAME-----------------------------------------------------------------
132
+ if "selected_well" not in st.session_state:
133
+ st.session_state.selected_well = None
134
+ st.session_state.selected_well_multi = None
135
+ st.subheader("3. Converting LAS to DataFrame:")
136
+ st.caption("3.1 Preview of all Dataframe")
137
+
138
+ selected_well = st.selectbox("Select Well", well_names, key = "selected_well_5")
139
+ # print("Well_name", well_names)
140
+ # print("las_data_list", las_data_list)
141
+ for well, las_file in zip(well_names, las_data_list):
142
+ if well == selected_well:
143
+ las = las_file
144
+ break
145
+ # break
146
+ well = las.df()
147
+ well['WELL'] = las.well.WELL.value
148
+ well['DEPTH'] = well.index
149
+ well = well.reset_index(drop=True)
150
+ well = well.reindex(columns=['DEPTH'] + [col for col in well.columns if col != 'DEPTH'])
151
+ st.write(well.head())
152
+ st.caption("3.2 Well curves Statistics")
153
+ st.write(well.describe())
154
+ # print("in ra danh sách giếng", list_well)
155
+ # create a selectbox to choose the well
156
+
157
+ selected_well_multi = st.multiselect(" 3.3 Select well for download", list_well)
158
+ st.session_state.changename = st.button("Create", key="create_curve")
159
+ if st.session_state.changename:
160
+ dataframes_df = pd.concat(dataframes.values(), ignore_index=True)
161
+ st.session_state.selected_well_multi = dataframes_df.loc[dataframes_df['WELL'].isin(selected_well_multi)].reset_index(drop=True)
162
+ st.dataframe(st.session_state.selected_well_multi)
163
+ st.write(" Download DataFrame")
164
+ st.download_button(label='Download CSV File',
165
+ data = st.session_state.selected_well_multi.to_csv(),
166
+ file_name=f"{selected_well_multi}.csv",
167
+ mime='text/csv')
168
+
169
+ #4_Data Preprocessing-----------------------------------------------------------------
170
+ st.subheader("4. Data Preprocessing:")
171
+ st.session_state.old_name:str
172
+ st.session_state.new_name:str
173
+ st.session_state.changename:bool
174
+ st.session_state.well = None
175
+
176
+ st.write("4.1 Rename curves")
177
+
178
+ st.session_state.selected_well_rename = None
179
+ selected_well_rename = st.selectbox("Select Well", list_well, key="well_selectbox")
180
+
181
+ well_to_las = {}
182
+ well = []
183
+ data_rename_1 =[]
184
+ df_all_full = pd.DataFrame()
185
+ st.session_state.selected_well_rename = None
186
+ for i in range(len(well_names)):
187
+ well_to_las[well_names[i]] = las_data_list[i]
188
+ # print("key: ",well_names[i][:len(selected_well_rename)], " value: ",las_data_list[i])
189
+ if selected_well_rename == well_names[i][:len(selected_well_rename)]:
190
+ las = las_data_list[i]
191
+ break
192
+ # print("In ra las:", las)
193
+ well = las.df()
194
+ well['WELL'] = las.well.WELL.value
195
+ # print("In ra well2:", well)
196
+ curves = well.columns.tolist()
197
+ # print ("print ra cuvers:", curves) # save the number of curves for the selected well in session state
198
+ st.session_state.num_curves = len(curves)
199
+ df_rename = pd.DataFrame()
200
+ st.session_state.selected_well_rename = df_rename
201
+ st.session_state.selected_well_rename = well
202
+
203
+ import pandas as pd
204
+
205
+ # Khởi tạo DataFrame từ dữ liệu có sẵn
206
+ # Khởi tạo thuộc tính selected_well_rename trong st.session_state
207
+ st.session_state.setdefault('selected_well_rename', 'Default value')
208
+ # Truy cập thuộc tính selected_well_rename
209
+
210
data = st.session_state.selected_well_rename
# BUG FIX: `'columns' in data` tested column *membership* on a DataFrame
# (always False unless a column is literally named "columns"), so the rename
# grid never rendered. hasattr() is the intended "is this a DataFrame?" check
# and is still False for the 'Default value' string fallback.
if hasattr(data, 'columns'):
    n_cols = 4
    n_rows = -(-len(data.columns) // n_cols)  # ceil(len(columns) / n_cols)
    for i in range(n_rows):
        cols = st.columns(n_cols)
        for j in range(n_cols):
            # BUG FIX: the original computed idx = i*n_cols + (j-1), which
            # skipped the first curve and visited index 3 twice per row;
            # cols[j-1] wrapped to the last column for j == 0; and
            # key=f"input_{cols}" reused one key per row, which Streamlit
            # rejects as duplicate widget keys.
            idx = i * n_cols + j
            if idx < len(data.columns):
                col = data.columns[idx]
                old_col = col  # remember the old name as fallback
                new_col = cols[j].text_input(f"Enter new name for '{col}'", key=f"input_{idx}")
                if not new_col:  # user left the box empty -> keep the old name
                    new_col = old_col
                data = data.rename(columns={col: new_col})
    data["DEPTH"] = well.index
    data.insert(0, 'DEPTH', data.pop('DEPTH'))  # move DEPTH to the first column
    data = data.reset_index(drop=True)
    st.dataframe(data)
else:
    print("Please select LAS files for input data")
235
+ # Hiển thị lại bảng
236
+
237
def my_function(data):
    """Offer a save button; persist *data* to a per-well CSV when clicked.

    The output file name is derived from the page-level
    ``selected_well_rename`` variable. Returns *data* after saving.
    NOTE(review): the extraction lost exact indentation — the ``return``
    appears to sit inside the button branch, so the function implicitly
    returns None when the button is not pressed; confirm against the repo.
    """
    # Save the DataFrame when the user presses the "Lưu" (Save) button
    if st.button("Lưu", key="saved_rename"):
        # Build the CSV file name from the selected_well_rename variable
        file_name = f"/work/2022_VPIMLogs_WebApp/data/change_name_logs/{selected_well_rename}.csv"
        # Write the DataFrame to the corresponding CSV file
        data.to_csv(file_name, index=False)
        # Return the saved DataFrame
        return data
246
+ result = my_function(data)
247
+
248
+ # for name in selected_well_multi:
249
+ # # dataframe_merged_df = pd.DataFrame()
250
+ # merged_df = pd.read_csv("data/change_name_logs/merged_df.csv")
251
+ # dataframe_merged_df.append(merged_df[merged_df.WELL==name])
252
+ # dataframe_merged_df.to_csv("data/change_name_logs/merged_df.csv")
253
+ # st.dataframe(dataframe_merged_df, width=1400)
254
+
255
+
256
+ selected_well_multi= st.multiselect(" 4.3 Select well for download", list_well, key = 'selected_well_multi_lasts')
257
+ # dowload_dataframes_df = pd.DataFrame()
258
+ st.session_state.changename_download = st.button("Create", key="selected_well_multi_rename_curve_111")
259
+ # Đường dẫn đến thư mục chứa các file csv
260
+ dir_path = '/work/2022_VPIMLogs_WebApp/data/change_name_logs/'
261
+ if st.session_state.changename_download:
262
+ # Tạo một DataFrame rỗng để chứa dữ liệu
263
+ merged_df = pd.DataFrame()
264
+ dataframe_merged_df = pd.DataFrame()
265
+ # Duyệt qua tất cả các file trong thư mục và gộp chúng vào DataFrame
266
+ for filename in os.listdir(dir_path):
267
+ if filename.endswith('.csv'):
268
+ filepath = os.path.join(dir_path, filename)
269
+ df = pd.read_csv(filepath)
270
+ merged_df = pd.concat([merged_df, df], ignore_index=True).reset_index(drop=True)
271
+ merged_df.to_csv(f"/work/2022_VPIMLogs_WebApp/data/merged/{selected_well_multi}_merged_df.csv")
272
+ dataframes_df = pd.read_csv(f"/work/2022_VPIMLogs_WebApp/data/merged/{selected_well_multi}_merged_df.csv")
273
+ st.session_state.selected_well_multi = dataframes_df.loc[dataframes_df['WELL'].isin(selected_well_multi)].reset_index(drop=True).drop('Unnamed: 0', axis = 1)
274
+
275
+ st.dataframe(st.session_state.selected_well_multi , width=1400)
276
+ # 4.3_DOWNLOAD-----------------------------------------------------------------
277
+ st.write("4.3 Download well curves with renamed names")
278
+ st.download_button(label='Download CSV File',
279
+ data = st.session_state.selected_well_multi.to_csv(),
280
+ file_name=f"{selected_well_multi}.csv",
281
+ mime='text/csv')
282
+ for filename in os.listdir(dir_path):
283
+ if filename.endswith('.csv'):
284
+ os.remove(os.path.join(dir_path, filename))
285
+ #'''Adding the ‘download’ tag attribute as shown below allows you to provide a file name and extension.
286
+ #f'<a href="data:file/csv;base64,{b64}" download="myfilename.csv">Download csv file</a>'''
287
+
288
+ hide_menu_button()
289
+ condense_layout()
pages/2_Exploratory_Data_Analysis.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import os
5
+ from ui import *
6
+ from mLogsFunctions import *
7
+
8
+ #Streamlit Dashboard------------------------------------------------------------------------------------------
9
+ pagetile = """<center><h1>EXPLORATORY DATA ANALYSIS</h1></center>"""
10
+ set_page_config(page='custom')
11
+ hide_menu_button()
12
+ condense_layout()
13
+
14
+ logo_site, info_site = st.columns([1.5, 8.5])
15
+ with logo_site:
16
+ st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
17
+ with info_site:
18
+ # st.set_option('deprecation.showfileUploaderEncoding', False)
19
+ # st.set_option('maxUploadSize', 200*1024) # 200 MB
20
+ st.markdown(pagetile, unsafe_allow_html=True)
21
+ # Option 1: CSV File Loading
22
+ st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')
23
+ st.subheader("1. CSV File Loading")
24
+ df = csv_uploader()
25
+ df = tweak_data(df,resample=False, reindex=True)
26
+
27
+ # Option 2: CSV from LAS Exploration
28
+ st.subheader("2. CSV from LAS Exploration")
29
+ dir_path = '/work/2022_VPIMLogs_WebApp/data/merged/'
30
+ csv_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.csv')]
31
+ selected_csv_file= st.multiselect('Select a CSV file', csv_files, key = 'st.session_state.selected_well_multi')
32
+
33
+ # # Đọc file csv được chọn vào DataFrame
34
+ if selected_csv_file: # Nếu người dùng đã chọn file CSV
35
+ # Đọc file csv được chọn vào DataFrame
36
+ file_path = '/work/2022_VPIMLogs_WebApp/data/merged/'
37
+ merged_data = pd.concat([pd.read_csv(file_path + f) for f in selected_csv_file])
38
+ df = tweak_data(merged_data, resample=False, reindex=True)
39
+ else: # Nếu người dùng không chọn file CSV
40
+ merged_data = df
41
+ df = tweak_data(merged_data, resample=False, reindex=True)
42
+ #|CHECK DATA EXISTENCE-----------------------------------------------------------------------------------------
43
+ if df is not None:
44
+ curves = columns_list(df, no_depth=True, no_well=True)
45
+ well_names = np.sort(df.WELL.unique())
46
+ #|TABS-ESTABLISHING-----------------------------------------------------------------------------------------
47
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(['DataFrame',
48
+ 'DataStatistics',
49
+ '3D Scatter Points',
50
+ 'CurvesView',
51
+ 'OutliersRemoval'
52
+ ])
53
+ #|TABS-1-----------------------------------------------------------------------------------------
54
+ st.write('---')
55
+ with tab1:
56
+ st.dataframe(df, width=1400, height=500)
57
+
58
+ #|TABS-2-----------------------------------------------------------------------------------------
59
+ with tab2:
60
+ st.radio('DataVisualizationMethod',
61
+ key='displayTab2',
62
+ options=['DataStatistics',
63
+ 'Missing Statistic',
64
+ 'Curve Distribution',
65
+ 'Histogram Overlay',
66
+ 'Cross-Plot',
67
+ 'PairPlot'],
68
+ horizontal=True)
69
+ if st.session_state.displayTab2 == 'DataStatistics':
70
+ subtab21(df, well_names)
71
+ elif st.session_state.displayTab2 == 'Missing Statistic':
72
+ subtab22(df)
73
+ elif st.session_state.displayTab2 == 'Curve Distribution':
74
+ subtab23(df, curves)
75
+ elif st.session_state.displayTab2 == 'Histogram Overlay':
76
+ subtab24(df, curves)
77
+ elif st.session_state.displayTab2 == 'Cross-Plot':
78
+ subtab25(df, curves)
79
+ elif st.session_state.displayTab2 == 'PairPlot':
80
+ subtab26(df, curves)
81
+ else:
82
+ subtab21(df, well_names)
83
+
84
+ #|TABS-3-----------------------------------------------------------------------------------------
85
+ with tab3:
86
+ scatterPoint3D(df)
87
+ #|TABS-4-----------------------------------------------------------------------------------------
88
+ with tab4:
89
+ stViewCurves(df)
90
+ #|TABS-5-----------------------------------------------------------------------------------------
91
+ with tab5:
92
+ rmOutliers(df)
pages/3_Fracture_Training_Models.py ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ import altair as alt
5
+ import lightgbm as lgb
6
+ import matplotlib.pyplot as plt
7
+ import pickle, os, datetime
8
+ import bz2file as bz2
9
+
10
+ from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
11
+ from sklearn.metrics import roc_auc_score, balanced_accuracy_score, f1_score, recall_score, precision_score
12
+ from sklearn.metrics import roc_curve, precision_recall_curve, auc
13
+
14
+ import streamlit as st
15
+
16
+ from ui import *
17
+ from mLogsFunctions import *
18
+
19
+
20
+ #------------------------------------------------------------------------------------------
21
+ # processing pipeline
22
def remove_negative_val(df, col):
    """Drop every row whose value in *col* is strictly negative.

    Rows where *col* is NaN are kept (NaN < 0 is False).
    """
    bad_rows = df[df[col] < 0].index
    return df.drop(index=bad_rows)
24
def rel_depth(df):
    """Append a ``rel_depth`` column: DEPTH normalised by each well's top depth.

    Wells are processed in order of first appearance; within each well the
    rows are sorted by DEPTH ascending, so the divisor is the shallowest
    depth of that well. The result gets a fresh RangeIndex.
    """
    per_well_frames = []
    for well_name in df.WELL.unique():
        well_df = df[df.WELL == well_name].sort_values(by="DEPTH", ascending=True)
        top_depth = well_df.DEPTH.values[0]  # shallowest depth after the sort
        per_well_frames.append(well_df.assign(rel_depth=well_df.DEPTH / top_depth))
    return pd.concat(per_well_frames).reset_index(drop=True)
30
+
31
def tweak_data_S(df):
    """Clean a merged well-log DataFrame for training.

    Behaviour identical to the original chained version, with the eight
    duplicated ``replace({-9999.: 0.}).astype('float32')`` chains collapsed:
      1. Replace the -9999 sentinel (and NaN for the label) with 0 and
         downcast to compact dtypes.
      2. Drop rows with negative RHOB, DTC, DTS, GR or LLD readings.
      3. Append a ``rel_depth`` column (depth / top depth per well).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain WELL, DEPTH, FRACTURE_ZONE and the log curves below.

    Returns
    -------
    pandas.DataFrame with a fresh RangeIndex.
    """
    # Curves sharing the same "-9999 -> 0, float32" treatment.
    sentinel_float_cols = ["GR", "DCALI_FINAL", "LLD", "LLS", "NPHI", "RHOB", "DTC", "DTS"]
    out = df.assign(
        FRACTURE_ZONE=df.FRACTURE_ZONE.replace({-9999: 0, np.nan: 0}).astype('int8'),
        DEPTH=df.DEPTH.astype('float32'),
        **{c: df[c].replace({-9999.: 0.}).astype('float32') for c in sentinel_float_cols},
    )
    # Physically impossible negative readings are dropped outright.
    for col in ["RHOB", "DTC", "DTS", "GR", "LLD"]:
        out = remove_negative_val(out, col)
    return rel_depth(out)
51
+
52
#Streamlit Dashboard------------------------------------------------------------------------------------------
# Page script: load training data (upload or merged CSVs), clean it, let the
# user pick features/label, train a LightGBM classifier, then report scores
# and per-well log plots.
pagetile = """<center><h1>TRAINING SITE</h1></center>"""
set_page_config(page='custom')  # NOTE(review): 'custom' matches no branch inside set_page_config — confirm intent
hide_menu_button()
condense_layout()

logo_site, info_site = st.columns([1.5, 8.5])
with logo_site:
    st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
with info_site:
    st.set_option('deprecation.showfileUploaderEncoding', False)  # NOTE(review): deprecated option in newer Streamlit
    # st.set_option('maxUploadSize', 200*1024) # 200 MB
    st.markdown(pagetile, unsafe_allow_html=True)
    # Option 1: CSV File Loading
    st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')

    st.subheader("1. CSV File Loading")
    df = csv_uploader()  # project helper; presumably returns a DataFrame or None — confirm
    # df = tweak_data(df,resample=False, reindex=True)

    # Option 2: CSV from LAS Exploration
    st.subheader("2. CSV from LAS Exploration")
    dir_path = '/work/2022_VPIMLogs_WebApp/data/merged/'  # NOTE(review): hard-coded absolute path
    csv_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.csv')]
    selected_csv_file= st.multiselect('Select a CSV file', csv_files, key = 'st.session_state.selected_well_multi')

    # Read the selected CSV file(s) into one DataFrame
    if selected_csv_file:  # the user picked at least one CSV from the folder
        file_path = '/work/2022_VPIMLogs_WebApp/data/merged/'
        merged_data = pd.concat([pd.read_csv(file_path + f) for f in selected_csv_file])
        # df = tweak_data_S(merged_data, resample=False, reindex=True)
        df = merged_data  # folder selection overrides the uploaded file
    else:  # no selection: fall back to the uploaded file (may be None)
        merged_data = df

# df = tweak_data(merged_data, resample=False, reindex=True)
#------------------------------------------------------------------------------------------
if df is not None:
    st.caption("Data Preparation")
    # Processing data
    # df = tweak_data_S(df)
    try:
        df = tweak_data_S(df)
    except AttributeError as e:  # e.g. an expected column is missing — NOTE(review): error is silently swallowed
        print(" ")
    st.info("Tweak Data")
    i1, i2 = st.columns(2)
    # Report the cleaning steps, alternating between two columns.
    for i, v in enumerate(["FRACTURE_ZONE", "GR", "DCAL", "LLD", "LLS", "NPHI", "RHOB", "DTC", "DTS", "DEPTH"]):
        if i%2==0:
            with i1:
                st.success(f"{v}: Replaced nan values by 0")
        if i%2==1:
            with i2:
                st.success(f"{v}: Replaced nan values by 0")
    st.info(" Negative values removal in RHOB, DTC, DTS, GR, LLD: Done!")
    st.write("---")

    #--------------------------------------------------------------------------------------
    # define training/testing data
    st.write("Please to slectect Curves input for Traning Model")  # NOTE(review): typos in user-facing text ("slectect", "Traning")
    feature_names_dict = [col for col in df.columns if col not in ["WELL",
                                                                   "DEPTH",
                                                                   # "Fracture Intensity",
                                                                   # "FRACTURE_ZONE",
                                                                   ]]
    feature_names = st.multiselect('Select curves', feature_names_dict, key = 'st.session_state.selected_well_multi_curves')
    feature_names_label = [col for col in df.columns if col not in ["WELL",
                                                                    "DEPTH",
                                                                    # "Fracture Intensity",
                                                                    # "FRACTURE_ZONE",
                                                                    ]]
    st.write("Please to slectect a Label input for Traning Model")
    feature_names_label = st.selectbox('Select a curves', feature_names_label, key = 'st.session_state.selected_well_multi_label')

    label_name = feature_names_label  # single label column chosen by the user
    st.caption("Features Selection")
    st.info(f"Label names: {label_name}")
    st.info(f"Feature names: {feature_names}")
    st.write("---")
    #--------------------------------------------------------------------------------------
    st.caption("Split Data")

    ## split data
    ### some data for test model after deploy (blind-test split, stratified by WELL)
    ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

    for _train_inx, _test_inx in ss.split(df, df["WELL"]):
        train_df, test_df = df.loc[_train_inx, :], df.loc[_test_inx, :]

    # Train/validation split, stratified on the label.
    X_train, X_test, y_train, y_test = train_test_split(
        train_df[feature_names],
        train_df[label_name],
        stratify=train_df[label_name],
        train_size=0.9,
        random_state=42,
    )

    ### create lgb dataset
    train_set = lgb.Dataset(X_train,
                            label=y_train,
                            feature_name=feature_names,
                            )
    valid_set = lgb.Dataset(X_test,
                            label=y_test,
                            reference=train_set,
                            feature_name=feature_names,
                            )

    st.info(f"Size of FULL Dataset: {len(df)}")
    st.info(f"Size of TRAINING set: {train_set.construct().num_data()}")
    st.info(f"Size of VALIDATION set: {valid_set.construct().num_data()}")
    st.info(f"Size of TESTING set: {len(test_df)}")
    st.write("---")
    #Traning model--------------------------------------------------------------------------------------
    if st.button("Start Train"):
        # Modeling
        ## custom metric
        st.caption("Training")
        from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score, roc_auc_score, precision_recall_curve
        model = lgb.train(
            params={"boosting_type": "gbdt",
                    "objective": "cross_entropy",
                    "metric": ["rmse","recall"],  # NOTE(review): "recall" is not a LightGBM built-in metric — confirm
                    "is_unbalance": True,
                    },
            train_set=lgb.Dataset(data=X_train, label=y_train,
                                  feature_name=feature_names),
            num_boost_round=2000,
            valid_sets=lgb.Dataset(data=X_test, label=y_test,
                                   feature_name=feature_names),
            # NOTE(review): early_stopping_rounds/verbose_eval are deprecated in
            # newer LightGBM in favour of callbacks — pinned to 3.3.2 here.
            early_stopping_rounds=5,
            verbose_eval=0,
        )
        st.success("Finished Training!")
        st.success("Saved Model!")
        st.write("---")

        # Persist the trained model as a timestamped JSON file.
        now = datetime.datetime.now()
        current_time = now.strftime("%m_%d_%Y_%H_%M_%S")
        link= f"/work/2022_VPIMLogs_WebApp/models/{current_time}_model_LGBM.json"
        model.save_model(filename= link)

        with open(link, 'r') as f:
            file_content = f.read()
        if st.download_button(label='Download JSON File',
                              data=file_content,
                              file_name=link,  # NOTE(review): full path used as download name — a basename is probably intended
                              mime='application/json'):
            pass
        else:
            st.text(" ")
        #Scores--------------------------------------------------------------------------------------
        ## using model to make prediction
        st.caption("Modeling Scores")

        threshold = 0.5  # probability cut-off for the positive class
        #Make label Prediction
        predictions = (model.predict(df[feature_names])> threshold).astype(int)
        df['FRACTURE_ZONE_PRED'] = predictions
        test_preds = model.predict(test_df[feature_names])
        train_preds = model.predict(X_train)
        valid_preds = model.predict(X_test)

        # NOTE(review): ROC-AUC below is computed on thresholded 0/1 labels,
        # not raw probabilities — it under-reports the true AUC; confirm intent.
        valid_recall = recall_score(y_test, valid_preds >= threshold, average = 'weighted')
        valid_precision = precision_score(y_test, valid_preds >= threshold, average = 'weighted')
        valid_acc = accuracy_score(y_test, valid_preds >= threshold)
        valid_f1 = f1_score(y_test, valid_preds >= threshold, average = 'weighted')
        valid_aoc = roc_auc_score(y_test, valid_preds >= threshold)

        train_recall = recall_score(y_train, train_preds >= threshold, average = 'weighted')
        train_precision = precision_score(y_train, train_preds >= threshold, average = 'weighted')
        train_acc = accuracy_score(y_train, train_preds >= threshold)
        train_aoc = roc_auc_score(y_train, train_preds >= threshold)
        train_f1 = f1_score(y_train, train_preds >= threshold, average = 'weighted')

        test_recall = recall_score(test_df[label_name], test_preds >= threshold, average = 'weighted')
        test_precision = precision_score(test_df[label_name], test_preds >= threshold, average = 'weighted')
        test_acc = accuracy_score(test_df[label_name], test_preds >= threshold)
        test_aoc = roc_auc_score(test_df[label_name], test_preds >= threshold)
        test_f1 = f1_score(test_df[label_name], test_preds >= threshold, average = 'weighted')

        sc1, sc2, sc3 = st.columns(3)
        with sc1:
            st.info(f"Training score (RECALL): {train_recall}")
            st.info(f"Training score (PRECISION): {train_precision}")
            st.info(f"Training score (ACC): {train_acc}")
            st.info(f"Training score (F1): {train_f1}")
            st.info(f"Training score (AOC): {train_aoc}")

        with sc2:
            st.info(f"Validation score (RECALL): {valid_recall}")
            st.info(f"Validation score (PRECISION): {valid_precision}")
            st.info(f"Validation score (ACC): {valid_acc}")
            st.info(f"Validation score (F1): {valid_f1}")
            st.info(f"Validation score (AOC): {valid_aoc}")

        with sc3:
            st.info(f"Testing score (RECALL): {test_recall}")
            st.info(f"Testing score (PRECISION): {test_precision}")
            st.info(f"Testing score (ACC): {test_acc}")
            st.info(f"Testing score (F1): {test_f1}")
            st.info(f"Testing score (AOC): {test_aoc}")
        st.write("---")
        #Measure Scores--------------------------------------------------------------------------------------
        st.caption("Scores plotting charts")

        ## roc valid
        fpr_valid, tpr_valid, threshold_valid = roc_curve(y_test, valid_preds)
        roc_auc_valid = auc(fpr_valid, tpr_valid)
        ## precision recall valid
        pr_valid, rc_valid, threshold_valid= precision_recall_curve(y_test, valid_preds)
        ## roc training
        tfpr_train, ttpr_train, tthreshold_train = roc_curve(y_train, train_preds)
        troc_auc_train = auc(tfpr_train, ttpr_train)
        ## precision recall training
        tpr_train, trc_train, tthreshold_train = precision_recall_curve(y_train, train_preds)
        ## roc test
        tfpr_test, ttpr_test, tthreshold = roc_curve(test_df[label_name], test_preds)
        troc_auc_test = auc(tfpr_test, ttpr_test)
        ## precision recall testing
        tpr_test, trc_test, tthreshold_test = precision_recall_curve(test_df[label_name], test_preds)

        #Plot Scores--------------------------------------------------------------------------------------
        # 2x3 grid: row 0 = ROC (train/valid/test), row 1 = PR curves.
        fig, ax = plt.subplots(figsize=(40,40))
        ax1 = plt.subplot2grid((7,7), (0,0), rowspan=1, colspan = 1)
        ax2 = plt.subplot2grid((7,7), (0,1), rowspan=1, colspan = 1)
        ax3 = plt.subplot2grid((7,7), (0,2), rowspan=1, colspan = 1)
        ax4 = plt.subplot2grid((7,7), (1,0), rowspan=1, colspan = 1)
        ax5 = plt.subplot2grid((7,7), (1,1), rowspan=1, colspan = 1)
        ax6 = plt.subplot2grid((7,7), (1,2), rowspan=1, colspan = 1)

        def set_ax(ax,
                   x, y, color, label, legend,
                   line:bool=False,
                   title:str=None,
                   x_label:str=None,
                   y_label:str=None,
                   ):
            # Draw one metric curve on *ax*; `line=True` adds the y=x diagonal
            # and clamps both axes to [0, 1].
            ax.plot(x, y, color, label=label)
            ax.set_title(title)
            ax.legend(loc = legend)
            if line == True:
                ax.plot([0, 1], [0, 1],'r--')
                ax.set_xlim([0, 1])
                ax.set_ylim([0, 1])
            ax.set_ylabel(y_label)
            ax.set_xlabel(x_label)
        p1, p2, p3 = st.columns([1,14,1])
        with p2:
            ## roc valid
            set_ax(ax2, fpr_valid, tpr_valid, 'b',
                   label = 'AUC = %0.2f' % roc_auc_valid,
                   legend='lower right',
                   title='Receiver Operating Characteristic - Validation',
                   line=True,
                   x_label='False Positive Rate',
                   y_label='True Positive Rate',
                   )

            ## precision recall valid
            set_ax(ax5, pr_valid, rc_valid, 'orange',
                   label = 'PR Curve',
                   legend='lower right',
                   title='Precision Recall Curve - Validation',
                   line=True,
                   x_label='Recall',
                   y_label='Precision',
                   )

            ## roc training
            set_ax(ax1, tfpr_train, ttpr_train, 'b',
                   label = 'AUC = %0.2f' % troc_auc_train,
                   legend='lower right',
                   title='Receiver Operating Characteristic - Training',
                   line=True,
                   x_label='False Positive Rate',
                   y_label='True Positive Rate',
                   )

            ## precision recall training
            set_ax(ax4, tpr_train, trc_train, 'orange',
                   label = 'PR Curve',
                   legend='lower right',
                   title='Precision Recall Curve - Training',
                   line=True,
                   x_label='Recall',
                   y_label='Precision',
                   )

            ## roc test
            set_ax(ax3, tfpr_test, ttpr_test, 'b',
                   label = 'AUC = %0.2f' % troc_auc_test,
                   legend='lower right',
                   title='Receiver Operating Characteristic - Blind test',
                   line=True,
                   x_label='False Positive Rate',
                   y_label='True Positive Rate',
                   )

            ## precision recall testing
            set_ax(ax6, tpr_test, trc_test, 'orange',
                   label = 'PR Curve',
                   legend='lower right',
                   title='Precision Recall Curve - Blind test',
                   line=True,
                   x_label='Recall',
                   y_label='Precision',
                   )

            st.pyplot(fig)

        #Plot Data------------------------------------------------------------------
        # NOTE(review): FRACTURE_ZONE_PRED is not in the exclusion list and is
        # also appended below, so its chart is drawn twice — confirm.
        plotting_curves = [c for c in df.columns.unique() if c not in ["DEPTH", "WELL", "TVD", "DCALI_FINAL", "INCL", "AZIM_TN", "rel_depth"]]
        plotting_curves.sort()
        if "FRACTURE_ZONE_PRED" in df.columns.unique():
            plotting_curves.append("FRACTURE_ZONE_PRED")
        for well in df.WELL.unique():
            st.write('---')
            st.write(f"{well} Logs: \n")
            well_plot = df[df.WELL == well]
            charts_dict={}
            for i, c in enumerate(plotting_curves):
                charts_dict[i] = curve_plot(data=well_plot,filted_data=None, x_column=c)
            #Show Curve-----------------------------------------------------------------------
            st.write(alt.concat(*charts_dict.values(), columns = 12).configure(autosize='fit'))
        # st.snow()
        #DOWNLOAD-----------------------------------------------------------------
        # Define the download button

        # if st.download_button('Download Modeling (with format JSON)'):
        #     with open(filename, 'r') as f:
        #         data = json.load(f)
        #     href = f"data:text/json;charset=utf-8,{json.dumps(data, indent=2)}"
        #     st.markdown(f'<a href="{href}" download="(unknown)">Download Modeling (with format JSON)</a>')
hide_menu_button()
condense_layout()
pages/4_Fracture_Prediction.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import lightgbm as lgb
3
+
4
+ import pickle, os
5
+ import bz2file as bz2
6
+
7
+ import altair as alt
8
+
9
+ import streamlit as st
10
+
11
+ from ui import *
12
+ from mLogsFunctions import *
13
+ from mLogsFunctions.fx import *
14
+
15
+
16
#ignore version warnings emitted by the ML libraries at load time
import warnings
warnings.filterwarnings("ignore")

#Global variables
# Default probability cut-off for the fracture class.
# NOTE(review): the script below uses a literal 0.5 instead of this constant — confirm.
THRESHOLD_GLOBAL = 0.5

#All wells to be trained
wells_name = ["01-97-HXS-1X", "15-1-SN-1X", "15-1-SN-2X", "15-1-SN-3XST", "15-1-SN-4X", "15-1-SNN-1P",
              "15-1-SNN-2P", "15-1-SNN-3P", "15-1-SNN-4P", "15-1-SNS-1P", "15-1-SNS-2P", "15-1-SNS-4P"]
#Obtain data and label of wells: feature columns fed to the model and the target column
name_features = ["GR", "LLD", "LLS", "NPHI", "RHOB", "DTC", "DTS"]
label = "FRACTURE_ZONE"
29
+ #-----------------------------------------------------------------
30
+ # processing pipeline
31
def remove_negative_val(df, col):
    """Return *df* without the rows whose value in *col* is negative.

    NaN values do not compare as negative, so rows with a missing *col*
    survive; the index labels of the kept rows are preserved.
    """
    negative_index = df[df[col] < 0].index
    return df.drop(index=negative_index)
33
def rel_depth(df):
    """Append a ``rel_depth`` column: DEPTH normalised by each well's top depth.

    Wells are processed in order of first appearance; within each well the
    rows are sorted by DEPTH ascending, so the divisor is the shallowest
    depth of that well. The result gets a fresh RangeIndex.
    """
    per_well_frames = []
    for well_name in df.WELL.unique():
        well_df = df[df.WELL == well_name].sort_values(by="DEPTH", ascending=True)
        top_depth = well_df.DEPTH.values[0]  # shallowest depth after the sort
        per_well_frames.append(well_df.assign(rel_depth=well_df.DEPTH / top_depth))
    return pd.concat(per_well_frames).reset_index(drop=True)
39
+
40
def tweak_data_S(df):
    """Clean a merged well-log DataFrame for prediction.

    Behaviour identical to the original chained version, with the eight
    duplicated ``replace({-9999.: 0.}).astype('float32')`` chains collapsed.
    Unlike the training-page variant, FRACTURE_ZONE is deliberately left
    untouched here (the original had that line commented out).

    Steps:
      1. Replace the -9999 sentinel with 0 in the log curves and downcast.
      2. Drop rows with negative RHOB, DTC, DTS, GR or LLD readings.
      3. Append a ``rel_depth`` column (depth / top depth per well).

    Returns a DataFrame with a fresh RangeIndex.
    """
    # FRACTURE_ZONE=df.FRACTURE_ZONE.replace({-9999: 0, np.nan: 0}).astype('int8'),
    # Curves sharing the same "-9999 -> 0, float32" treatment.
    sentinel_float_cols = ["GR", "DCALI_FINAL", "LLD", "LLS", "NPHI", "RHOB", "DTC", "DTS"]
    out = df.assign(
        DEPTH=df.DEPTH.astype('float32'),
        **{c: df[c].replace({-9999.: 0.}).astype('float32') for c in sentinel_float_cols},
    )
    # Physically impossible negative readings are dropped outright.
    for col in ["RHOB", "DTC", "DTS", "GR", "LLD"]:
        out = remove_negative_val(out, col)
    return rel_depth(out)
60
# Apply *model* to *df* and return the thresholded (0/1) label predictions.
def calculate_confusion_matrix(model=None, df=None, threshold=None):
    """Binarise the probabilities of *model* applied to *df*.

    NOTE(review): despite the name, no confusion matrix is computed — the
    function returns a Series of 0/1 labels obtained by thresholding the
    probabilities of the single wrapped model ("model_0" column).

    Parameters
    ----------
    model : trained model understood by ``Prediction_LGBM``.
    df : DataFrame holding at least the columns in module-level ``name_features``.
    threshold : float or None
        Decision threshold; defaults to 0.5 when None.
    """
    # Fix: `threshold == None` replaced with the correct identity check,
    # and the default is resolved before it is first used.
    if threshold is None:
        threshold = 0.5
    model_prediction = [model]
    # Apply model on dataframe
    proba = Prediction_LGBM(trained_models=model_prediction, data=df, feature_names=name_features)
    proba_well = proba.loc[:, "model_0"]
    # Apply threshold to turn probabilities into hard 0/1 labels
    well_proba = proba_well.apply(lambda x: 1 if x >= threshold else 0)
    return well_proba
71
+
72
+ #------------------------------------------------------------------
73
# Load any compressed pickle file
# Default pre-trained compressed model (bz2-compressed pickle).
file = "/work/2022_VPIMLogs_WebApp/models/LightGBM_0.45.pbz2"
def decompress_pickle(file):
    """Read a bz2-compressed pickle file and return the deserialised object.

    Fix: the original never closed the BZ2File handle; the context manager
    guarantees it is closed even if unpickling raises.

    NOTE(review): unpickling executes arbitrary code — only load trusted files.
    """
    with bz2.BZ2File(file, 'rb') as compressed:
        return pickle.load(compressed)
79
# model_best = decompress_pickle(file)

# Loading Modeling
# model_best = lgb.Booster(model_file="/work/2022_VPIMLogs_WebApp/models/LGBM_20221125.json")

#Loading data from browser:----------------------------------------
#Streamlit Dashboard------------------------------------------------------------------------------------------
# Page script: load data (upload or merged CSVs) and a trained LightGBM model
# (upload or saved JSON), predict fracture zones, plot per-well curves and
# offer the result as a CSV download.
pagetile = """<center><h1>PREDICTION SITE</h1></center>"""
st.markdown(pagetile, unsafe_allow_html=True)
# set_page_config(page='custom')

hide_menu_button()
condense_layout()

logo_site, info_site = st.columns([1.5, 8.5])
with logo_site:
    st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
with info_site:
    # Option 1: CSV File Loading
    st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')
    st.subheader("1. CSV File Loading")
    st.caption('## 1.1. CSV from Uploader')
    df = csv_uploader()  # project helper; presumably returns a DataFrame or None — confirm

    # Option 2: CSV from LAS Exploration
    st.caption('## 1.2. CSV from LAS Exploration')
    dir_path = '/work/2022_VPIMLogs_WebApp/data/merged/'  # NOTE(review): hard-coded absolute path
    csv_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.csv')]
    selected_csv_file= st.multiselect('Select a CSV file', csv_files, key = 'st.session_state.selected_well_multi')
    if selected_csv_file:  # the user picked at least one CSV from the folder
        # Read the selected CSV file(s) into one DataFrame
        file_path = '/work/2022_VPIMLogs_WebApp/data/merged/'
        wells_df_predict = pd.concat([pd.read_csv(file_path + f) for f in selected_csv_file])
        # wells_df_predict = tweak_data_S(wells_df_predict)
    else:  # no selection: fall back to the uploaded CSV (may be None)
        wells_df_predict = df
        # wells_df_predict = tweak_data_S(wells_df_predict)

    st.write('You can load your json file using the file upload or selection from TRAINING SECTION below.')

    st.subheader("2. JSON File Loading")
    st.caption('## 2.1. JSON from Uploader')
    model_best_uploader = None
    uploaded_file = st.file_uploader("Choose a JSON file", type="json")
    # Check whether a file was uploaded
    if uploaded_file is not None:
        # Write the upload to a temporary JSON file (lgb.Booster needs a path)
        with open("temp.json", "w") as f:
            f.write(uploaded_file.read().decode("utf-8"))
        # Absolute path of the temporary JSON file
        temp_file_path = os.path.abspath("temp.json")
        # Build a LightGBM Booster from the JSON file
        model_best_uploader = lgb.Booster(model_file=temp_file_path)

        # Delete the temporary file after use
        os.remove(temp_file_path)
    # Option 2: JSON from TRAINING SECTION
    st.caption('## 2.2. JSON from TRANING SECTION')  # NOTE(review): typo "TRANING" in user-facing text
    dir_path = '/work/2022_VPIMLogs_WebApp/models/'
    json_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.json')]
    selected_json_file= st.multiselect('Select a JSON file', json_files, key = 'st.session_state.selected_well_multi_JSON')
    if selected_json_file:  # the user picked a saved model
        # NOTE(review): the selection is ignored — a hard-coded model path is
        # always loaded instead of `selected_json_file`; almost certainly a bug.
        file_path = '/work/2022_VPIMLogs_WebApp/models/'
        model_files = "/work/2022_VPIMLogs_WebApp/models/05_13_2023_11_50_38_model_LGBM.json"
        model_best = lgb.Booster(model_file=model_files)
    else:  # no selection: use the uploaded model (may be None)
        model_best = model_best_uploader

if wells_df_predict is not None:
    wells_df_predict = tweak_data_S(wells_df_predict)
    # Rows without a ground-truth label are dropped before prediction.
    wells_df_predict = wells_df_predict.replace({-9999: np.nan}).dropna(how='any', subset = "FRACTURE_ZONE")
    st.write("Data Input:")
    st.dataframe(wells_df_predict.sort_index(), width=1400, height=300)
    st.write('---')
    st.write("Selected Prediction Model:")
    st.write(model_best)
    #------------------------------------------------------------------
    feature_names = [col for col in wells_df_predict.columns if col not in ["WELL", "DEPTH","FRACTURE_ZONE"]]
    # Full data for export data
    st.session_state.pred = st.button("Predict Fracture Zone")
    if st.session_state.pred:
        threshold = 0.5  # probability cut-off for the positive class
        #Make label Prediction
        predictions = (model_best.predict(wells_df_predict[feature_names])> threshold).astype(int)
        wells_df_predict['FRACTURE_ZONE_PRED'] = predictions
        st.dataframe(wells_df_predict, width=1400, height=300)
        #Plot Data------------------------------------------------------------------
        plotting_curves = [c for c in wells_df_predict.columns.unique() if c not in ["DEPTH", "WELL", "TVD", "FRACTURE_ZONE", "FRACTURE_ZONE_PRED", "DCALI_FINAL", "INCL", "AZIM_TN", "rel_depth"]]
        plotting_curves.sort()
        # Always show the prediction track last.
        if "FRACTURE_ZONE_PRED" in wells_df_predict.columns.unique():
            plotting_curves.append("FRACTURE_ZONE_PRED")
        for well in wells_df_predict.WELL.unique():
            st.write('---')
            st.write(f"{well} Logs: \n")
            well_plot = wells_df_predict[wells_df_predict.WELL == well]
            charts_dict={}
            for i, c in enumerate(plotting_curves):
                charts_dict[i] = curve_plot(data=well_plot,filted_data=None, x_column=c)
            #Show Curve-----------------------------------------------------------------------
            st.write(alt.concat(*charts_dict.values(), columns = 12).configure(autosize='fit'))
        # Download --------------------------------------------------------------
        st.write('---')
        st.write("Download final result to csv file")

        st.download_button(label='Download All Wells',
                           data = wells_df_predict.to_csv(),
                           file_name='FracturePredictionALL.csv',
                           mime='text/csv')

hide_menu_button()
condense_layout()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==4.2.0
2
+ bz2file==0.98
3
+ lasio==0.30
4
+ matplotlib==3.4.3
5
+ numpy==1.21.6
6
+ pandas==1.4.3
7
+ seaborn==0.11.2
8
+ streamlit==1.22.0
9
+ streamlit_vega_lite==0.1.0
10
+ streamlit-nested-layout==0.1.1
11
+ lightgbm==3.3.2
12
+ plotly==5.10.0
13
+ scikit-learn==1.1.2
ui/PageComponents.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from mLogsFunctions.fx import *
4
+ from mLogsFunctions import *
5
+
6
def subtab21(df, well_names):
    """EDA sub-tab: ``describe()`` statistics for all wells or a single well.

    df : DataFrame with a WELL column.
    well_names : list of well identifiers shown in the radio selector.
    """
    col1, col2 = st.columns([1,8])
    # Seed the widget state on first render only.
    if "method_eda" not in st.session_state:
        st.session_state.method_eda:str = "Single Well"
        st.session_state.well_eda:str = well_names[0]
    with col1:
        st.radio("",
                 key="method_eda",
                 options=["All Wells", "Single Well"])
        st.radio("WELL",
                 key="well_eda",
                 options=well_names)
    with col2:
        st.write('Data Description:')
        if st.session_state.method_eda == "All Wells":
            st.dataframe(df.describe(),width=1400, height=300)
        elif st.session_state.method_eda == "Single Well":
            df_single_w = df[df.WELL == st.session_state.well_eda]
            st.dataframe(df_single_w.describe(),width=1400, height=300)
        else:
            pass
27
+
28
def subtab22(df,):
    """EDA sub-tab: missing-data-rate bar charts, overall and per well."""
    def missing_count(df):
        # Each column's share (%) of the total missing-value count.
        missing = df.isnull().sum()*100/df.isnull().sum().sum()
        missing = missing[missing >= 0].reset_index()
        missing.columns = ['Columns', 'Count missing (%)']
        return missing

    mt1, mt2, mt3 = st.columns(3)

    # Round-robin the per-well charts across the three columns (i % 3).
    with mt1:
        st.caption("Missing data rate of whole wells")
        st.write(missing_bar(missing_count(df), "ALL WELLS"))
        for i, w in enumerate(df.WELL.unique()):
            if i%3 == 0:
                st.caption(f"Missing data rate of {w}")
                st.write(missing_bar(missing_count(well_filter(df, w)), f"WELL {w}"))
    with mt2:
        for i, w in enumerate(df.WELL.unique()):
            if i%3 == 1:
                st.caption(f"Missing data rate of {w}")
                st.write(missing_bar(missing_count(well_filter(df, w)), f"WELL {w}"))
    with mt3:
        for i, w in enumerate(df.WELL.unique()):
            if i%3 == 2:
                st.caption(f"Missing data rate of {w}")
                st.write(missing_bar(missing_count(well_filter(df, w)), f"WELL {w}"))
54
+
55
def subtab23(df, curves):
    """EDA sub-tab: distribution box plot of each curve, three per row.

    Fix: the three duplicated ``if i%3 == k`` branches are collapsed into a
    single round-robin over the column slots — behaviour is unchanged.

    df : DataFrame with the well-log data.
    curves : list of column names to plot.
    """
    slots = st.columns(3)
    for i, c in enumerate(curves):
        with slots[i % 3]:
            st.caption(f"Distribution of {c}")
            st.write(missing_box(df, c))
70
+
71
def subtab24(df, curves):
    """EDA sub-tab: histogram/line plot of each curve, three per row.

    Fix: the three duplicated ``if i%3 == k`` branches are collapsed into a
    single round-robin over the column slots — behaviour is unchanged.
    """
    #Histogram Line----------------------------------------------------------------
    slots = st.columns(3)
    for i, c in enumerate(curves):
        with slots[i % 3]:
            hist_line_plot(df,c)
84
+
85
def subtab25(df, curves):
    """EDA sub-tab: cross-plot for every pair of distinct curves, five per row.

    Fix: the five duplicated ``if i%5 == k`` branches are collapsed into a
    single round-robin over the column slots — behaviour is unchanged.
    """
    #CrossPlot----------------------------------------------------------------
    # All unordered pairs of distinct curves, preserving input order.
    pair_curv = [(a, b) for idx, a in enumerate(curves) for b in curves[idx + 1:]]
    slots = st.columns(5)
    for i, (x_curve, y_curve) in enumerate(pair_curv):
        with slots[i % 5]:
            crossplot(df, x_curve, y_curve)
105
+
106
def subtab26(df, curves):
    """EDA sub-tab: pair-plot matrix of the curves (via the project
    ``pairplot`` helper), for all wells or a single selected well."""
    #Pairpot----------------------------------------------------------------
    _p1, _p2, _p3 = st.columns([2,2,2])
    # Seed the widget state on first render only.
    if "pair_opt" not in st.session_state:
        st.session_state.pair_opt:str = "ALL WELLS"
        st.session_state.color_pair:str = "WELL"
        st.session_state.well_pair:str = list(df.WELL.unique())[0]
    with _p1:
        pair_opt_ = st.radio("Displayed objects", key="pair_opt", options=["ALL WELLS", "SINGLE WELL"], horizontal=True)
    with _p2:
        well_pair_ = st.selectbox("WELL", key="well_pair", options=list(df.WELL.unique()))
    with _p3:
        colorp_ = st.selectbox("COLOR", key="color_pair", options=columns_list(df))
    if pair_opt_ == "ALL WELLS":
        st.write(pairplot(df, curves, curves, colorp_))
    elif pair_opt_ == "SINGLE WELL":
        st.write(pairplot(df[df["WELL"]==well_pair_], curves, curves, colorp_))
    else:
        st.write("Undefined Error!")
125
+
126
def scatterPoint3D(df,):
    """EDA tab: interactive 3-D scatter of three selected curves (Plotly).

    Fix: the three duplicated ``if curve in ["LLD", "LLS"]`` blocks that set
    the log-scale flags are collapsed into tuple-membership tests —
    behaviour is unchanged.

    df : DataFrame with a WELL column and the log curves.
    """
    #3D Plotly----------------------------------------------------------------
    wells_ = list(df.WELL.unique())
    curvs_ = columns_list(df, no_well=True)
    colors_ = columns_list(df)
    sizes_ = ["WELL", "FRACTURE_INTENSITY", "DEPTH", None]
    symbols_ = ["WELL", "FRACTURE_INTENSITY", None]

    # Seed the widget state on first render only.
    if "well_3d" not in st.session_state:
        st.session_state.w_opt:str = "ALL WELLS"
        st.session_state.well_3d:str = wells_[0]
        st.session_state.x_3d:str = curvs_[0]
        st.session_state.y_3d:str = curvs_[0]
        st.session_state.z_3d:str = curvs_[0]
        st.session_state.color_3d:str = "WELL"
        st.session_state.size_3d:str = "DEPTH"
        st.session_state.symbol_3d:str = "WELL"

    p1_, p2_ = st.columns([1,7])
    with p1_:
        w_opt = st.radio("DisplayType", key="w_opt", options=["ALL WELLS", "SINGLE WELL"])
        well_ = st.selectbox("WELL", key="well_3d", options=wells_)
        x_ = st.selectbox("X", key="x_3d", options=curvs_)
        y_ = st.selectbox("Y", key="y_3d", options=curvs_)
        z_ = st.selectbox("Z", key="z_3d", options=curvs_)
        color_ = st.selectbox("COLOR", key="color_3d", options=colors_)
        size_ = st.selectbox("SIZE", key="size_3d", options=sizes_)
        symbol_ = st.selectbox("SYMBOL", key="symbol_3d", options=symbols_)
    with p2_:
        # Resistivity curves are displayed on a logarithmic axis.
        log_scale_curves = ("LLD", "LLS")
        log_x = x_ in log_scale_curves
        log_y = y_ in log_scale_curves
        log_z = z_ in log_scale_curves
        if w_opt == "ALL WELLS":
            plotly_3d(df, x_, y_, z_, color_, size_, symbol_, log_x, log_y, log_z)
        else:
            df_3d_plt = df[df["WELL"]==well_]
            plotly_3d(df_3d_plt, x_, y_, z_, color_, size_, symbol_, log_x, log_y, log_z)
+
168
+
169
def stViewCurves(df):
    """Curve-viewer tab: pick one well and a set of curves, then plot them
    with the project ``view_curves`` helper. Nothing is drawn until at
    least one curve is selected."""
    _w = st.selectbox(label='Select WELL', options=list(df.WELL.unique()), key='w_plot')
    if st.session_state.w_plot is not None:
        df_plot = df[df['WELL']== _w]
        _c = st.multiselect("Select curves for plotting:", key="curv_plt", options=columns_list(df, no_depth=True, no_well=True))
        if len(_c) != 0:
            view_curves(df_plot, curves=_c)
+
ui/UIConfigs.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+
4
def hide_menu_button():
    """Hide Streamlit's default hamburger menu and footer via injected CSS."""
    st.markdown(""" <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style> """, unsafe_allow_html=True
    )
10
+
11
def condense_layout():
    """Remove the main block-container padding so pages use the full width."""
    padding = 0  # rem, applied to all four sides below
    st.markdown(f""" <style>
    .reportview-container .main .block-container{{
        padding-top: {padding}rem;
        padding-right: {padding}rem;
        padding-left: {padding}rem;
        padding-bottom: {padding}rem;
    }} </style> """, unsafe_allow_html=True
    )
21
+
22
def set_page_config(page:str='home', logo_size:str=200, pagetile:str=None):
    """Set the Streamlit page config (wide layout, VPI icon) and draw a header.

    page: 'home' -> centred logo; 'sub' -> logo + *pagetile* side by side;
          any other value draws no header at all.
          # NOTE(review): some pages pass 'custom', which hits no branch — confirm.
    logo_size: logo width in pixels (annotated ``str`` but used as an int).
    pagetile: HTML snippet rendered next to the logo on 'sub' pages.
    """
    img = Image.open("/work/LogoVPI.png")  # NOTE(review): hard-coded absolute path — breaks outside this workspace
    st.set_page_config(# Alternate names: setup_page, page, layout
        layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
        initial_sidebar_state="auto", # Can be "auto", "expanded", "collapsed"
        page_title="VPI-MLogs", # String or None. Strings get appended with "• Streamlit".
        page_icon=img, # String, anything supported by st.image, or None.
    )
    if page == 'home':
        col_1, col_2, col_3, col_4, col_5, = st.columns(5)
        with col_3:
            st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", width=logo_size)
    elif page == 'sub':
        logo, info = st.columns([3, 7])
        with logo:
            st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", width=logo_size)
        with info:
            st.markdown(pagetile, unsafe_allow_html=True)
40
+
41
+
ui/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from .UIConfigs import *
from .PageComponents import *

# Explicit public API of the `ui` package — what `from ui import *` exposes.
__all__ = ['hide_menu_button',
           'condense_layout',
           'set_page_config',

           'subtab21',
           'subtab22',
           'subtab23',
           'subtab24',
           'subtab25',
           'subtab26',

           'scatterPoint3D',
           'stViewCurves',

           ]
ui/__pycache__/PageComponents.cpython-39.pyc ADDED
Binary file (6 kB). View file
 
ui/__pycache__/UIConfigs.cpython-39.pyc ADDED
Binary file (1.67 kB). View file
 
ui/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (347 Bytes). View file