# Fracture_Webapp / pages / 4_Fracture_Prediction.py
# (Hugging Face page header residue: "Update pages/4_Fracture_Prediction.py",
#  commit b933e6a, verified — kept here as a comment so the file parses.)
import numpy as np
import lightgbm as lgb
import pickle, os
import bz2file as bz2
import altair as alt
import streamlit as st
from ui import *
from mLogsFunctions import *
from mLogsFunctions.fx import *
#ignore version warnings
import warnings
warnings.filterwarnings("ignore")
#Global variables
THRESHOLD_GLOBAL = 0.5
#All wells to be trained
wells_name = ["01-97-HXS-1X", "15-1-SN-1X", "15-1-SN-2X", "15-1-SN-3XST", "15-1-SN-4X", "15-1-SNN-1P",
"15-1-SNN-2P", "15-1-SNN-3P", "15-1-SNN-4P", "15-1-SNS-1P", "15-1-SNS-2P", "15-1-SNS-4P"]
#Obtain data and label of wells
name_features = ["GR", "LLD", "LLS", "NPHI", "RHOB", "DTC", "DTS"]
label = "FRACTURE_ZONE"
#-----------------------------------------------------------------
# processing pipeline
def remove_negative_val(df, col):
return df.drop(index=df[df[col] < 0].index)
def rel_depth(df):
dfx = []
for well in df.WELL.unique():
df_ = df[df.WELL==well].sort_values(by="DEPTH", ascending=True)
dfx.append(df_.assign(rel_depth=df_.DEPTH / df_.DEPTH.values[0]))
return pd.concat(dfx).reset_index(drop=True)
def tweak_data_S(df):
return (
df.assign(
# FRACTURE_ZONE=df.FRACTURE_ZONE.replace({-9999: 0, np.nan: 0}).astype('int8'),
GR=df.GR.replace({-9999.:0.}).astype('float32'),
DCALI_FINAL=df.DCALI_FINAL.replace({-9999.:0.}).astype('float32'),
LLD=df.LLD.replace({-9999.:0.}).astype('float32'),
LLS=df.LLS.replace({-9999.:0.}).astype('float32'),
NPHI=df.NPHI.replace({-9999.:0.}).astype('float32'),
RHOB=df.RHOB.replace({-9999.:0.}).astype('float32'),
DTC=df.DTC.replace({-9999.:0.}).astype('float32'),
DTS=df.DTS.replace({-9999.:0.}).astype('float32'),
DEPTH=df.DEPTH.astype('float32')
)
.pipe(remove_negative_val, "RHOB")
.pipe(remove_negative_val, "DTC")
.pipe(remove_negative_val, "DTS")
.pipe(remove_negative_val, "GR")
.pipe(remove_negative_val, "LLD")
).pipe(rel_depth)
# Calculate the confusion matrix of applying model on dataframe (including features and label) df with threshold
def calculate_confusion_matrix (model = None, df= None, threshold=None):
model_prediction = [model]
# Apply model on dataframe
proba = Prediction_LGBM(trained_models=model_prediction, data = df, feature_names=name_features)
proba_well = proba.loc[:, "model_0"]
# Apply threshold
if threshold==None: threshold = 0.5
# Get label from dataframe df
well_proba = proba_well.apply(lambda x: 1 if x >= threshold else 0)
return well_proba
#------------------------------------------------------------------
# Load any compressed pickle file
file = "/work/2022_VPIMLogs_WebApp/models/LightGBM_0.45.pbz2"
def decompress_pickle(file):
data = bz2.BZ2File(file, 'rb')
data = pickle.load(data)
return data
# model_best = decompress_pickle(file)
# Loading Modeling
# model_best = lgb.Booster(model_file="/work/2022_VPIMLogs_WebApp/models/LGBM_20221125.json")
#Loading data from browser:----------------------------------------
#Streamlit Dashboard------------------------------------------------------------------------------------------
pagetile = """<center><h1>PREDICTION SITE</h1></center>"""
st.markdown(pagetile, unsafe_allow_html=True)
# set_page_config(page='custom')
hide_menu_button()
condense_layout()
logo_site, info_site = st.columns([1.5, 8.5])
with logo_site:
st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
with info_site:
# Option 1: CSV File Loading
st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')
st.subheader("1. CSV File Loading")
st.caption('## 1.1. CSV from Uploader')
df = csv_uploader()
# Option 2: CSV from LAS Exploration
st.caption('## 1.2. CSV from LAS Exploration')
dir_path = 'data/merged/'
csv_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.csv')]
selected_csv_file= st.multiselect('Select a CSV file', csv_files, key = 'st.session_state.selected_well_multi')
if selected_csv_file: # Nếu người dùng đã chọn file CSV
# Đọc file csv được chọn vào DataFrame
file_path = 'data/merged/'
wells_df_predict = pd.concat([pd.read_csv(file_path + f) for f in selected_csv_file])
# wells_df_predict = tweak_data_S(wells_df_predict)
else: # Nếu người dùng không chọn file CSV
wells_df_predict = df
# wells_df_predict = tweak_data_S(wells_df_predict)
st.write('You can load your json file using the file upload or selection from TRAINING SECTION below.')
st.subheader("2. JSON File Loading")
st.caption('## 2.1. JSON from Uploader')
model_best_uploader = None
uploaded_file = st.file_uploader("Choose a JSON file", type="json")
# Kiểm tra xem có file được upload hay không
if uploaded_file is not None:
# Lưu file JSON tạm thời
with open("temp.json", "w") as f:
f.write(uploaded_file.read().decode("utf-8"))
# Đường dẫn tới file JSON tạm thời
temp_file_path = os.path.abspath("temp.json")
# Tạo Booster từ file JSON
model_best_uploader = lgb.Booster(model_file=temp_file_path)
# Xóa file tạm sau khi sử dụng
os.remove(temp_file_path)
# Option 2: JSON from TRAINING SECTION
st.caption('## 2.2. JSON from TRANING SECTION')
dir_path = 'models/'
json_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.json')]
selected_json_file= st.multiselect('Select a JSON file', json_files, key = 'st.session_state.selected_well_multi_JSON')
if selected_json_file: # Nếu người dùng đã chọn file json
# Đọc file json được chọn vào Booster
file_path = 'models/'
model_files = "/models/05_13_2023_11_50_38_model_LGBM.json"
model_best = lgb.Booster(model_file=model_files)
else: # Nếu người dùng không chọn file json
model_best = model_best_uploader
if wells_df_predict is not None:
wells_df_predict = tweak_data_S(wells_df_predict)
wells_df_predict = wells_df_predict.replace({-9999: np.nan}).dropna(how='any', subset = "FRACTURE_ZONE")
st.write("Data Input:")
st.dataframe(wells_df_predict.sort_index(), width=1400, height=300)
st.write('---')
st.write("Selected Prediction Model:")
st.write(model_best)
#------------------------------------------------------------------
feature_names = [col for col in wells_df_predict.columns if col not in ["WELL", "DEPTH","FRACTURE_ZONE"]]
# Full data for export data
st.session_state.pred = st.button("Predict Fracture Zone")
if st.session_state.pred:
threshold = 0.5
#Make label Prediction
predictions = (model_best.predict(wells_df_predict[feature_names])> threshold).astype(int)
wells_df_predict['FRACTURE_ZONE_PRED'] = predictions
st.dataframe(wells_df_predict, width=1400, height=300)
#Plot Data------------------------------------------------------------------
plotting_curves = [c for c in wells_df_predict.columns.unique() if c not in ["DEPTH", "WELL", "TVD", "FRACTURE_ZONE", "FRACTURE_ZONE_PRED", "DCALI_FINAL", "INCL", "AZIM_TN", "rel_depth"]]
plotting_curves.sort()
if "FRACTURE_ZONE_PRED" in wells_df_predict.columns.unique():
plotting_curves.append("FRACTURE_ZONE_PRED")
for well in wells_df_predict.WELL.unique():
st.write('---')
st.write(f"{well} Logs: \n")
well_plot = wells_df_predict[wells_df_predict.WELL == well]
charts_dict={}
for i, c in enumerate(plotting_curves):
charts_dict[i] = curve_plot(data=well_plot,filted_data=None, x_column=c)
#Show Curve-----------------------------------------------------------------------
st.write(alt.concat(*charts_dict.values(), columns = 12).configure(autosize='fit'))
# Download --------------------------------------------------------------
st.write('---')
st.write("Download final result to csv file")
st.download_button(label='Download All Wells',
data = wells_df_predict.to_csv(),
file_name='FracturePredictionALL.csv',
mime='text/csv')
hide_menu_button()
condense_layout()