# Fracture_Webapp / pages / 4_Fracture_Prediction.py
# (Hugging Face page header residue: "Update pages/4_Fracture_Prediction.py",
#  commit b933e6a, verified — kept here as a comment so the file parses.)
import numpy as np
import lightgbm as lgb
import pickle, os
import bz2file as bz2
import altair as alt
import streamlit as st
from ui import *
from mLogsFunctions import *
from mLogsFunctions.fx import *
#ignore version warnings
import warnings
warnings.filterwarnings("ignore")
#Global variables
THRESHOLD_GLOBAL = 0.5
#All wells to be trained
wells_name = ["01-97-HXS-1X", "15-1-SN-1X", "15-1-SN-2X", "15-1-SN-3XST", "15-1-SN-4X", "15-1-SNN-1P",
"15-1-SNN-2P", "15-1-SNN-3P", "15-1-SNN-4P", "15-1-SNS-1P", "15-1-SNS-2P", "15-1-SNS-4P"]
#Obtain data and label of wells
name_features = ["GR", "LLD", "LLS", "NPHI", "RHOB", "DTC", "DTS"]
label = "FRACTURE_ZONE"
#-----------------------------------------------------------------
# processing pipeline
def remove_negative_val(df, col):
return df.drop(index=df[df[col] < 0].index)
def rel_depth(df):
dfx = []
for well in df.WELL.unique():
df_ = df[df.WELL==well].sort_values(by="DEPTH", ascending=True)
dfx.append(df_.assign(rel_depth=df_.DEPTH / df_.DEPTH.values[0]))
return pd.concat(dfx).reset_index(drop=True)
def tweak_data_S(df):
return (
df.assign(
# FRACTURE_ZONE=df.FRACTURE_ZONE.replace({-9999: 0, np.nan: 0}).astype('int8'),
GR=df.GR.replace({-9999.:0.}).astype('float32'),
DCALI_FINAL=df.DCALI_FINAL.replace({-9999.:0.}).astype('float32'),
LLD=df.LLD.replace({-9999.:0.}).astype('float32'),
LLS=df.LLS.replace({-9999.:0.}).astype('float32'),
NPHI=df.NPHI.replace({-9999.:0.}).astype('float32'),
RHOB=df.RHOB.replace({-9999.:0.}).astype('float32'),
DTC=df.DTC.replace({-9999.:0.}).astype('float32'),
DTS=df.DTS.replace({-9999.:0.}).astype('float32'),
DEPTH=df.DEPTH.astype('float32')
)
.pipe(remove_negative_val, "RHOB")
.pipe(remove_negative_val, "DTC")
.pipe(remove_negative_val, "DTS")
.pipe(remove_negative_val, "GR")
.pipe(remove_negative_val, "LLD")
).pipe(rel_depth)
# Calculate the confusion matrix of applying model on dataframe (including features and label) df with threshold
def calculate_confusion_matrix (model = None, df= None, threshold=None):
model_prediction = [model]
# Apply model on dataframe
proba = Prediction_LGBM(trained_models=model_prediction, data = df, feature_names=name_features)
proba_well = proba.loc[:, "model_0"]
# Apply threshold
if threshold==None: threshold = 0.5
# Get label from dataframe df
well_proba = proba_well.apply(lambda x: 1 if x >= threshold else 0)
return well_proba
#------------------------------------------------------------------
# Load any compressed pickle file
file = "/work/2022_VPIMLogs_WebApp/models/LightGBM_0.45.pbz2"
def decompress_pickle(file):
data = bz2.BZ2File(file, 'rb')
data = pickle.load(data)
return data
# model_best = decompress_pickle(file)
# Loading Modeling
# model_best = lgb.Booster(model_file="/work/2022_VPIMLogs_WebApp/models/LGBM_20221125.json")
#Loading data from browser:----------------------------------------
#Streamlit Dashboard------------------------------------------------------------------------------------------
pagetile = """<center><h1>PREDICTION SITE</h1></center>"""
st.markdown(pagetile, unsafe_allow_html=True)
# set_page_config(page='custom')
hide_menu_button()
condense_layout()
logo_site, info_site = st.columns([1.5, 8.5])
with logo_site:
st.image("https://i.ibb.co/Yd42K98/LogoVPI.png", use_column_width='auto')
with info_site:
# Option 1: CSV File Loading
st.write('You can load your csv file using the file upload or selection from LAS Exploration option below.')
st.subheader("1. CSV File Loading")
st.caption('## 1.1. CSV from Uploader')
df = csv_uploader()
# Option 2: CSV from LAS Exploration
st.caption('## 1.2. CSV from LAS Exploration')
dir_path = 'data/merged/'
csv_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.csv')]
selected_csv_file= st.multiselect('Select a CSV file', csv_files, key = 'st.session_state.selected_well_multi')
if selected_csv_file: # Nếu người dùng đã chọn file CSV
# Đọc file csv được chọn vào DataFrame
file_path = 'data/merged/'
wells_df_predict = pd.concat([pd.read_csv(file_path + f) for f in selected_csv_file])
# wells_df_predict = tweak_data_S(wells_df_predict)
else: # Nếu người dùng không chọn file CSV
wells_df_predict = df
# wells_df_predict = tweak_data_S(wells_df_predict)
st.write('You can load your json file using the file upload or selection from TRAINING SECTION below.')
st.subheader("2. JSON File Loading")
st.caption('## 2.1. JSON from Uploader')
model_best_uploader = None
uploaded_file = st.file_uploader("Choose a JSON file", type="json")
# Kiểm tra xem có file được upload hay không
if uploaded_file is not None:
# Lưu file JSON tạm thời
with open("temp.json", "w") as f:
f.write(uploaded_file.read().decode("utf-8"))
# Đường dẫn tới file JSON tạm thời
temp_file_path = os.path.abspath("temp.json")
# Tạo Booster từ file JSON
model_best_uploader = lgb.Booster(model_file=temp_file_path)
# Xóa file tạm sau khi sử dụng
os.remove(temp_file_path)
# Option 2: JSON from TRAINING SECTION
st.caption('## 2.2. JSON from TRANING SECTION')
dir_path = 'models/'
json_files = [filename for filename in os.listdir(dir_path) if filename.endswith('.json')]
selected_json_file= st.multiselect('Select a JSON file', json_files, key = 'st.session_state.selected_well_multi_JSON')
if selected_json_file: # Nếu người dùng đã chọn file json
# Đọc file json được chọn vào Booster
file_path = 'models/'
model_files = "/models/05_13_2023_11_50_38_model_LGBM.json"
model_best = lgb.Booster(model_file=model_files)
else: # Nếu người dùng không chọn file json
model_best = model_best_uploader
if wells_df_predict is not None:
wells_df_predict = tweak_data_S(wells_df_predict)
wells_df_predict = wells_df_predict.replace({-9999: np.nan}).dropna(how='any', subset = "FRACTURE_ZONE")
st.write("Data Input:")
st.dataframe(wells_df_predict.sort_index(), width=1400, height=300)
st.write('---')
st.write("Selected Prediction Model:")
st.write(model_best)
#------------------------------------------------------------------
feature_names = [col for col in wells_df_predict.columns if col not in ["WELL", "DEPTH","FRACTURE_ZONE"]]
# Full data for export data
st.session_state.pred = st.button("Predict Fracture Zone")
if st.session_state.pred:
threshold = 0.5
#Make label Prediction
predictions = (model_best.predict(wells_df_predict[feature_names])> threshold).astype(int)
wells_df_predict['FRACTURE_ZONE_PRED'] = predictions
st.dataframe(wells_df_predict, width=1400, height=300)
#Plot Data------------------------------------------------------------------
plotting_curves = [c for c in wells_df_predict.columns.unique() if c not in ["DEPTH", "WELL", "TVD", "FRACTURE_ZONE", "FRACTURE_ZONE_PRED", "DCALI_FINAL", "INCL", "AZIM_TN", "rel_depth"]]
plotting_curves.sort()
if "FRACTURE_ZONE_PRED" in wells_df_predict.columns.unique():
plotting_curves.append("FRACTURE_ZONE_PRED")
for well in wells_df_predict.WELL.unique():
st.write('---')
st.write(f"{well} Logs: \n")
well_plot = wells_df_predict[wells_df_predict.WELL == well]
charts_dict={}
for i, c in enumerate(plotting_curves):
charts_dict[i] = curve_plot(data=well_plot,filted_data=None, x_column=c)
#Show Curve-----------------------------------------------------------------------
st.write(alt.concat(*charts_dict.values(), columns = 12).configure(autosize='fit'))
# Download --------------------------------------------------------------
st.write('---')
st.write("Download final result to csv file")
st.download_button(label='Download All Wells',
data = wells_df_predict.to_csv(),
file_name='FracturePredictionALL.csv',
mime='text/csv')
hide_menu_button()
condense_layout()