# BHP_STREAMERLIT / app.py
# adomfosugit's picture
# Create app.py
# fe34bda verified
import streamlit as st
import numpy as np
import pandas as pd
import pickle
from copy import deepcopy as dc
from tensorflow.keras.models import load_model
# Page header.
st.title('Estimate BHP')
st.subheader("Upload your CSV file here")

# Columns every uploaded file must contain; checked right after it is read.
required_columns = [
    'PRODUCTION DATE',
    'Qliquid',
    'GOR',
    'Pwh',
    'THT',
    'WCT',
]

# Upload widget; the result is compared against None before any processing.
uploaded_file = st.file_uploader("Choose a file")
def prepare_dataframe_for_lstm(df, n_steps):
    """Return a lag-augmented copy of ``df`` indexed by ``'PRODUCTION DATE'``.

    For every lag ``i`` in ``1..n_steps`` and every feature in
    (Qliquid, GOR, Pwh, THT, WCT) a column named ``'<feature>(t-i)'`` is
    appended holding that feature shifted down by ``i`` rows. Rows that
    contain any missing value afterwards (including the first ``n_steps``
    rows, whose lags do not exist) are removed.

    Args:
        df: Frame containing ``'PRODUCTION DATE'`` and the five feature
            columns. It is not modified.
        n_steps: Number of lagged copies to create per feature.

    Returns:
        A new DataFrame with the original columns followed by the lag
        columns, grouped by lag.
    """
    lagged = df.copy(deep=True).set_index('PRODUCTION DATE')
    for lag in range(1, n_steps + 1):
        # Inner order matches the column order the rest of the app expects.
        for feature in ('Qliquid', 'GOR', 'Pwh', 'THT', 'WCT'):
            lagged[f'{feature}(t-{lag})'] = lagged[feature].shift(lag)
    return lagged.dropna()
# Validate the uploaded CSV, build the lagged frame and stash both frames in
# the session so later sections of the script can reach them.
if uploaded_file is not None:
    try:
        dataframe = pd.read_csv(uploaded_file)
        missing_columns = [name for name in required_columns if name not in dataframe.columns]
        if not missing_columns:
            # Keep an untouched copy for the trend selector alongside the
            # lag-augmented frame used for prediction.
            original_dataframe = dataframe.copy()
            processed_dataframe = prepare_dataframe_for_lstm(dataframe, 2)
            st.success("File successfully uploaded and verified!")
            st.write("Processed Data Preview:", processed_dataframe)
            st.session_state['processed_dataframe'] = processed_dataframe
            st.session_state['original_dataframe'] = original_dataframe
        else:
            st.error(f"The uploaded file is missing the following required columns: {', '.join(missing_columns)}")
            st.image("description.jpg", caption="Please check that the uploaded file has this structure")
    except Exception as e:
        # Top-level UI boundary: report any read/processing failure to the user.
        st.error(f"An error occurred while processing the file: {e}")
# Sidebar for model selection: display name -> pickle file holding the bundle.
model_files = {
    'modelJ05': 'modelXAMATR_MODELTRAINFILEBETASF.pkl',
    'modelJ57': 'model57 (2).pkl',
    'modelJ61': 'modelJ61.pkl',
    'modelJ68': 'modelJ68.pkl',
}
selected_model = st.sidebar.selectbox("Select Model", list(model_files))

# Sidebar for trend selection.
# Default to "no trends" so the name is always bound, even before any file has
# been uploaded; the multiselect only appears once a frame is in the session.
selected_trends = []
if 'original_dataframe' in st.session_state:
    # The date and the measured BHP column are not offered as overlay trends.
    available_columns = [
        col
        for col in st.session_state['original_dataframe'].columns
        if col not in ['PRODUCTION DATE', 'MBHFP']
    ]
    selected_trends = st.sidebar.multiselect("Select Trends to Display", available_columns)
# Load the saved model and scaler
def load_model_m(model_file):
    """Unpickle and return the object stored in ``model_file``.

    Args:
        model_file: Path of the pickle file to read.

    Returns:
        Whatever object the pickle contains.

    Note:
        ``pickle.load`` can execute arbitrary code, so this must only be
        pointed at trusted files.
    """
    with open(model_file, 'rb') as handle:
        return pickle.load(handle)
# Load the bundle for the model picked in the sidebar and split it into the
# two objects the prediction step uses.
data = load_model_m(model_files[selected_model])
model, scaler = data['model'], data['scaler']
def start_prediction():
    """Predict BHP for the uploaded data and store it in the session.

    Reads the lag-augmented frame from
    ``st.session_state['processed_dataframe']``, scales it with the
    module-level ``scaler``, runs the module-level ``model`` on the feature
    columns, writes the prediction into the ``MBHFP`` column, inverse-scales
    the result and stores the ``MBHFP`` values in
    ``st.session_state['prediction_result']``.

    An error is shown in the UI (and nothing is stored) when no processed
    frame is available or when it contains no rows.
    """
    if 'processed_dataframe' not in st.session_state:
        st.error("Please upload a file and preprocess it first.")
        return

    # Work on a copy so the placeholder column below never leaks back into
    # the frame kept in the session.
    df = st.session_state['processed_dataframe'].copy()
    if df.empty:
        st.error("The processed data contains no rows to predict on.")
        return

    # MBHFP is the quantity being estimated and is not a required upload
    # column. The scaled MBHFP column is overwritten by the prediction, so a
    # placeholder is enough to keep the column set complete; without it the
    # final 'MBHFP' lookup fails.
    if 'MBHFP' not in df.columns:
        df['MBHFP'] = 0.0

    # Columns to scale; MBHFP comes first so the prediction can be written
    # into column 0 before inverse scaling.
    columns_to_scale = ['MBHFP', 'Qliquid', 'GOR', 'Pwh', 'THT', 'WCT',
                        'Qliquid(t-1)', 'GOR(t-1)', 'Pwh(t-1)', 'THT(t-1)', 'WCT(t-1)',
                        'Qliquid(t-2)', 'GOR(t-2)', 'Pwh(t-2)', 'THT(t-2)', 'WCT(t-2)']
    scaled_columns = [col for col in columns_to_scale if col in df.columns]
    data_predicted = scaler.transform(df[scaled_columns])
    scaled_df = pd.DataFrame(data_predicted, columns=scaled_columns)

    # Model inputs: current values, then lag 1, then lag 2. The last group
    # previously listed 'WCT(t-1)' a second time instead of 'WCT(t-2)'.
    # NOTE(review): confirm this order matches the one used in training.
    feature_columns = [
        'Qliquid', 'GOR', 'Pwh', 'WCT', 'THT',
        'Qliquid(t-1)', 'GOR(t-1)', 'Pwh(t-1)', 'WCT(t-1)', 'THT(t-1)',
        'Qliquid(t-2)', 'GOR(t-2)', 'Pwh(t-2)', 'WCT(t-2)', 'THT(t-2)',
    ]
    X = scaled_df[feature_columns]
    # NOTE(review): the columns form three groups of five, yet the reshape
    # produces (rows, 5, 3); kept unchanged because it has to match the
    # layout the model was trained with - confirm.
    X = X.values.reshape((scaled_df.shape[0], 5, 3))

    # Flatten to one value per row and place it in the MBHFP slot so the
    # scaler can map it back to physical units.
    y_pred = model.predict(X).reshape(-1)
    data_predicted[:, 0] = y_pred
    unscaled_data = scaler.inverse_transform(data_predicted)
    unscaled_df = pd.DataFrame(unscaled_data, columns=scaled_columns)
    st.session_state['prediction_result'] = unscaled_df['MBHFP'].values
# Run the model on demand; the result is kept in the session between reruns.
if st.button("Predict"):
    start_prediction()

if 'prediction_result' in st.session_state:
    predicted_bhp = st.session_state['prediction_result']

    st.subheader("Prediction Result:")
    # Show predictions as a table
    predictions_table = pd.DataFrame({
        'Prediction Index': range(len(predicted_bhp)),
        'Predicted BHP': predicted_bhp,
    })
    st.write(predictions_table)

    # Create simple index-based DataFrame for plotting
    plot_df = pd.DataFrame({'Predicted BHP': predicted_bhp})

    # Add selected trends. They are taken from the processed frame, whose rows
    # are exactly the rows the prediction was computed from, instead of
    # slicing the original frame at a fixed offset (which drifts whenever rows
    # other than the first two were dropped during preprocessing).
    if 'original_dataframe' in st.session_state and selected_trends:
        processed_df = st.session_state['processed_dataframe']
        if len(processed_df) == len(plot_df):
            for trend in selected_trends:
                plot_df[trend] = processed_df[trend].to_numpy()
        else:
            # The stored prediction belongs to a different upload.
            st.warning("The stored prediction does not match the uploaded file. "
                       "Press Predict again to refresh it.")

    # Plot with simple numeric index
    st.subheader("Visualization")
    st.line_chart(plot_df)

    if st.checkbox("Normalize data for better comparison"):
        # A constant series has zero spread; divide by 1 instead so it plots
        # as a flat line rather than turning into NaN.
        spread = plot_df.std().replace(0, 1)
        normalized_df = (plot_df - plot_df.mean()) / spread
        st.line_chart(normalized_df)