# BHSFP / app.py — Bottom Hole Pressure (BHP) estimator Streamlit app.
# (Header reconstructed from repository page metadata: upload "Update app.py",
# commit 29aa0ff, by adomfosugit.)
import streamlit as st
import numpy as np
import pandas as pd
import pickle
from datetime import datetime
from scipy.optimize import fsolve
import pandasai
from pandasai import SmartDataframe
from pandasai.llm import BambooLLM
# Page chrome and shared configuration for the whole app.
st.set_page_config(page_title="BHP Estimator", layout="wide")
st.title('Bottom Hole Pressure (BHP) Estimator')
st.subheader("Upload your production data or enter manual inputs")

# Columns an uploaded file must contain before predictions are attempted.
# NOTE: the trailing space in 'GOR ' is intentional — it matches the column
# name the scaler/model were trained with (see make_predictions below).
# FIX: the original list contained 'THT' twice; the duplicate is removed.
required_columns = ['PRODUCTION DATE', 'Qo', 'THT', 'GOR ', 'Pwh(psi)', 'Depth', 'WCT']

# Session state survives Streamlit reruns; initialise each key exactly once.
if 'original_df' not in st.session_state:
    st.session_state.original_df = None   # last successfully parsed upload
if 'predictions' not in st.session_state:
    st.session_state.predictions = None   # flattened model output array
# Sidebar: choose which trained well-pair model every tab will use.
with st.sidebar:
    st.header("Model Configuration")

    model_option = st.selectbox(
        "Select Well Model",
        ["J57 & J05", "J19 & J56", "J37 & J51"],
        index=0,
        help="Select the appropriate model for your well"
    )

    # Well-pair label -> pickled {'model': ..., 'scaler': ...} bundle on disk.
    model_files = {
        "J57 & J05": 'modelBIGDATA5US1NNMAXP57.pkl',
        "J19 & J56": 'modelBIGDATA5US1NNMAXP576168allL1NN.pkl',
        "J37 & J51": 'modelBIGDATA5US1NNMAXP5137.pkl'
    }

    @st.cache_resource
    def load_model(model_path):
        """Unpickle a saved {'model': ..., 'scaler': ...} bundle.

        Cached by Streamlit so each file is unpickled at most once per
        process. Returns (model, scaler), or (None, None) on failure with
        the error surfaced in the UI.
        """
        # SECURITY NOTE: pickle.load executes arbitrary code on load; only
        # ever point this at model files shipped with the app, never at
        # user-supplied uploads.
        try:
            with open(model_path, 'rb') as file:
                saved_data = pickle.load(file)
            return saved_data['model'], saved_data['scaler']
        except Exception as e:
            st.error(f"Failed to load model: {str(e)}")
            return None, None

    # Load the selected model; the rest of the app is useless without one.
    model, scaler = load_model(model_files[model_option])
    if model is None or scaler is None:
        st.stop()
# Tabs are created once; each `with tabN:` section below fills one tab.
tab1, tab2, tab3, tab4 = st.tabs(["📁 File Prediction", "✍️ Manual Prediction", "🔍 Solve for Parameter", "🧠 AI Data Analysis "])

with tab1:
    st.header("File-based Prediction")
    st.info(f"Using model: {model_option}")
    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'], key="file_uploader")

    # Parse, validate and feature-engineer the upload.
    if uploaded_file is not None:
        try:
            # Read by extension; anything that isn't .csv is treated as Excel.
            if uploaded_file.name.endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            # Coerce feature columns to numeric and drop incomplete rows.
            # BUG FIX: the date column must be excluded from the coercion —
            # the original converted 'PRODUCTION DATE' too, turning date
            # strings into NaN so dropna() discarded every row.
            for col in df.columns:
                if col != 'PRODUCTION DATE':
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            df = df.dropna()

            # Validate the schema before computing derived features.
            missing_columns = [col for col in required_columns if col not in df.columns]
            if missing_columns:
                st.error(f"Missing required columns: {', '.join(missing_columns)}")
            else:
                df['PRODUCTION DATE'] = pd.to_datetime(df['PRODUCTION DATE'])
                # Hydrostatic gradient (psi/ft): water 0.433, oil 0.273,
                # weighted by the water cut fraction; Ph is the static head.
                df['Fluid gradient'] = (df['WCT']/100)*0.433 + (1-(df['WCT']/100))*0.273
                df['Ph'] = df['Fluid gradient'] * df['Depth']
                st.session_state.original_df = df
                st.success("File successfully processed!")
                st.write("Data Preview:")
                st.dataframe(df.head(), use_container_width=True)
        except Exception as e:
            st.error(f"An error occurred: {str(e)}")

    def make_predictions():
        """Scale the stored dataframe's features and predict BHP.

        Returns a copy of the dataframe with 'Predicted_BHP' and
        'Model_Used' columns appended, or None if no file has been
        processed or prediction fails (error shown in the UI).
        """
        if st.session_state.original_df is None:
            st.error("Please upload and process a file first")
            return None
        try:
            # Feature order must match the order the scaler was fitted on.
            scaled_features = ['Qo', 'GOR ', 'THT', 'Pwh(psi)', 'Ph', 'Depth']
            X_test_scaled = scaler.transform(st.session_state.original_df[scaled_features])
            predictions = model.predict(X_test_scaled)
            st.session_state.predictions = predictions.flatten()
            result_df = st.session_state.original_df.copy()
            result_df['Predicted_BHP'] = st.session_state.predictions
            result_df['Model_Used'] = model_option
            return result_df
        except Exception as e:
            st.error(f"Prediction error: {str(e)}")
            return None

    if st.button("Make Predictions from File", key="file_predict"):
        result_df = make_predictions()
        if result_df is not None:
            st.write("Prediction Results:")
            st.dataframe(result_df[['PRODUCTION DATE', 'Predicted_BHP', 'Model_Used']], use_container_width=True)

    # Plot predicted BHP over time once both inputs and outputs exist.
    if st.session_state.predictions is not None and st.session_state.original_df is not None:
        st.subheader("Visualization")
        plot_df = st.session_state.original_df.copy()
        plot_df['Predicted_BHP'] = st.session_state.predictions
        st.line_chart(
            plot_df.set_index('PRODUCTION DATE')[['Predicted_BHP']],
            use_container_width=True
        )
with tab2:
    # Single-point prediction from manually entered well parameters.
    st.header("Manual Single Prediction")
    st.info(f"Using model: {model_option}")

    with st.form("manual_prediction_form"):
        col1, col2 = st.columns(2)
        with col1:
            qo = st.number_input("Oil Rate (Qo, STB/d)", min_value=0.0)
            gor = st.number_input("Gas-Oil Ratio (GOR, scf/STB)", min_value=0.0)
            tht = st.number_input("Tubing Head Temperature (THT, °C)", min_value=0.0)
        with col2:
            pwh = st.number_input("Wellhead Pressure (Pwh, psi)", min_value=0.0)
            wct = st.number_input("Water Cut (WCT, %)", min_value=0.0, max_value=100.0)
            depth = st.number_input("Depth m", min_value=0.0, max_value=10000.0)
        submitted = st.form_submit_button("Predict BHP")

    if submitted:
        try:
            # Water-cut-weighted hydrostatic gradient (psi/ft) and the
            # resulting static head — same formula as the file pipeline.
            fluid_gradient = (wct/100)*0.433 + (1-(wct/100))*0.273
            ph = fluid_gradient * depth
            # Feature order must match the scaler's training order:
            # [Qo, GOR, THT, Pwh, Ph, Depth].
            input_data = np.array([[qo, gor, tht, pwh, ph, depth]])
            scaled_input = scaler.transform(input_data)
            prediction = model.predict(scaled_input)[0][0]
            st.success(f"Predicted Bottom Hole Pressure: **{prediction:.2f} psi**")

            # Echo the inputs back so the user can sanity-check them.
            st.subheader("Input Summary")
            input_summary = {
                "Parameter": ["Oil Rate", "GOR", "THT", "Wellhead Pressure",
                              "Depth", "Water Cut", "Fluid Gradient", "Model Used"],
                "Value": [
                    f"{qo} STB/d",
                    f"{gor} scf/STB",
                    f"{tht} °C",
                    f"{pwh} psi",
                    f"{depth} m",
                    f"{wct}%",
                    f"{fluid_gradient:.4f} psi/ft",
                    model_option
                ],
                "Units": ["STB/d", "scf/STB", "°C", "psi", "m", "%", "psi/ft", ""]
            }
            st.table(pd.DataFrame(input_summary))
        except Exception as e:
            st.error(f"Prediction failed: {str(e)}")
with tab3:
    # Inverse problem: find the Qo, GOR or WCT that makes the model's
    # predicted BHP equal a user-supplied target, using fsolve.
    st.header("Iterative Parameter Solver")
    st.info("Solve for Qo, GOR, or WCT to match a target BHP")

    target_param = st.selectbox(
        "Parameter to Solve For",
        ["Qo", "GOR", "WCT"],
        index=0
    )

    # Known values (with defaults); the selected one is treated as the
    # unknown and its widget value is used only as an initial guess.
    col1, col2 = st.columns(2)
    with col1:
        qo = st.number_input("Oil Rate (Qo, STB/d)", min_value=0.0, value=1000.0, key="solve_qo")
        gor = st.number_input("Gas-Oil Ratio (GOR, scf/STB)", min_value=0.0, value=500.0, key="solve_gor")
        tht = st.number_input("Tubing Head Temp (THT, °C)", min_value=0.0, value=10.0, key="solve_tht")
    with col2:
        pwh = st.number_input("Wellhead Pressure (Pwh, psi)", min_value=0.0, value=300.0, key="solve_pwh")
        depth = st.number_input("Depth (m)", min_value=0.0, value=5000.0, key="solve_depth")
        wct = st.number_input("Water Cut (WCT, %)", min_value=0.0, max_value=100.0, value=30.0, key="solve_wct")

    target_bhp = st.number_input("Target BHP (psi)", min_value=0.0, value=2000.0, key="target_bhp")

    if st.button("Solve Iteratively"):
        try:
            def equation_to_solve(x):
                """Residual f(x) = predicted_BHP(x) - target_BHP.

                x is the 1-element fsolve state for the chosen parameter;
                it is clamped to its physical range before evaluation so
                the model is never queried at impossible values.
                """
                if target_param == "WCT":
                    x[0] = np.clip(x[0], 0.0, 100.0)   # water cut is a percentage
                elif target_param == "Qo":
                    x[0] = max(x[0], 0.0)              # no negative oil rate
                elif target_param == "GOR":
                    x[0] = max(x[0], 0.0)              # no negative GOR

                # Substitute the unknown into the fixed inputs.
                inputs = {
                    'Qo': x[0] if target_param == "Qo" else qo,
                    'GOR ': x[0] if target_param == "GOR" else gor,
                    'THT': tht,
                    'Pwh(psi)': pwh,
                    'Depth': depth,
                    'WCT': x[0] if target_param == "WCT" else wct
                }

                # Same derived-feature formula as the prediction tabs.
                current_wct = inputs['WCT']
                fluid_gradient = (current_wct/100)*0.433 + (1-(current_wct/100))*0.273
                ph = fluid_gradient * inputs['Depth']

                scaled_input = scaler.transform(np.array([
                    [inputs['Qo'], inputs['GOR '], inputs['THT'],
                     inputs['Pwh(psi)'], ph, inputs['Depth']]
                ]))
                current_bhp = model.predict(scaled_input)[0][0]
                return current_bhp - target_bhp

            # Parameter-specific initial guesses, nudged away from zero /
            # range boundaries where fsolve tends to stall.
            initial_guess = {
                "Qo": max(qo, 100),
                "GOR": max(gor, 300),
                "WCT": np.clip(wct, 1.0, 99.0)
            }[target_param]

            solution = fsolve(
                equation_to_solve,
                [initial_guess],
                xtol=1e-6
            )

            # fsolve may step outside the clamp; re-clip the reported WCT.
            if target_param == "WCT":
                solution[0] = np.clip(solution[0], 0.0, 100.0)

            st.success(f"**Solved {target_param} = {solution[0]:.2f}** (for BHP = {target_bhp} psi)")

            st.subheader("Solution Summary")
            results = pd.DataFrame({
                "Parameter": ["Qo", "GOR", "WCT", "THT", "Pwh", "Depth", "Target BHP"],
                "Value": [
                    f"{solution[0] if target_param == 'Qo' else qo:.2f}",
                    f"{solution[0] if target_param == 'GOR' else gor:.2f}",
                    f"{solution[0] if target_param == 'WCT' else wct:.2f}%",
                    f"{tht:.2f}",
                    f"{pwh:.2f}",
                    f"{depth:.2f}",
                    f"{target_bhp:.2f}"
                ],
                "Units": ["STB/d", "scf/STB", "%", "°C", "psi", "m", "psi"]
            })
            st.dataframe(results, hide_index=True)
        except Exception as e:
            st.error(f"Solving failed: {str(e)}")
            st.warning("Check if the target BHP is physically achievable with given inputs.")
with tab4:
    # Natural-language querying of the uploaded data via PandasAI.
    st.header("AI-Powered Data Analysis")
    st.caption("Ask questions about your uploaded data using natural language.")

    # SECURITY NOTE(review): this API key is committed in source and was
    # exposed publicly — it should be revoked and moved to st.secrets /
    # an environment variable. Kept in place only to preserve behavior.
    pandasai.api_key.set("PAI-09de3b8d-edb9-4d72-998f-60508abfb286")

    # Reuse the file from tab 1 when available; otherwise ask for one here.
    uploaded_file_ai = uploaded_file if uploaded_file is not None else st.file_uploader("Upload a CSV or Excel file for analysis", type=["csv", "xlsx"], key="ai_file")

    if uploaded_file_ai is not None:
        try:
            if uploaded_file_ai.name.endswith(".csv"):
                df_ai = pd.read_csv(uploaded_file_ai)
            else:
                df_ai = pd.read_excel(uploaded_file_ai)

            st.dataframe(df_ai.head(), use_container_width=True)

            # SmartDataframe routes .chat() questions through the LLM.
            sdf = SmartDataframe(df_ai, config={"llm": BambooLLM(api_key="PAI-09de3b8d-edb9-4d72-998f-60508abfb286")})

            user_query = st.text_area("Ask a question about your data", placeholder="")
            if st.button("Run AI Query", key="run_pandasai"):
                with st.spinner("Thinking..."):
                    try:
                        result = sdf.chat(user_query)
                        st.subheader("AI Response:")
                        st.write(result)
                    except Exception as e:
                        st.error(f"Error processing query: {e}")
        except Exception as e:
            st.error(f"Failed to process uploaded file: {e}")
    else:
        st.info("Upload a file to begin AI analysis.")
# Extra exploration of the processed upload, shown below all tabs.
if st.session_state.original_df is not None:
    with st.expander("Advanced Data Exploration"):
        # Any numeric/derived column can be charted against the date axis.
        selected_columns = st.multiselect(
            "Select parameters to visualize",
            st.session_state.original_df.columns.drop('PRODUCTION DATE'),
            default=['Qo']
        )
        if selected_columns:
            st.line_chart(
                st.session_state.original_df.set_index('PRODUCTION DATE')[selected_columns],
                use_container_width=True
            )