import streamlit as st import numpy as np import pandas as pd import pickle from datetime import datetime from scipy.optimize import fsolve import pandasai from pandasai import SmartDataframe from pandasai.llm import BambooLLM st.set_page_config(page_title="BHP Estimator", layout="wide") st.title('Bottom Hole Pressure (BHP) Estimator') st.subheader("Upload your production data or enter manual inputs") # Required columns for file validation required_columns = ['PRODUCTION DATE', 'Qo', 'THT', 'GOR ', 'Pwh(psi)', 'THT', 'Depth', 'WCT'] # Initialize session state variables if 'original_df' not in st.session_state: st.session_state.original_df = None if 'predictions' not in st.session_state: st.session_state.predictions = None # Sidebar for model selection with st.sidebar: st.header("Model Configuration") # Model selection model_option = st.selectbox( "Select Well Model", ["J57 & J05", "J19 & J56", "J37 & J51"], index=0, help="Select the appropriate model for your well" ) # Model file mapping model_files = { "J57 & J05": 'modelBIGDATA5US1NNMAXP57.pkl', "J19 & J56": 'modelBIGDATA5US1NNMAXP576168allL1NN.pkl', "J37 & J51": 'modelBIGDATA5US1NNMAXP5137.pkl' } # Model loading function with caching @st.cache_resource def load_model(model_path): try: with open(model_path, 'rb') as file: saved_data = pickle.load(file) model = saved_data['model'] scaler = saved_data['scaler'] return model, scaler except Exception as e: st.error(f"Failed to load model: {str(e)}") return None, None # Load the selected model model, scaler = load_model(model_files[model_option]) if model is None or scaler is None: st.stop() # Define tabs only once tab1, tab2, tab3, tab4 = st.tabs(["📁 File Prediction", "✍️ Manual Prediction", "🔍 Solve for Parameter", "🧠 AI Data Analysis "]) with tab1: # File uploader section st.header("File-based Prediction") st.info(f"Using model: {model_option}") uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'], key="file_uploader") # File processing and validation if uploaded_file is not None: try: # Read file based on type if uploaded_file.name.endswith('.csv'): df = pd.read_csv(uploaded_file) else: df = pd.read_excel(uploaded_file) # Convert to numeric and drop NA for col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce') df = df.dropna() # Check required columns missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: st.error(f"Missing required columns: {', '.join(missing_columns)}") else: # Convert date and add features df['PRODUCTION DATE'] = pd.to_datetime(df['PRODUCTION DATE']) df['Fluid gradient'] = (df['WCT']/100)*0.433 + (1-(df['WCT']/100))*0.273 df['Ph'] = df['Fluid gradient'] * df['Depth'] st.session_state.original_df = df st.success("File successfully processed!") # Show preview st.write("Data Preview:") st.dataframe(df.head(), use_container_width=True) except Exception as e: st.error(f"An error occurred: {str(e)}") # Prediction function for file data def make_predictions(): if st.session_state.original_df is not None: try: # Prepare data scaled_features = ['Qo', 'GOR ', 'THT', 'Pwh(psi)', 'Ph', 'Depth'] X_test_scaled = scaler.transform(st.session_state.original_df[scaled_features]) # Make predictions predictions = model.predict(X_test_scaled) st.session_state.predictions = predictions.flatten() # Add to dataframe for display result_df = st.session_state.original_df.copy() result_df['Predicted_BHP'] = st.session_state.predictions result_df['Model_Used'] = model_option return result_df except Exception as e: st.error(f"Prediction error: {str(e)}") return None else: st.error("Please upload and process a file first") return None # Prediction button if st.button("Make Predictions from File", key="file_predict"): result_df = make_predictions() if result_df is not None: st.write("Prediction Results:") st.dataframe(result_df[['PRODUCTION DATE', 'Predicted_BHP', 'Model_Used']], use_container_width=True) # Visualization if st.session_state.predictions is not None and st.session_state.original_df is not None: st.subheader("Visualization") # Create dataframe for plotting plot_df = st.session_state.original_df.copy() plot_df['Predicted_BHP'] = st.session_state.predictions # Line chart comparison st.line_chart( plot_df.set_index('PRODUCTION DATE')[['Predicted_BHP']], use_container_width=True ) with tab2: st.header("Manual Single Prediction") st.info(f"Using model: {model_option}") with st.form("manual_prediction_form"): col1, col2 = st.columns(2) with col1: qo = st.number_input("Oil Rate (Qo, STB/d)", min_value=0.0) gor = st.number_input("Gas-Oil Ratio (GOR, scf/STB)", min_value=0.0) tht = st.number_input("Tubing Head Temperature (THT, °C)", min_value=0.0) with col2: pwh = st.number_input("Wellhead Pressure (Pwh, psi)", min_value=0.0) wct = st.number_input("Water Cut (WCT, %)", min_value=0.0, max_value=100.0) depth = st.number_input("Depth m", min_value=0.0, max_value=10000.0) submitted = st.form_submit_button("Predict BHP") if submitted: try: # Calculate derived features fluid_gradient = (wct/100)*0.433 + (1-(wct/100))*0.273 ph = fluid_gradient * depth # Prepare input array input_data = np.array([[qo, gor, tht, pwh, ph, depth]]) # Scale and predict scaled_input = scaler.transform(input_data) prediction = model.predict(scaled_input)[0][0] # Display results st.success(f"Predicted Bottom Hole Pressure: **{prediction:.2f} psi**") # Show input summary with well information st.subheader("Input Summary") input_summary = { "Parameter": [ "Oil Rate", "GOR", "THT", "Wellhead Pressure", "Depth", "Water Cut", "Fluid Gradient", "Model Used"], "Value": [ f"{qo} STB/d", f"{gor} scf/STB", f"{tht} °C", f"{pwh} psi", f"{depth} m", f"{wct}%", f"{fluid_gradient:.4f} psi/ft", model_option ], "Units": [ "STB/d", "scf/STB", "°C", "psi", "m", "%", "psi/ft", ""] } st.table(pd.DataFrame(input_summary)) except Exception as e: st.error(f"Prediction failed: {str(e)}") with tab3: st.header("Iterative Parameter Solver") st.info("Solve for Qo, GOR, or WCT to match a target BHP") # Select which parameter to solve for target_param = st.selectbox( "Parameter to Solve For", ["Qo", "GOR", "WCT"], index=0 ) # Input known values with defaults col1, col2 = st.columns(2) with col1: qo = st.number_input("Oil Rate (Qo, STB/d)", min_value=0.0, value=1000.0, key="solve_qo") gor = st.number_input("Gas-Oil Ratio (GOR, scf/STB)", min_value=0.0, value=500.0, key="solve_gor") tht = st.number_input("Tubing Head Temp (THT, °C)", min_value=0.0, value=10.0, key="solve_tht") with col2: pwh = st.number_input("Wellhead Pressure (Pwh, psi)", min_value=0.0, value=300.0, key="solve_pwh") depth = st.number_input("Depth (m)", min_value=0.0, value=5000.0, key="solve_depth") wct = st.number_input("Water Cut (WCT, %)", min_value=0.0, max_value=100.0, value=30.0, key="solve_wct") # Target BHP value to match target_bhp = st.number_input("Target BHP (psi)", min_value=0.0, value=2000.0, key="target_bhp") if st.button("Solve Iteratively"): try: # Define the equation to solve with WCT constraints def equation_to_solve(x): # Enforce constraints depending on the parameter to solve if target_param == "WCT": x[0] = np.clip(x[0], 0.0, 100.0) elif target_param == "Qo": x[0] = max(x[0], 0.0) # Prevent negative oil rate elif target_param == "GOR": x[0] = max(x[0], 0.0) # Prevent negative GOR # Prepare inputs inputs = { 'Qo': x[0] if target_param == "Qo" else qo, 'GOR ': x[0] if target_param == "GOR" else gor, 'THT': tht, 'Pwh(psi)': pwh, 'Depth': depth, 'WCT': x[0] if target_param == "WCT" else wct } # Calculate fluid gradient and Ph current_wct = inputs['WCT'] fluid_gradient = (current_wct/100)*0.433 + (1-(current_wct/100))*0.273 ph = fluid_gradient * inputs['Depth'] # Scale inputs and predict BHP scaled_input = scaler.transform(np.array([ [inputs['Qo'], inputs['GOR '], inputs['THT'], inputs['Pwh(psi)'], ph, inputs['Depth']] ])) current_bhp = model.predict(scaled_input)[0][0] return current_bhp - target_bhp # Initial guesses (parameter-specific) initial_guess = { "Qo": max(qo, 100), # Avoid 0 for Qo "GOR": max(gor, 300), # Avoid 0 for GOR "WCT": np.clip(wct, 1.0, 99.0) # Avoid boundaries for stability }[target_param] # Solve with numerical safeguards solution = fsolve( equation_to_solve, [initial_guess], xtol=1e-6 # Tight tolerance for precision ) # Clip WCT to [0, 100] if solved if target_param == "WCT": solution[0] = np.clip(solution[0], 0.0, 100.0) # Display results st.success(f"**Solved {target_param} = {solution[0]:.2f}** (for BHP = {target_bhp} psi)") # Show all parameters st.subheader("Solution Summary") results = pd.DataFrame({ "Parameter": ["Qo", "GOR", "WCT", "THT", "Pwh", "Depth", "Target BHP"], "Value": [ f"{solution[0] if target_param == 'Qo' else qo:.2f}", f"{solution[0] if target_param == 'GOR' else gor:.2f}", f"{solution[0] if target_param == 'WCT' else wct:.2f}%", f"{tht:.2f}", f"{pwh:.2f}", f"{depth:.2f}", f"{target_bhp:.2f}" ], "Units": ["STB/d", "scf/STB", "%", "°C", "psi", "m", "psi"] }) st.dataframe(results, hide_index=True) except Exception as e: st.error(f"Solving failed: {str(e)}") st.warning("Check if the target BHP is physically achievable with given inputs.") with tab4: st.header("AI-Powered Data Analysis") st.caption("Ask questions about your uploaded data using natural language.") # Set PandasAI API key pandasai.api_key.set("PAI-09de3b8d-edb9-4d72-998f-60508abfb286") # Upload data file if not done already uploaded_file_ai = uploaded_file if uploaded_file is not None else st.file_uploader("Upload a CSV or Excel file for analysis", type=["csv", "xlsx"], key="ai_file") if uploaded_file_ai is not None: try: if uploaded_file_ai.name.endswith(".csv"): df_ai = pd.read_csv(uploaded_file_ai) else: df_ai = pd.read_excel(uploaded_file_ai) # Display data preview st.dataframe(df_ai.head(), use_container_width=True) # Wrap with SmartDataFrame sdf = SmartDataframe(df_ai, config={"llm": BambooLLM(api_key="PAI-09de3b8d-edb9-4d72-998f-60508abfb286")}) # Ask the user for a natural language query user_query = st.text_area("Ask a question about your data", placeholder="") if st.button("Run AI Query", key="run_pandasai"): with st.spinner("Thinking..."): try: result = sdf.chat(user_query) st.subheader("AI Response:") st.write(result) except Exception as e: st.error(f"Error processing query: {e}") except Exception as e: st.error(f"Failed to process uploaded file: {e}") else: st.info("Upload a file to begin AI analysis.") # Additional data exploration (only for file data) if st.session_state.original_df is not None: with st.expander("Advanced Data Exploration"): selected_columns = st.multiselect( "Select parameters to visualize", st.session_state.original_df.columns.drop('PRODUCTION DATE'), default=['Qo'] ) if selected_columns: st.line_chart( st.session_state.original_df.set_index('PRODUCTION DATE')[selected_columns], use_container_width=True )