# BHP Estimator — Streamlit app (Hugging Face Spaces page header removed)
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import pickle | |
| from datetime import datetime | |
| from scipy.optimize import fsolve | |
| import pandasai | |
| from pandasai import SmartDataframe | |
| from pandasai.llm import BambooLLM | |
# ---- Page chrome: browser-tab title, wide layout, and on-page headings ----
st.set_page_config(page_title="BHP Estimator", layout="wide")
st.title('Bottom Hole Pressure (BHP) Estimator')
st.subheader("Upload your production data or enter manual inputs")
# Columns a production file must contain to be accepted.
# BUG FIX: 'THT' was listed twice in the original; deduplicated.
# NOTE: 'GOR ' (trailing space) is intentional — it must match the raw
# export's column header exactly, as the rest of the app indexes by it.
required_columns = ['PRODUCTION DATE', 'Qo', 'THT', 'GOR ', 'Pwh(psi)', 'Depth', 'WCT']
# Create the session-state slots on first run so later reads never KeyError.
for _state_key in ('original_df', 'predictions'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# ---- Sidebar: model configuration ----
# Maps each well-pair label to its pickled model bundle on disk.
# (Name `model_files` is referenced further down when loading the model.)
model_files = {
    "J57 & J05": 'modelBIGDATA5US1NNMAXP57.pkl',
    "J19 & J56": 'modelBIGDATA5US1NNMAXP576168allL1NN.pkl',
    "J37 & J51": 'modelBIGDATA5US1NNMAXP5137.pkl',
}

with st.sidebar:
    st.header("Model Configuration")
    # Options are derived from the mapping's keys (insertion order preserved),
    # so the selectbox and the file lookup can never drift apart.
    model_option = st.selectbox(
        "Select Well Model",
        list(model_files),
        index=0,
        help="Select the appropriate model for your well"
    )
# BUG FIX: the original comment promised "caching" but no cache existed, so
# the pickle was re-read from disk on every Streamlit rerun (every widget
# interaction). cache_resource loads each model file once per session/process.
@st.cache_resource
def load_model(model_path):
    """Load a pickled ``{'model': ..., 'scaler': ...}`` bundle.

    Args:
        model_path: Path to the pickle file produced at training time.

    Returns:
        ``(model, scaler)`` on success, or ``(None, None)`` after surfacing
        a Streamlit error if the file is missing/corrupt or lacks the keys.

    NOTE(security): ``pickle.load`` executes arbitrary code from the file —
    only ship trusted, repo-controlled model files here.
    """
    try:
        with open(model_path, 'rb') as file:
            saved_data = pickle.load(file)
        return saved_data['model'], saved_data['scaler']
    except Exception as e:
        st.error(f"Failed to load model: {str(e)}")
        return None, None
# Load the currently selected model; halt the whole app if loading failed.
model, scaler = load_model(model_files[model_option])
if any(obj is None for obj in (model, scaler)):
    st.stop()

# Top-level navigation — created exactly once for the four app views.
tab1, tab2, tab3, tab4 = st.tabs(["📁 File Prediction", "✍️ Manual Prediction", "🔍 Solve for Parameter", "🧠 AI Data Analysis "])
with tab1:
    # ---- File-based batch prediction ----
    st.header("File-based Prediction")
    st.info(f"Using model: {model_option}")
    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'], key="file_uploader")

    if uploaded_file is not None:
        try:
            # Dispatch on extension: CSV vs Excel.
            if uploaded_file.name.endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            # BUG FIX: the original coerced EVERY column to numeric — including
            # 'PRODUCTION DATE' — turning date strings into NaN, so dropna()
            # then discarded every row. Skip the date column during coercion.
            for col in df.columns:
                if col != 'PRODUCTION DATE':
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            df = df.dropna()

            # Validate the schema before computing derived features.
            missing_columns = [col for col in required_columns if col not in df.columns]
            if missing_columns:
                st.error(f"Missing required columns: {', '.join(missing_columns)}")
            else:
                df['PRODUCTION DATE'] = pd.to_datetime(df['PRODUCTION DATE'])
                # Mixed-fluid hydrostatic gradient: 0.433 psi/ft (water) and
                # 0.273 psi/ft (oil), weighted by water cut (WCT in percent).
                # NOTE(review): gradient is psi/ft but Depth is labeled meters
                # elsewhere — confirm the training data used consistent units.
                df['Fluid gradient'] = (df['WCT'] / 100) * 0.433 + (1 - (df['WCT'] / 100)) * 0.273
                df['Ph'] = df['Fluid gradient'] * df['Depth']
                st.session_state.original_df = df
                st.success("File successfully processed!")
                st.write("Data Preview:")
                st.dataframe(df.head(), use_container_width=True)
        except Exception as e:
            st.error(f"An error occurred: {str(e)}")

    def make_predictions():
        """Scale the stored dataframe's features, predict BHP for each row.

        Returns a copy of the processed dataframe with 'Predicted_BHP' and
        'Model_Used' columns added, or None (with a Streamlit error shown)
        if no file has been processed or prediction fails.
        """
        if st.session_state.original_df is not None:
            try:
                # Feature order must match the scaler/model training layout.
                scaled_features = ['Qo', 'GOR ', 'THT', 'Pwh(psi)', 'Ph', 'Depth']
                X_test_scaled = scaler.transform(st.session_state.original_df[scaled_features])
                predictions = model.predict(X_test_scaled)
                st.session_state.predictions = predictions.flatten()
                result_df = st.session_state.original_df.copy()
                result_df['Predicted_BHP'] = st.session_state.predictions
                result_df['Model_Used'] = model_option
                return result_df
            except Exception as e:
                st.error(f"Prediction error: {str(e)}")
                return None
        else:
            st.error("Please upload and process a file first")
            return None

    if st.button("Make Predictions from File", key="file_predict"):
        result_df = make_predictions()
        if result_df is not None:
            st.write("Prediction Results:")
            st.dataframe(result_df[['PRODUCTION DATE', 'Predicted_BHP', 'Model_Used']], use_container_width=True)

    # Plot predictions over time once both data and predictions exist.
    if st.session_state.predictions is not None and st.session_state.original_df is not None:
        st.subheader("Visualization")
        plot_df = st.session_state.original_df.copy()
        plot_df['Predicted_BHP'] = st.session_state.predictions
        st.line_chart(
            plot_df.set_index('PRODUCTION DATE')[['Predicted_BHP']],
            use_container_width=True
        )
with tab2:
    # ---- Single-well manual prediction ----
    st.header("Manual Single Prediction")
    st.info(f"Using model: {model_option}")

    with st.form("manual_prediction_form"):
        left, right = st.columns(2)
        with left:
            qo = st.number_input("Oil Rate (Qo, STB/d)", min_value=0.0)
            gor = st.number_input("Gas-Oil Ratio (GOR, scf/STB)", min_value=0.0)
            tht = st.number_input("Tubing Head Temperature (THT, °C)", min_value=0.0)
        with right:
            pwh = st.number_input("Wellhead Pressure (Pwh, psi)", min_value=0.0)
            wct = st.number_input("Water Cut (WCT, %)", min_value=0.0, max_value=100.0)
            depth = st.number_input("Depth m", min_value=0.0, max_value=10000.0)
        submitted = st.form_submit_button("Predict BHP")

    if submitted:
        try:
            # Water-cut-weighted hydrostatic gradient, then hydrostatic head.
            water_frac = wct / 100
            fluid_gradient = water_frac * 0.433 + (1 - water_frac) * 0.273
            ph = fluid_gradient * depth

            # Feature order mirrors the file-based path: Qo, GOR, THT, Pwh, Ph, Depth.
            features = np.array([[qo, gor, tht, pwh, ph, depth]])
            prediction = model.predict(scaler.transform(features))[0][0]

            st.success(f"Predicted Bottom Hole Pressure: **{prediction:.2f} psi**")

            # Echo every input back so the user can verify what was predicted.
            st.subheader("Input Summary")
            summary_rows = [
                ("Oil Rate", f"{qo} STB/d", "STB/d"),
                ("GOR", f"{gor} scf/STB", "scf/STB"),
                ("THT", f"{tht} °C", "°C"),
                ("Wellhead Pressure", f"{pwh} psi", "psi"),
                ("Depth", f"{depth} m", "m"),
                ("Water Cut", f"{wct}%", "%"),
                ("Fluid Gradient", f"{fluid_gradient:.4f} psi/ft", "psi/ft"),
                ("Model Used", model_option, ""),
            ]
            st.table(pd.DataFrame(summary_rows, columns=["Parameter", "Value", "Units"]))
        except Exception as e:
            st.error(f"Prediction failed: {str(e)}")
with tab3:
    # ---- Inverse problem: find the input that yields a target BHP ----
    st.header("Iterative Parameter Solver")
    st.info("Solve for Qo, GOR, or WCT to match a target BHP")

    # Which unknown the solver should back-calculate.
    target_param = st.selectbox(
        "Parameter to Solve For",
        ["Qo", "GOR", "WCT"],
        index=0
    )

    # Known inputs; the one being solved for is ignored and replaced
    # by the solver's candidate value on each iteration.
    col1, col2 = st.columns(2)
    with col1:
        qo = st.number_input("Oil Rate (Qo, STB/d)", min_value=0.0, value=1000.0, key="solve_qo")
        gor = st.number_input("Gas-Oil Ratio (GOR, scf/STB)", min_value=0.0, value=500.0, key="solve_gor")
        tht = st.number_input("Tubing Head Temp (THT, °C)", min_value=0.0, value=10.0, key="solve_tht")
    with col2:
        pwh = st.number_input("Wellhead Pressure (Pwh, psi)", min_value=0.0, value=300.0, key="solve_pwh")
        depth = st.number_input("Depth (m)", min_value=0.0, value=5000.0, key="solve_depth")
        wct = st.number_input("Water Cut (WCT, %)", min_value=0.0, max_value=100.0, value=30.0, key="solve_wct")

    target_bhp = st.number_input("Target BHP (psi)", min_value=0.0, value=2000.0, key="target_bhp")

    if st.button("Solve Iteratively"):
        try:
            def equation_to_solve(x):
                """Residual (model-predicted BHP minus target) for candidate x[0]."""
                # BUG FIX: the original wrote the clipped value back into x[0],
                # mutating fsolve's own working array in place and corrupting
                # its finite-difference Jacobian estimates. Clamp a local copy.
                if target_param == "WCT":
                    candidate = float(np.clip(x[0], 0.0, 100.0))
                else:  # Qo and GOR must stay non-negative
                    candidate = max(float(x[0]), 0.0)

                # Substitute the candidate only for the parameter being solved.
                inputs = {
                    'Qo': candidate if target_param == "Qo" else qo,
                    'GOR ': candidate if target_param == "GOR" else gor,
                    'THT': tht,
                    'Pwh(psi)': pwh,
                    'Depth': depth,
                    'WCT': candidate if target_param == "WCT" else wct
                }

                # Derived features, same formulas as the prediction tabs.
                current_wct = inputs['WCT']
                fluid_gradient = (current_wct/100)*0.433 + (1-(current_wct/100))*0.273
                ph = fluid_gradient * inputs['Depth']

                scaled_input = scaler.transform(np.array([
                    [inputs['Qo'], inputs['GOR '], inputs['THT'],
                     inputs['Pwh(psi)'], ph, inputs['Depth']]
                ]))
                current_bhp = model.predict(scaled_input)[0][0]
                return current_bhp - target_bhp

            # Parameter-specific starting points, kept away from hard boundaries
            # where the clamped residual would be flat.
            initial_guess = {
                "Qo": max(qo, 100),
                "GOR": max(gor, 300),
                "WCT": np.clip(wct, 1.0, 99.0)
            }[target_param]

            solution = fsolve(
                equation_to_solve,
                [initial_guess],
                xtol=1e-6  # tight tolerance for precision
            )

            # Report a physically meaningful water cut even if the root
            # landed slightly outside [0, 100].
            if target_param == "WCT":
                solution[0] = np.clip(solution[0], 0.0, 100.0)

            st.success(f"**Solved {target_param} = {solution[0]:.2f}** (for BHP = {target_bhp} psi)")

            st.subheader("Solution Summary")
            results = pd.DataFrame({
                "Parameter": ["Qo", "GOR", "WCT", "THT", "Pwh", "Depth", "Target BHP"],
                "Value": [
                    f"{solution[0] if target_param == 'Qo' else qo:.2f}",
                    f"{solution[0] if target_param == 'GOR' else gor:.2f}",
                    f"{solution[0] if target_param == 'WCT' else wct:.2f}%",
                    f"{tht:.2f}",
                    f"{pwh:.2f}",
                    f"{depth:.2f}",
                    f"{target_bhp:.2f}"
                ],
                "Units": ["STB/d", "scf/STB", "%", "°C", "psi", "m", "psi"]
            })
            st.dataframe(results, hide_index=True)
        except Exception as e:
            st.error(f"Solving failed: {str(e)}")
            st.warning("Check if the target BHP is physically achievable with given inputs.")
with tab4:
    # ---- Natural-language data analysis via PandasAI ----
    st.header("AI-Powered Data Analysis")
    st.caption("Ask questions about your uploaded data using natural language.")

    # NOTE(security): the PandasAI key is hard-coded in source and therefore
    # public; it should live in st.secrets or an environment variable.
    pandasai.api_key.set("PAI-09de3b8d-edb9-4d72-998f-60508abfb286")

    # Reuse the file from the prediction tab when available; otherwise
    # offer a dedicated uploader for analysis-only sessions.
    if uploaded_file is not None:
        uploaded_file_ai = uploaded_file
    else:
        uploaded_file_ai = st.file_uploader("Upload a CSV or Excel file for analysis", type=["csv", "xlsx"], key="ai_file")

    if uploaded_file_ai is None:
        st.info("Upload a file to begin AI analysis.")
    else:
        try:
            reader = pd.read_csv if uploaded_file_ai.name.endswith(".csv") else pd.read_excel
            df_ai = reader(uploaded_file_ai)
            st.dataframe(df_ai.head(), use_container_width=True)

            # Wrap the frame so the LLM can answer questions about it.
            sdf = SmartDataframe(df_ai, config={"llm": BambooLLM(api_key="PAI-09de3b8d-edb9-4d72-998f-60508abfb286")})

            user_query = st.text_area("Ask a question about your data", placeholder="")
            if st.button("Run AI Query", key="run_pandasai"):
                with st.spinner("Thinking..."):
                    try:
                        result = sdf.chat(user_query)
                        st.subheader("AI Response:")
                        st.write(result)
                    except Exception as e:
                        st.error(f"Error processing query: {e}")
        except Exception as e:
            st.error(f"Failed to process uploaded file: {e}")
# ---- Extra exploration of the processed file data (file path only) ----
if st.session_state.original_df is not None:
    with st.expander("Advanced Data Exploration"):
        columns_to_plot = st.multiselect(
            "Select parameters to visualize",
            st.session_state.original_df.columns.drop('PRODUCTION DATE'),
            default=['Qo']
        )
        if columns_to_plot:
            st.line_chart(
                st.session_state.original_df.set_index('PRODUCTION DATE')[columns_to_plot],
                use_container_width=True
            )