import streamlit as st
import streamlit.components.v1 as components
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import io
import lasio
import os
from typing import Optional, Tuple, Dict, Any, List
import datetime
import copy

# ------------------------------------
# CONFIGURATION & CONSTANTS
# ------------------------------------
st.set_page_config(
    page_title="Well Log Analysis Pro",
    layout="wide",
    page_icon="🛢️",
    initial_sidebar_state="expanded"
)


# ------------------------------------
# CLASS: DATA MANAGER
# ------------------------------------
class DataManager:
    """Handles data ingestion, cleaning, and export."""

    @staticmethod
    @st.cache_data(show_spinner=False)
    def load_las(file_bytes: bytes) -> Tuple[pd.DataFrame, lasio.LASFile]:
        """
        Robust LAS file loader that handles encoding issues.

        Tries a list of common encodings before parsing, then normalizes the
        depth track so downstream pages can always rely on a "Depth" column.

        Args:
            file_bytes: Raw bytes of the uploaded .las file.

        Returns:
            Tuple of (cleaned dataframe, raw lasio.LASFile object).

        Raises:
            ValueError: If the bytes cannot be decoded with any supported encoding.
        """
        str_data = None
        # Try common encodings; LAS files in the wild are frequently latin-1/cp1252.
        for encoding in ["utf-8", "latin-1", "cp1252"]:
            try:
                str_data = file_bytes.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if str_data is None:
            raise ValueError("Unable to decode LAS file with supported encodings.")

        file_buffer = io.StringIO(str_data)
        las = lasio.read(file_buffer)
        # las.df() indexes by the depth curve; reset_index() moves it into column 0.
        df = las.df().reset_index()

        # Standardize Depth Column by known mnemonic aliases.
        found_depth = False
        for col in df.columns:
            if col.lower() in ["depth", "dept", "depth.m", "depth.ft", "depta"]:
                df = df.rename(columns={col: "Depth"})
                found_depth = True
                break
        if not found_depth:
            # FIX: the previous fallback assigned df.index — which after
            # reset_index() is just row numbers 0..n-1, not depths — and only
            # when the index unit contained "m" (ft-indexed logs got nothing).
            # The former LAS depth index is always the FIRST column after
            # reset_index(), so rename that column regardless of unit.
            df = df.rename(columns={df.columns[0]: "Depth"})
        return df, las

    @staticmethod
    @st.cache_data(show_spinner=False)
    def clean_data(df: pd.DataFrame) -> pd.DataFrame:
        """Removes infinite values and standardizes NaNs.

        Rows with NaNs are intentionally kept here: for ML, rows with NaNs in
        feature columns are dropped later, during training/prediction only.
        """
        df = df.replace([np.inf, -np.inf], np.nan)
        return df

    @staticmethod
    def export_to_las(df: pd.DataFrame, original_las: Optional[lasio.LASFile] = None) -> bytes:
        """
        Converts DataFrame back to LAS format bytes for download.

        Args:
            df: DataFrame whose columns become LAS curves.
            original_las: If provided, its WELL/PARAM headers and curve units
                are preserved in the output.

        Returns:
            UTF-8 encoded LAS file content.
        """
        new_las = lasio.LASFile()
        if original_las:
            # NOTE: lasio's .header is a read-only property, so sections are
            # copied item by item. deepcopy avoids aliasing objects that live
            # in Streamlit session state.
            for item in original_las.well:
                new_las.well[item.mnemonic] = copy.deepcopy(item)
            for item in original_las.params:
                new_las.params[item.mnemonic] = copy.deepcopy(item)
            # ~Other section is typically free text; safe to share.
            new_las.other = original_las.other
        else:
            # No source header: stamp a creation date at minimum.
            new_las.well.DATE = str(datetime.datetime.now())

        # Add curves from the DataFrame, preserving units where known.
        for col in df.columns:
            unit = "UNKNOWN"
            if original_las:
                for curve in original_las.curves:
                    if curve.mnemonic == col:
                        unit = curve.unit
                        break
            new_las.append_curve(col, df[col].values, unit=unit)

        # Write to a string buffer then encode for st.download_button.
        output = io.StringIO()
        new_las.write(output)
        return output.getvalue().encode("utf-8")


# ------------------------------------
# CLASS: MODEL MANAGER
# ------------------------------------
class ModelManager:
    """Handles Machine Learning operations."""

    # Registry of supported regressors; keys are the UI labels.
    MODELS = {
        "Linear Regression": LinearRegression,
        "Decision Tree": DecisionTreeRegressor,
        "Random Forest": RandomForestRegressor,
        "Gradient Boosting": GradientBoostingRegressor,
        "SVR": SVR
    }

    @staticmethod
    def train(
        df: pd.DataFrame,
        target: str,
        features: List[str],
        algorithm: str,
        params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Trains a model and returns the artifact dictionary.

        Args:
            df: Source data; rows with NaN in target or features are dropped.
            target: Column to predict.
            features: Input columns.
            algorithm: Key into ModelManager.MODELS.
            params: Keyword arguments forwarded to the model constructor.

        Returns:
            Dict with keys: model, features, target, algorithm, r2_score
            (NOTE: computed on the training set, so it is optimistic),
            training_date.

        Raises:
            ValueError: If fewer than 50 complete rows are available.
        """
        # Prepare Data: drop rows where Target or any Feature is NaN.
        train_df = df.dropna(subset=[target] + features)
        if len(train_df) < 50:
            raise ValueError("Not enough data points to train (requires > 50).")

        X = train_df[features]
        y = train_df[target]

        model_class = ModelManager.MODELS[algorithm]
        model = model_class(**params)

        model.fit(X, y)
        score = model.score(X, y)

        return {
            "model": model,
            "features": features,
            "target": target,
            "algorithm": algorithm,
            "r2_score": score,
            "training_date": str(datetime.datetime.now())
        }
# ------------------------------------
# CLASS: VISUALIZER
# ------------------------------------
class Visualizer:
    """Handles Plotly visualizations."""

    @staticmethod
    def plot_well_logs(df: pd.DataFrame, x_cols: List[str], depth_col: str) -> go.Figure:
        """Build a multi-track log viewer: one column-subplot per curve,
        all sharing a reversed depth axis (depth increases downward)."""
        fig = make_subplots(
            rows=1,
            cols=len(x_cols),
            shared_yaxes=True,
            subplot_titles=x_cols,
        )
        for track, curve in enumerate(x_cols, start=1):
            trace = go.Scatter(x=df[curve], y=df[depth_col], mode="lines", name=curve)
            fig.add_trace(trace, row=1, col=track)
        # Reversed range so shallow depths sit at the top, as in standard log displays.
        fig.update_yaxes(title_text=depth_col, autorange="reversed", row=1, col=1)
        fig.update_layout(
            height=800,
            showlegend=False,
            title_text="Well Log Viewer",
            template="plotly_white",
        )
        return fig

    @staticmethod
    def plot_crossplot(y_true: pd.Series, y_pred: pd.Series, title: str) -> go.Figure:
        """Scatter of predicted vs. actual values with a dashed 1:1 reference line."""
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=y_true,
            y=y_pred,
            mode='markers',
            marker=dict(color='blue', opacity=0.5, size=6),
            name='Data'
        ))
        # 1:1 line spanning the combined extent of both series.
        lo = min(y_true.min(), y_pred.min())
        hi = max(y_true.max(), y_pred.max())
        fig.add_shape(
            type="line",
            x0=lo, y0=lo, x1=hi, y1=hi,
            line=dict(color="red", dash="dash", width=2)
        )
        fig.update_layout(
            title=title,
            xaxis_title="Actual",
            yaxis_title="Predicted",
            height=600,
            template="plotly_white"
        )
        return fig


# ------------------------------------
# MAIN APPLICATION LOGIC
# ------------------------------------
def main():
    """App entry point: seed session state, draw the sidebar, route to a page."""
    # --- Session State Initialization ---
    for key in ("data", "las_object"):
        if key not in st.session_state:
            st.session_state[key] = None

    # --- Sidebar ---
    with st.sidebar:
        st.title("Navigation")
        page = st.radio("Go to", ["Home", "Visualizer", "Trainer", "Prediction"])
        st.markdown("---")
        st.caption("Pro Version 2.0")

    # --- Page Routing (dispatch table; st.radio guarantees a valid key) ---
    routes = {
        "Home": render_home,
        "Visualizer": render_visualizer,
        "Trainer": render_trainer,
        "Prediction": render_prediction,
    }
    routes[page]()


# ------------------------------------
# PAGE RENDERERS
# ------------------------------------
def render_home():
    """Landing page: file upload (LAS/CSV), preview, and a quick-start guide."""
    st.title("🛢️ Well Log Analysis Pro")
    st.markdown("### Production-Grade Petrophysics ML Tool")
    col1, col2 = st.columns([2, 1])

    with col1:
        st.info("Upload standard `.LAS` files or `.CSV` data to begin.")
        uploaded_file = st.file_uploader("Upload File", type=["csv", "las"])
        if uploaded_file:
            try:
                if uploaded_file.name.lower().endswith(".las"):
                    bytes_data = uploaded_file.read()
                    df, las_obj = DataManager.load_las(bytes_data)
                    # Keep the LAS object so exports can preserve headers/units.
                    st.session_state.las_object = las_obj
                else:
                    df = pd.read_csv(uploaded_file)
                    st.session_state.las_object = None  # No LAS header for CSVs

                # Global clean (strip +/-inf) before storing in session state.
                df = DataManager.clean_data(df)
                st.session_state.data = df

                st.success(f"Loaded **{uploaded_file.name}** successfully!")
                st.write(f"**Shape:** {df.shape[0]} rows, {df.shape[1]} columns")
                st.dataframe(df.head(), use_container_width=True)
            except Exception as e:
                # Boundary handler: surface any parse/decode failure to the user.
                st.error(f"Failed to load file: {str(e)}")

    with col2:
        st.subheader("Quick Guide")
        st.markdown("""
        1. **Upload** data here.
        2. **Visualize** logs to check quality.
        3. **Train** a model on existing curves.
        4. **Predict** missing curves or export synthetic logs.
        """)


def render_visualizer():
    """Log viewer page: pick a depth column and curves, render multi-track plot."""
    if st.session_state.data is None:
        st.warning("Please upload data on the Home page first.")
        return

    st.title("📊 Log Visualizer")
    df = st.session_state.data
    columns = df.columns.tolist()

    c1, c2 = st.columns([1, 3])
    with c1:
        depth_col = st.selectbox("Depth Column", columns, index=0)
        options = [c for c in columns if c != depth_col]
        # FIX: defaults were sliced from the FULL column list, so if the user
        # selected a depth column other than the first one, the default could
        # contain the depth column itself — which is not in the options list
        # and makes st.multiselect raise. Slice the filtered options instead
        # (identical result when depth_col is the first column).
        x_cols = st.multiselect("Logs to Plot", options, default=options[:3])
    with c2:
        if x_cols:
            fig = Visualizer.plot_well_logs(df, x_cols, depth_col)
            st.plotly_chart(fig, use_container_width=True)


def render_trainer():
    """Training page: choose target/features/algorithm, train, and export model."""
    if st.session_state.data is None:
        st.warning("Please upload data first.")
        return

    st.title("🤖 Model Trainer")
    df = st.session_state.data
    columns = df.columns.tolist()

    c1, c2, c3 = st.columns(3)
    target = c1.selectbox("Target (Output)", columns)
    features = c2.multiselect("Features (Input)", [c for c in columns if c != target])
    algo = c3.selectbox("Algorithm", list(ModelManager.MODELS.keys()), index=2)

    with st.expander("Advanced Hyperparameters"):
        n_estimators = st.slider("Trees (RF/GB)", 10, 200, 100)
        max_depth = st.slider("Max Depth", 3, 20, 10)

    if st.button("Train Model", type="primary"):
        if not features:
            st.error("Select features!")
            return

        params: Dict[str, Any] = {}
        if algo in ["Random Forest", "Gradient Boosting"]:
            params = {"n_estimators": n_estimators, "max_depth": max_depth, "random_state": 42}
        elif algo == "Decision Tree":
            # FIX: the Max Depth slider was silently ignored for Decision Tree,
            # even though DecisionTreeRegressor accepts max_depth/random_state.
            params = {"max_depth": max_depth, "random_state": 42}

        with st.spinner("Training model..."):
            try:
                artifact = ModelManager.train(df, target, features, algo, params)
                # Persist to disk so the Prediction page can pick it up.
                joblib.dump(artifact, "well_model.pkl")
                st.success(f"Trained {algo} with R²: {artifact['r2_score']:.4f}")

                # Also offer the artifact as a downloadable .pkl.
                model_bytes = io.BytesIO()
                joblib.dump(artifact, model_bytes)
                st.download_button(
                    label="⬇️ Download Trained Model (.pkl)",
                    data=model_bytes.getvalue(),
                    file_name="well_log_model.pkl",
                    mime="application/octet-stream"
                )
            except Exception as e:
                st.error(f"Training failed: {e}")


def render_prediction():
    """Prediction page: apply the saved model to the loaded data and export."""
    st.title("🔮 Prediction & Export")
    if st.session_state.data is None:
        st.warning("Upload data first.")
        return

    if not os.path.exists("well_model.pkl"):
        st.warning("No model found. Train one in the 'Trainer' tab or upload a .pkl file.")
        uploaded_model = st.file_uploader("Upload Pre-trained Model (.pkl)", type="pkl")
        if uploaded_model:
            # SECURITY: joblib.load deserializes pickle data, which can execute
            # arbitrary code. Only load model files from trusted sources; for a
            # multi-user deployment this upload path should be removed or the
            # artifact stored in a safer format (e.g. ONNX).
            artifact = joblib.load(uploaded_model)
            joblib.dump(artifact, "well_model.pkl")
            st.rerun()
        return

    # Load the persisted model artifact produced by render_trainer.
    artifact = joblib.load("well_model.pkl")
    model = artifact['model']
    feats = artifact['features']
    target = artifact['target']
    st.info(f"Loaded Model: **{artifact['algorithm']}** (Predicting: `{target}`)")

    df = st.session_state.data.copy()

    # The current dataset must contain every feature the model was trained on.
    missing = [f for f in feats if f not in df.columns]
    if missing:
        st.error(f"Missing features in current dataset: {missing}")
        return

    mode = st.radio("Prediction Mode", ["Predict New Curve (Overwrite)", "Fill Gaps (Imputation)"])

    if st.button("Generate Prediction", type="primary"):
        X = df[feats]
        # Handle NaNs in input features by restricting to complete rows
        # (simple and safe; no imputation of the inputs themselves).
        valid_indices = X.dropna().index
        if len(valid_indices) == 0:
            st.error("Input features contain too many NaNs. Cannot predict.")
            return

        X_valid = X.loc[valid_indices]
        preds = model.predict(X_valid)
        output_col = f"PRED_{target}"

        if mode == "Predict New Curve (Overwrite)":
            # Full synthetic curve: NaN wherever inputs were incomplete.
            df[output_col] = np.nan
            df.loc[valid_indices, output_col] = preds
            display_col = output_col
        else:
            # Imputation Mode: only fill rows where the target is missing
            # but all input features are present.
            if target not in df.columns:
                st.error(f"Target column {target} not found for imputation.")
                return
            df[output_col] = df[target].copy()
            target_nans = df[df[target].isna()].index
            impute_indices = list(set(target_nans) & set(valid_indices))
            if impute_indices:
                X_impute = df.loc[impute_indices, feats]
                impute_preds = model.predict(X_impute)
                df.loc[impute_indices, output_col] = impute_preds
                st.success(f"Filled {len(impute_indices)} gaps.")
            else:
                st.warning("No gaps found that could be filled with available features.")
            display_col = output_col

        # Visual Comparison
        st.subheader("Result Preview")
        fig = go.Figure()
        if "Depth" in df.columns:
            fig.add_trace(go.Scatter(x=df[display_col], y=df["Depth"], name="Prediction",
                                     line=dict(color='red', dash='dot')))
            if target in df.columns:
                fig.add_trace(go.Scatter(x=df[target], y=df["Depth"], name="Original",
                                         line=dict(color='black', width=1)))
            fig.update_yaxes(autorange="reversed", title="Depth")
        else:
            # No depth track available: plot against row index instead.
            fig.add_trace(go.Scatter(y=df[display_col], mode='lines', name="Prediction"))
        st.plotly_chart(fig, use_container_width=True)

        # Export Options
        st.divider()
        st.subheader("Export Data")
        c1, c2 = st.columns(2)
        with c1:
            csv_data = df.to_csv(index=False).encode('utf-8')
            st.download_button("Download CSV", csv_data, "results.csv", "text/csv")
        with c2:
            try:
                las_bytes = DataManager.export_to_las(df, st.session_state.las_object)
                st.download_button("Download LAS", las_bytes, "results.las", "application/octet-stream")
            except Exception as e:
                # Best-effort: LAS export can fail (e.g. header issues) without
                # blocking the CSV download.
                st.error(f"LAS export unavailable: {e}")


if __name__ == "__main__":
    main()