# NOTE: "Spaces: Sleeping" below was Hugging Face Spaces page residue captured
# when this app listing was scraped; it is not part of the source code.
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor | |
| from sklearn.tree import DecisionTreeRegressor | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.svm import SVR | |
| from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | |
| import joblib | |
| import io | |
| import lasio | |
| import os | |
| from typing import Optional, Tuple, Dict, Any, List | |
| import datetime | |
| import copy | |
# ------------------------------------
# CONFIGURATION & CONSTANTS
# ------------------------------------
# NOTE: st.set_page_config must be the first Streamlit command executed in the
# script, which is why it sits at module level before any class definitions.
st.set_page_config(
    page_title="Well Log Analysis Pro",
    layout="wide",
    page_icon="🛢️",
    initial_sidebar_state="expanded"
)
| # ------------------------------------ | |
| # CLASS: DATA MANAGER | |
| # ------------------------------------ | |
class DataManager:
    """Handles data ingestion, cleaning, and export of well-log datasets."""

    @staticmethod
    def load_las(file_bytes: bytes) -> Tuple[pd.DataFrame, lasio.LASFile]:
        """
        Robust LAS file loader that handles encoding issues.

        Decodes the raw bytes with the first encoding that succeeds, parses
        with lasio, and renames the depth column to "Depth".

        Args:
            file_bytes: Raw content of the uploaded .las file.

        Returns:
            Tuple of (dataframe with depth reset to a column, raw LASFile).

        Raises:
            ValueError: If no supported encoding can decode the bytes.
        """
        str_data = None
        # Try common encodings. FIX: cp1252 is tried BEFORE latin-1 —
        # latin-1 accepts every possible byte sequence, so any encoding
        # listed after it (and the ValueError below) would be unreachable.
        for encoding in ["utf-8", "cp1252", "latin-1"]:
            try:
                str_data = file_bytes.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if str_data is None:
            raise ValueError("Unable to decode LAS file with supported encodings.")
        file_buffer = io.StringIO(str_data)
        las = lasio.read(file_buffer)
        df = las.df().reset_index()
        # Standardize the depth column name so downstream pages can rely on it.
        found_depth = False
        for col in df.columns:
            if col.lower() in ["depth", "dept", "depth.m", "depth.ft", "depta"]:
                df = df.rename(columns={col: "Depth"})
                found_depth = True
                break
        if not found_depth:
            # Fallback: consult the LAS index unit when no depth column exists.
            # NOTE(review): after reset_index() this is a 0..n-1 RangeIndex,
            # not the original depth values — confirm this is intended.
            if las.index_unit and "m" in las.index_unit.lower():
                df["Depth"] = df.index
        return df, las

    @staticmethod
    def clean_data(df: pd.DataFrame) -> pd.DataFrame:
        """Replace +/-inf with NaN; row dropping is deferred to training time."""
        # Rows with NaNs in feature columns are dropped later by
        # ModelManager.train, so no rows are removed here.
        return df.replace([np.inf, -np.inf], np.nan)

    @staticmethod
    def export_to_las(df: pd.DataFrame, original_las: Optional[lasio.LASFile] = None) -> bytes:
        """
        Converts DataFrame back to LAS format bytes for download.

        Args:
            df: Data to export; every column becomes a curve.
            original_las: Optional source LAS whose WELL/PARAM/OTHER sections
                and per-curve units are preserved in the output.

        Returns:
            UTF-8 encoded LAS file content.
        """
        new_las = lasio.LASFile()
        if original_las:
            # lasio's .header is a read-only property, so copy sections item
            # by item; deepcopy avoids linking to the session-state object.
            for item in original_las.well:
                new_las.well[item.mnemonic] = copy.deepcopy(item)
            for item in original_las.params:
                new_las.params[item.mnemonic] = copy.deepcopy(item)
            # "Other" section is typically plain text; shallow assignment is fine.
            new_las.other = original_las.other
        else:
            # No source header available: stamp at least the export date.
            new_las.well.DATE = str(datetime.datetime.now())
        # Build a mnemonic -> unit lookup once instead of re-scanning the
        # original curve list for every exported column.
        units = {c.mnemonic: c.unit for c in original_las.curves} if original_las else {}
        for col in df.columns:
            new_las.append_curve(col, df[col].values, unit=units.get(col, "UNKNOWN"))
        # Write to a string buffer, then encode for the download button.
        output = io.StringIO()
        new_las.write(output)
        return output.getvalue().encode("utf-8")
| # ------------------------------------ | |
| # CLASS: MODEL MANAGER | |
| # ------------------------------------ | |
class ModelManager:
    """Handles machine-learning operations (training of log regressors)."""

    # Supported regressor classes, keyed by the name shown in the UI.
    MODELS = {
        "Linear Regression": LinearRegression,
        "Decision Tree": DecisionTreeRegressor,
        "Random Forest": RandomForestRegressor,
        "Gradient Boosting": GradientBoostingRegressor,
        "SVR": SVR
    }

    @staticmethod
    def train(
        df: pd.DataFrame,
        target: str,
        features: List[str],
        algorithm: str,
        params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Trains a model and returns the artifact dictionary.

        Args:
            df: Full dataset; rows with NaN in target or features are dropped.
            target: Column name to predict.
            features: Input column names.
            algorithm: Key into ModelManager.MODELS.
            params: Hyperparameters forwarded to the model constructor.

        Returns:
            Dict with keys: model, features, target, algorithm, r2_score,
            training_date — the artifact persisted/downloaded by the UI.

        Raises:
            ValueError: If fewer than 50 complete rows remain after NaN drop.
            KeyError: If algorithm is not a key of MODELS.
        """
        # Prepare data: drop rows where the target or any feature is NaN.
        train_df = df.dropna(subset=[target] + features)
        if len(train_df) < 50:
            raise ValueError("Not enough data points to train (requires > 50).")
        X = train_df[features]
        y = train_df[target]
        model_class = ModelManager.MODELS[algorithm]
        model = model_class(**params)
        model.fit(X, y)
        # NOTE: R² is computed on the training data itself (no hold-out
        # split), so it is an optimistic estimate of generalization.
        score = model.score(X, y)
        return {
            "model": model,
            "features": features,
            "target": target,
            "algorithm": algorithm,
            "r2_score": score,
            "training_date": str(datetime.datetime.now())
        }
| # ------------------------------------ | |
| # CLASS: VISUALIZER | |
| # ------------------------------------ | |
class Visualizer:
    """Handles Plotly visualizations."""

    @staticmethod
    def plot_well_logs(df: pd.DataFrame, x_cols: List[str], depth_col: str) -> go.Figure:
        """Build side-by-side log tracks sharing a reversed depth axis.

        Args:
            df: Source dataframe containing the curves.
            x_cols: Curve names; one subplot column (track) per curve.
            depth_col: Column plotted on the shared y-axis.

        Returns:
            Plotly figure with one track per curve.
        """
        fig = make_subplots(rows=1, cols=len(x_cols), shared_yaxes=True, subplot_titles=x_cols)
        for i, col in enumerate(x_cols):
            fig.add_trace(
                go.Scatter(x=df[col], y=df[depth_col], mode="lines", name=col),
                row=1, col=i + 1
            )
        # Depth increases downward by convention, so reverse the shared y-axis.
        fig.update_yaxes(title_text=depth_col, autorange="reversed", row=1, col=1)
        fig.update_layout(height=800, showlegend=False, title_text="Well Log Viewer", template="plotly_white")
        return fig

    @staticmethod
    def plot_crossplot(y_true: pd.Series, y_pred: pd.Series, title: str) -> go.Figure:
        """Scatter of predicted vs. actual values with a dashed 1:1 line.

        Args:
            y_true: Actual target values (x-axis).
            y_pred: Model predictions (y-axis).
            title: Figure title.

        Returns:
            Plotly figure; points on the red dashed line are perfect predictions.
        """
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=y_true,
            y=y_pred,
            mode='markers',
            marker=dict(color='blue', opacity=0.5, size=6),
            name='Data'
        ))
        # 1:1 reference line spanning the combined data range.
        min_val = min(y_true.min(), y_pred.min())
        max_val = max(y_true.max(), y_pred.max())
        fig.add_shape(
            type="line", x0=min_val, y0=min_val, x1=max_val, y1=max_val,
            line=dict(color="red", dash="dash", width=2)
        )
        fig.update_layout(
            title=title,
            xaxis_title="Actual",
            yaxis_title="Predicted",
            height=600,
            template="plotly_white"
        )
        return fig
| # ------------------------------------ | |
| # MAIN APPLICATION LOGIC | |
| # ------------------------------------ | |
def main():
    """Entry point: prepares session state, draws the sidebar, routes pages."""
    # --- Session State Initialization ---
    for key in ("data", "las_object"):
        if key not in st.session_state:
            st.session_state[key] = None
    # --- Page registry (insertion order defines the radio order) ---
    routes = {
        "Home": render_home,
        "Visualizer": render_visualizer,
        "Trainer": render_trainer,
        "Prediction": render_prediction,
    }
    # --- Sidebar ---
    with st.sidebar:
        st.title("Navigation")
        page = st.radio("Go to", list(routes))
        st.markdown("---")
        st.caption("Pro Version 2.0")
    # --- Page Routing via dispatch table ---
    routes[page]()
| # ------------------------------------ | |
| # PAGE RENDERERS | |
| # ------------------------------------ | |
def render_home():
    """Landing page: file upload on the left, a quick usage guide on the right."""
    st.title("🛢️ Well Log Analysis Pro")
    st.markdown("### Production-Grade Petrophysics ML Tool")
    upload_area, guide_area = st.columns([2, 1])
    with upload_area:
        st.info("Upload standard `.LAS` files or `.CSV` data to begin.")
        uploaded_file = st.file_uploader("Upload File", type=["csv", "las"])
        if uploaded_file:
            try:
                if uploaded_file.name.lower().endswith(".las"):
                    df, las_obj = DataManager.load_las(uploaded_file.read())
                    st.session_state.las_object = las_obj
                else:
                    df = pd.read_csv(uploaded_file)
                    st.session_state.las_object = None  # No LAS header for CSVs
                # Global clean, then publish to session state.
                df = DataManager.clean_data(df)
                st.session_state.data = df
                st.success(f"Loaded **{uploaded_file.name}** successfully!")
                st.write(f"**Shape:** {df.shape[0]} rows, {df.shape[1]} columns")
                st.dataframe(df.head(), use_container_width=True)
            except Exception as e:
                st.error(f"Failed to load file: {str(e)}")
    with guide_area:
        st.subheader("Quick Guide")
        st.markdown("""
        1. **Upload** data here.
        2. **Visualize** logs to check quality.
        3. **Train** a model on existing curves.
        4. **Predict** missing curves or export synthetic logs.
        """)
def render_visualizer():
    """Log viewer page: pick a depth column and curves, then plot the tracks."""
    if st.session_state.data is None:
        st.warning("Please upload data on the Home page first.")
        return
    st.title("📊 Log Visualizer")
    df = st.session_state.data
    columns = df.columns.tolist()
    controls, canvas = st.columns([1, 3])
    with controls:
        depth_col = st.selectbox("Depth Column", columns, index=0)
        candidates = [c for c in columns if c != depth_col]
        # Pre-select the first few curves after the depth column.
        default_logs = columns[1:4] if len(columns) > 3 else columns[1:]
        x_cols = st.multiselect("Logs to Plot", candidates, default=default_logs)
    with canvas:
        if x_cols:
            fig = Visualizer.plot_well_logs(df, x_cols, depth_col)
            st.plotly_chart(fig, use_container_width=True)
def render_trainer():
    """Model training page: pick target/features/algorithm, train, download.

    The trained artifact is persisted to 'well_model.pkl' on local disk so the
    Prediction page can pick it up, and is also offered as a download.
    """
    if st.session_state.data is None:
        st.warning("Please upload data first.")
        return
    st.title("🤖 Model Trainer")
    df = st.session_state.data
    columns = df.columns.tolist()
    c1, c2, c3 = st.columns(3)
    target = c1.selectbox("Target (Output)", columns)
    features = c2.multiselect("Features (Input)", [c for c in columns if c != target])
    algo = c3.selectbox("Algorithm", list(ModelManager.MODELS.keys()), index=2)
    with st.expander("Advanced Hyperparameters"):
        n_estimators = st.slider("Trees (RF/GB)", 10, 200, 100)
        max_depth = st.slider("Max Depth", 3, 20, 10)
    if st.button("Train Model", type="primary"):
        if not features:
            st.error("Select features!")
            return
        # Map the UI sliders to hyperparameters per algorithm. Linear
        # Regression and SVR take neither, so they get an empty dict.
        params = {}
        if algo in ["Random Forest", "Gradient Boosting"]:
            params = {"n_estimators": n_estimators, "max_depth": max_depth, "random_state": 42}
        elif algo == "Decision Tree":
            # FIX: the Max Depth slider was previously ignored for Decision
            # Tree even though DecisionTreeRegressor accepts max_depth.
            params = {"max_depth": max_depth, "random_state": 42}
        with st.spinner("Training model..."):
            try:
                artifact = ModelManager.train(df, target, features, algo, params)
                # Persist to disk so the Prediction page can load it.
                joblib.dump(artifact, "well_model.pkl")
                st.success(f"Trained {algo} with R²: {artifact['r2_score']:.4f}")
                # Serialize a second copy in memory for the download button.
                model_bytes = io.BytesIO()
                joblib.dump(artifact, model_bytes)
                st.download_button(
                    label="⬇️ Download Trained Model (.pkl)",
                    data=model_bytes.getvalue(),
                    file_name="well_log_model.pkl",
                    mime="application/octet-stream"
                )
            except Exception as e:
                st.error(f"Training failed: {e}")
def render_prediction():
    """Prediction page: apply a saved model to the loaded data and export.

    Requires a trained artifact at 'well_model.pkl' (produced by the Trainer
    page) or an uploaded .pkl. Supports two modes — overwrite (full synthetic
    curve) and gap-fill imputation — then previews the result against depth
    and offers CSV/LAS export of the augmented dataframe.
    """
    st.title("🔮 Prediction & Export")
    if st.session_state.data is None:
        st.warning("Upload data first.")
        return
    if not os.path.exists("well_model.pkl"):
        st.warning("No model found. Train one in the 'Trainer' tab or upload a .pkl file.")
        uploaded_model = st.file_uploader("Upload Pre-trained Model (.pkl)", type="pkl")
        if uploaded_model:
            # SECURITY NOTE(review): joblib.load unpickles the upload, which
            # can execute arbitrary code — only accept model files from
            # trusted sources.
            artifact = joblib.load(uploaded_model)
            # Persist to disk, then rerun so the load path below takes over.
            joblib.dump(artifact, "well_model.pkl")
            st.rerun()
        return
    # Load Model
    artifact = joblib.load("well_model.pkl")
    model = artifact['model']
    feats = artifact['features']
    target = artifact['target']
    st.info(f"Loaded Model: **{artifact['algorithm']}** (Predicting: `{target}`)")
    # Work on a copy so session-state data is never mutated by prediction.
    df = st.session_state.data.copy()
    # Check features: the model can only run if all training features exist here.
    missing = [f for f in feats if f not in df.columns]
    if missing:
        st.error(f"Missing features in current dataset: {missing}")
        return
    mode = st.radio("Prediction Mode", ["Predict New Curve (Overwrite)", "Fill Gaps (Imputation)"])
    if st.button("Generate Prediction", type="primary"):
        X = df[feats]
        # Handle NaNs in input features for prediction
        # Option 1: Drop rows (simple)
        # Option 2: Simple Impute (mean) - using simple drop for safety in production
        valid_indices = X.dropna().index
        if len(valid_indices) == 0:
            st.error("Input features contain too many NaNs. Cannot predict.")
            return
        X_valid = X.loc[valid_indices]
        preds = model.predict(X_valid)
        output_col = f"PRED_{target}"
        if mode == "Predict New Curve (Overwrite)":
            # Full synthetic curve: NaN wherever the inputs were incomplete.
            df[output_col] = np.nan
            df.loc[valid_indices, output_col] = preds
            display_col = output_col
        else:
            # Imputation Mode: keep measured values, predict only the gaps.
            if target not in df.columns:
                st.error(f"Target column {target} not found for imputation.")
                return
            df[output_col] = df[target].copy()
            # Find where target is NaN but we have valid inputs
            target_nans = df[df[target].isna()].index
            impute_indices = list(set(target_nans) & set(valid_indices))
            if impute_indices:
                X_impute = df.loc[impute_indices, feats]
                impute_preds = model.predict(X_impute)
                df.loc[impute_indices, output_col] = impute_preds
                st.success(f"Filled {len(impute_indices)} gaps.")
            else:
                st.warning("No gaps found that could be filled with available features.")
            display_col = output_col
        # Visual Comparison
        st.subheader("Result Preview")
        # Plot the prediction (and the original curve, when present) vs depth.
        fig = go.Figure()
        if "Depth" in df.columns:
            fig.add_trace(go.Scatter(x=df[display_col], y=df["Depth"], name="Prediction", line=dict(color='red', dash='dot')))
            if target in df.columns:
                fig.add_trace(go.Scatter(x=df[target], y=df["Depth"], name="Original", line=dict(color='black', width=1)))
            fig.update_yaxes(autorange="reversed", title="Depth")
        else:
            # No depth column available: fall back to an index-based line plot.
            fig.add_trace(go.Scatter(y=df[display_col], mode='lines', name="Prediction"))
        st.plotly_chart(fig, use_container_width=True)
        # Export Options
        st.divider()
        st.subheader("Export Data")
        c1, c2 = st.columns(2)
        with c1:
            csv_data = df.to_csv(index=False).encode('utf-8')
            st.download_button("Download CSV", csv_data, "results.csv", "text/csv")
        with c2:
            try:
                # LAS export preserves the original headers when one was loaded.
                las_bytes = DataManager.export_to_las(df, st.session_state.las_object)
                st.download_button("Download LAS", las_bytes, "results.las", "application/octet-stream")
            except Exception as e:
                st.error(f"LAS export unavailable: {e}")
# Streamlit re-executes this script top-to-bottom on every interaction; the
# guard ensures main() runs only when the file is the entry point.
if __name__ == "__main__":
    main()