# well_log_x / app.py
# Author: pvyas96 — commit 1d3e733 ("Update app.py")
import streamlit as st
import streamlit.components.v1 as components
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import io
import lasio
import os
from typing import Optional, Tuple, Dict, Any, List
import datetime
import copy
# ------------------------------------
# CONFIGURATION & CONSTANTS
# ------------------------------------
# Streamlit page setup. NOTE: st.set_page_config must be the first
# Streamlit command executed in the script.
st.set_page_config(
    page_title="Well Log Analysis Pro",
    layout="wide",
    page_icon="🛢️",
    initial_sidebar_state="expanded"
)
# ------------------------------------
# CLASS: DATA MANAGER
# ------------------------------------
class DataManager:
    """Handles data ingestion, cleaning, and export."""

    @staticmethod
    @st.cache_data(show_spinner=False)
    def load_las(file_bytes: bytes) -> Tuple[pd.DataFrame, lasio.LASFile]:
        """Parse raw LAS bytes into a DataFrame plus the raw LAS object.

        Tries several common text encodings, then standardizes the depth
        column name to "Depth".

        Args:
            file_bytes: Raw bytes of the uploaded .las file.

        Returns:
            Tuple of (cleaned dataframe with a "Depth" column, lasio LASFile).

        Raises:
            ValueError: if the bytes cannot be decoded with any supported
                encoding.
        """
        str_data = None
        # Try common encodings in order of likelihood.
        for encoding in ["utf-8", "latin-1", "cp1252"]:
            try:
                str_data = file_bytes.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if str_data is None:
            raise ValueError("Unable to decode LAS file with supported encodings.")
        file_buffer = io.StringIO(str_data)
        las = lasio.read(file_buffer)
        df = las.df().reset_index()
        # Standardize Depth Column by recognized mnemonic.
        found_depth = False
        for col in df.columns:
            if col.lower() in ["depth", "dept", "depth.m", "depth.ft", "depta"]:
                df = df.rename(columns={col: "Depth"})
                found_depth = True
                break
        if not found_depth:
            # FIX: the LAS index (depth values) becomes the FIRST column
            # after reset_index(). The previous fallback assigned df.index,
            # which at this point is just 0..n-1 row numbers, not depths.
            df = df.rename(columns={df.columns[0]: "Depth"})
        return df, las

    @staticmethod
    @st.cache_data(show_spinner=False)
    def clean_data(df: pd.DataFrame) -> pd.DataFrame:
        """Replace infinite values with NaN so downstream dropna() catches them."""
        df = df.replace([np.inf, -np.inf], np.nan)
        # Rows with NaNs are intentionally kept here; for ML they are
        # dropped per-feature at training time (see ModelManager.train).
        return df

    @staticmethod
    def export_to_las(df: pd.DataFrame, original_las: Optional[lasio.LASFile] = None) -> bytes:
        """Convert a DataFrame back to LAS-format bytes for download.

        If original_las is provided, its WELL/PARAMETER/OTHER header
        sections and curve units are preserved in the output.
        """
        new_las = lasio.LASFile()
        if original_las:
            # lasio's .header is a read-only property, so copy sections
            # item by item. deepcopy avoids aliasing the session-state object.
            for item in original_las.well:
                new_las.well[item.mnemonic] = copy.deepcopy(item)
            for item in original_las.params:
                new_las.params[item.mnemonic] = copy.deepcopy(item)
            # "Other" is typically free text; safe to share.
            new_las.other = original_las.other
        else:
            # No original header: at least stamp a creation date.
            new_las.well.DATE = str(datetime.datetime.now())
        # Build the unit lookup once instead of scanning curves per column.
        units: Dict[str, str] = {}
        if original_las:
            units = {curve.mnemonic: curve.unit for curve in original_las.curves}
        for col in df.columns:
            new_las.append_curve(col, df[col].values, unit=units.get(col, "UNKNOWN"))
        # Write to a string buffer, then encode for the download button.
        output = io.StringIO()
        new_las.write(output)
        return output.getvalue().encode("utf-8")
# ------------------------------------
# CLASS: MODEL MANAGER
# ------------------------------------
class ModelManager:
    """Handles Machine Learning operations."""

    # Registry of supported regressors, keyed by the label shown in the UI.
    MODELS = {
        "Linear Regression": LinearRegression,
        "Decision Tree": DecisionTreeRegressor,
        "Random Forest": RandomForestRegressor,
        "Gradient Boosting": GradientBoostingRegressor,
        "SVR": SVR
    }

    @staticmethod
    def train(
        df: pd.DataFrame,
        target: str,
        features: List[str],
        algorithm: str,
        params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Train a regressor and return a serializable artifact dictionary.

        Args:
            df: Source data; rows with NaN in target or features are dropped.
            target: Column name to predict.
            features: Input column names.
            algorithm: Key into ModelManager.MODELS.
            params: Keyword arguments forwarded to the estimator constructor.

        Returns:
            Dict containing the fitted model, feature/target names,
            algorithm label, training R² and a timestamp.

        Raises:
            ValueError: if fewer than 50 complete rows remain after dropping NaNs.
        """
        # Prepare data: drop rows where target or any feature is NaN.
        train_df = df.dropna(subset=[target] + features)
        if len(train_df) < 50:
            # FIX: the old message said "requires > 50" although the check
            # accepts exactly 50 complete rows.
            raise ValueError("Not enough data points to train (requires at least 50).")
        X = train_df[features]
        y = train_df[target]
        model_class = ModelManager.MODELS[algorithm]
        model = model_class(**params)
        model.fit(X, y)
        # NOTE: this R² is computed on the TRAINING data (no hold-out
        # split), so it is an optimistic estimate of generalization.
        score = model.score(X, y)
        return {
            "model": model,
            "features": features,
            "target": target,
            "algorithm": algorithm,
            "r2_score": score,
            "training_date": str(datetime.datetime.now())
        }
# ------------------------------------
# CLASS: VISUALIZER
# ------------------------------------
class Visualizer:
    """Plotly figure builders: log-track viewer and QC crossplot."""

    @staticmethod
    def plot_well_logs(df: pd.DataFrame, x_cols: List[str], depth_col: str) -> go.Figure:
        """Render one subplot track per curve, sharing a reversed depth axis."""
        n_tracks = len(x_cols)
        fig = make_subplots(rows=1, cols=n_tracks, shared_yaxes=True, subplot_titles=x_cols)
        for track, curve in enumerate(x_cols, start=1):
            trace = go.Scatter(x=df[curve], y=df[depth_col], mode="lines", name=curve)
            fig.add_trace(trace, row=1, col=track)
        # Depth increases downward, so reverse the shared y-axis.
        fig.update_yaxes(title_text=depth_col, autorange="reversed", row=1, col=1)
        fig.update_layout(height=800, showlegend=False, title_text="Well Log Viewer", template="plotly_white")
        return fig

    @staticmethod
    def plot_crossplot(y_true: pd.Series, y_pred: pd.Series, title: str) -> go.Figure:
        """Actual-vs-predicted scatter with a dashed 1:1 reference line."""
        fig = go.Figure()
        scatter = go.Scatter(
            x=y_true,
            y=y_pred,
            mode='markers',
            marker=dict(color='blue', opacity=0.5, size=6),
            name='Data'
        )
        fig.add_trace(scatter)
        # The 1:1 line spans the combined data range of both series.
        low = min(y_true.min(), y_pred.min())
        high = max(y_true.max(), y_pred.max())
        fig.add_shape(
            type="line", x0=low, y0=low, x1=high, y1=high,
            line=dict(color="red", dash="dash", width=2)
        )
        fig.update_layout(
            title=title,
            xaxis_title="Actual",
            yaxis_title="Predicted",
            height=600,
            template="plotly_white"
        )
        return fig
# ------------------------------------
# MAIN APPLICATION LOGIC
# ------------------------------------
def main():
    """App entry point: session-state init, sidebar navigation, page routing."""
    # Make sure the keys shared across pages always exist.
    for key in ("data", "las_object"):
        if key not in st.session_state:
            st.session_state[key] = None
    # Page label -> renderer dispatch table (insertion order drives the radio).
    pages = {
        "Home": render_home,
        "Visualizer": render_visualizer,
        "Trainer": render_trainer,
        "Prediction": render_prediction,
    }
    with st.sidebar:
        st.title("Navigation")
        page = st.radio("Go to", list(pages))
        st.markdown("---")
        st.caption("Pro Version 2.0")
    pages[page]()
# ------------------------------------
# PAGE RENDERERS
# ------------------------------------
def render_home():
    """Landing page: file upload on the left, a short usage guide on the right."""
    st.title("🛢️ Well Log Analysis Pro")
    st.markdown("### Production-Grade Petrophysics ML Tool")
    upload_area, guide_area = st.columns([2, 1])
    with upload_area:
        st.info("Upload standard `.LAS` files or `.CSV` data to begin.")
        uploaded_file = st.file_uploader("Upload File", type=["csv", "las"])
        if uploaded_file:
            try:
                if uploaded_file.name.lower().endswith(".las"):
                    raw = uploaded_file.read()
                    df, las_obj = DataManager.load_las(raw)
                    st.session_state.las_object = las_obj
                else:
                    df = pd.read_csv(uploaded_file)
                    st.session_state.las_object = None  # CSVs carry no LAS header
                # Normalize infinities/NaNs before storing in session state.
                df = DataManager.clean_data(df)
                st.session_state.data = df
                st.success(f"Loaded **{uploaded_file.name}** successfully!")
                st.write(f"**Shape:** {df.shape[0]} rows, {df.shape[1]} columns")
                st.dataframe(df.head(), use_container_width=True)
            except Exception as e:
                st.error(f"Failed to load file: {str(e)}")
    with guide_area:
        st.subheader("Quick Guide")
        st.markdown("""
1. **Upload** data here.
2. **Visualize** logs to check quality.
3. **Train** a model on existing curves.
4. **Predict** missing curves or export synthetic logs.
""")
def render_visualizer():
    """Log viewer page: pick the depth column and curves, render the tracks."""
    if st.session_state.data is None:
        st.warning("Please upload data on the Home page first.")
        return
    st.title("📊 Log Visualizer")
    df = st.session_state.data
    columns = df.columns.tolist()
    c1, c2 = st.columns([1, 3])
    with c1:
        depth_col = st.selectbox("Depth Column", columns, index=0)
        plottable = [c for c in columns if c != depth_col]
        # FIX: defaults were sliced from `columns` and could include the
        # selected depth column, which is excluded from the options list —
        # st.multiselect raises when a default is not among its options.
        # Slicing the filtered list keeps the "up to 3 defaults" behavior
        # and is always valid.
        x_cols = st.multiselect("Logs to Plot", plottable, default=plottable[:3])
    with c2:
        if x_cols:
            fig = Visualizer.plot_well_logs(df, x_cols, depth_col)
            st.plotly_chart(fig, use_container_width=True)
def render_trainer():
    """Model training page: choose target/features/algorithm, train, download."""
    if st.session_state.data is None:
        st.warning("Please upload data first.")
        return
    st.title("🤖 Model Trainer")
    df = st.session_state.data
    columns = df.columns.tolist()
    c1, c2, c3 = st.columns(3)
    target = c1.selectbox("Target (Output)", columns)
    features = c2.multiselect("Features (Input)", [c for c in columns if c != target])
    algo = c3.selectbox("Algorithm", list(ModelManager.MODELS.keys()), index=2)
    with st.expander("Advanced Hyperparameters"):
        n_estimators = st.slider("Trees (RF/GB)", 10, 200, 100)
        max_depth = st.slider("Max Depth", 3, 20, 10)
    if st.button("Train Model", type="primary"):
        if not features:
            st.error("Select features!")
            return
        # Map UI hyperparameters onto the chosen estimator.
        # FIX: the Max Depth slider previously applied only to the ensemble
        # models; Decision Tree silently ignored it (and had no fixed seed).
        params = {}
        if algo in ["Random Forest", "Gradient Boosting"]:
            params = {"n_estimators": n_estimators, "max_depth": max_depth, "random_state": 42}
        elif algo == "Decision Tree":
            params = {"max_depth": max_depth, "random_state": 42}
        with st.spinner("Training model..."):
            try:
                artifact = ModelManager.train(df, target, features, algo, params)
                # FIX: serialize once and reuse the bytes for both the
                # on-disk copy (used by the Prediction page) and the
                # download button, instead of dumping twice.
                model_bytes = io.BytesIO()
                joblib.dump(artifact, model_bytes)
                with open("well_model.pkl", "wb") as fh:
                    fh.write(model_bytes.getvalue())
                st.success(f"Trained {algo} with R²: {artifact['r2_score']:.4f}")
                st.download_button(
                    label="⬇️ Download Trained Model (.pkl)",
                    data=model_bytes.getvalue(),
                    file_name="well_log_model.pkl",
                    mime="application/octet-stream"
                )
            except Exception as e:
                st.error(f"Training failed: {e}")
def render_prediction():
    """Prediction page: load a trained artifact, generate a synthetic curve
    (or fill gaps in an existing one), preview it against depth, and export
    the results as CSV and/or LAS.
    """
    st.title("🔮 Prediction & Export")
    if st.session_state.data is None:
        st.warning("Upload data first.")
        return
    if not os.path.exists("well_model.pkl"):
        st.warning("No model found. Train one in the 'Trainer' tab or upload a .pkl file.")
        uploaded_model = st.file_uploader("Upload Pre-trained Model (.pkl)", type="pkl")
        if uploaded_model:
            # SECURITY NOTE(review): joblib.load unpickles the upload, which
            # can execute arbitrary code — only accept trusted model files.
            artifact = joblib.load(uploaded_model)
            # Persist to disk so the rerun below finds it via the path check.
            joblib.dump(artifact, "well_model.pkl")
            st.rerun()
        return
    # Load the artifact produced by the Trainer page (or uploaded above).
    artifact = joblib.load("well_model.pkl")
    model = artifact['model']
    feats = artifact['features']
    target = artifact['target']
    st.info(f"Loaded Model: **{artifact['algorithm']}** (Predicting: `{target}`)")
    # Work on a copy so session-state data is never mutated by predictions.
    df = st.session_state.data.copy()
    # The current dataset must provide every feature the model was trained on.
    missing = [f for f in feats if f not in df.columns]
    if missing:
        st.error(f"Missing features in current dataset: {missing}")
        return
    mode = st.radio("Prediction Mode", ["Predict New Curve (Overwrite)", "Fill Gaps (Imputation)"])
    if st.button("Generate Prediction", type="primary"):
        X = df[feats]
        # Handle NaNs in input features for prediction.
        # Option 1: Drop rows (simple)
        # Option 2: Simple Impute (mean) — using simple drop for safety in production.
        valid_indices = X.dropna().index
        if len(valid_indices) == 0:
            st.error("Input features contain too many NaNs. Cannot predict.")
            return
        X_valid = X.loc[valid_indices]
        preds = model.predict(X_valid)
        output_col = f"PRED_{target}"
        if mode == "Predict New Curve (Overwrite)":
            # Overwrite mode: predictions wherever features are complete,
            # NaN elsewhere.
            df[output_col] = np.nan
            df.loc[valid_indices, output_col] = preds
            display_col = output_col
        else:
            # Imputation mode: keep measured values, predict only the gaps.
            if target not in df.columns:
                st.error(f"Target column {target} not found for imputation.")
                return
            df[output_col] = df[target].copy()
            # Gaps = rows where the target is NaN but all features are valid.
            target_nans = df[df[target].isna()].index
            impute_indices = list(set(target_nans) & set(valid_indices))
            if impute_indices:
                X_impute = df.loc[impute_indices, feats]
                impute_preds = model.predict(X_impute)
                df.loc[impute_indices, output_col] = impute_preds
                st.success(f"Filled {len(impute_indices)} gaps.")
            else:
                st.warning("No gaps found that could be filled with available features.")
            display_col = output_col
        # Visual comparison of predicted vs. original curve.
        st.subheader("Result Preview")
        fig = go.Figure()
        if "Depth" in df.columns:
            fig.add_trace(go.Scatter(x=df[display_col], y=df["Depth"], name="Prediction", line=dict(color='red', dash='dot')))
            if target in df.columns:
                fig.add_trace(go.Scatter(x=df[target], y=df["Depth"], name="Original", line=dict(color='black', width=1)))
            fig.update_yaxes(autorange="reversed", title="Depth")
        else:
            # No depth column available — fall back to sample-index plotting.
            fig.add_trace(go.Scatter(y=df[display_col], mode='lines', name="Prediction"))
        st.plotly_chart(fig, use_container_width=True)
        # Export options.
        st.divider()
        st.subheader("Export Data")
        c1, c2 = st.columns(2)
        with c1:
            csv_data = df.to_csv(index=False).encode('utf-8')
            st.download_button("Download CSV", csv_data, "results.csv", "text/csv")
        with c2:
            try:
                # Reuses the original LAS header when the source file was LAS.
                las_bytes = DataManager.export_to_las(df, st.session_state.las_object)
                st.download_button("Download LAS", las_bytes, "results.las", "application/octet-stream")
            except Exception as e:
                st.error(f"LAS export unavailable: {e}")
# Script entry point guard.
if __name__ == "__main__":
    main()