# Well_Log_X_2 — src/streamlit_app.py
# Streamlit app: well-log visualization, model training, and prediction comparison.
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, Input
import multiprocessing
import joblib
# Page configuration
st.set_page_config(page_title="Well Log Analysis", layout="wide")

# Sidebar file upload. The whole app needs a CSV, so halt the script run
# (st.stop) until one is provided; rows with any NaN are dropped up front.
uploaded_file = st.sidebar.file_uploader("Upload your log.csv file", type=["csv"])
if uploaded_file is None:
    st.sidebar.warning("Please upload a CSV file.")
    st.stop()
data = pd.read_csv(uploaded_file).dropna().reset_index(drop=True)
st.sidebar.success("File uploaded successfully!")
# Page 1: Visualizer
def page_visualizer(data):
st.title("Well Log Visualizer")
curve_columns = st.multiselect("Select columns to plot", data.columns, default=data.columns.tolist())
depth_column = st.selectbox("Select depth column", data.columns, index=0)
if curve_columns:
fig = make_subplots(rows=1, cols=len(curve_columns), shared_yaxes=True, subplot_titles=curve_columns)
for i, col in enumerate(curve_columns, start=1):
fig.add_trace(go.Scatter(x=data[col], y=data[depth_column], mode='lines', name=col), row=1, col=i)
fig.update_xaxes(title_text=col, row=1, col=i)
fig.update_yaxes(autorange='reversed', title_text=depth_column, row=1, col=1)
fig.update_layout(height=800, width=2000, showlegend=False)
st.plotly_chart(fig, use_container_width=True)
# Page 2: Trainer
# Page 2: Trainer
def page_trainer(data):
    """Train a regressor for one log column and stash it in session state.

    The selected target column is predicted from all remaining columns. An
    un-shuffled 80/20 chronological split is used, which suits depth-ordered
    log data. The fitted model (plus the CNN's scaler, when applicable) is
    stored in ``st.session_state`` for the Prediction page and for download.
    """
    st.title("Model Trainer")
    target_column = st.selectbox("Select target column", data.columns, index=0)
    model_type = st.selectbox("Select model type", ["Random Forest", "XGBoost", "Linear Regression", "1D CNN"])

    # Prepare data: everything except the target is a feature.
    df = data.dropna().reset_index(drop=True)
    features = df.drop(columns=[target_column])
    target = df[target_column].values

    # Chronological train/test split: first 80% train, last 20% test.
    split_idx = int(0.8 * len(df))
    X_train, X_test = features.iloc[:split_idx], features.iloc[split_idx:]
    y_train, y_test = target[:split_idx], target[split_idx:]

    if model_type in ["Random Forest", "XGBoost", "Linear Regression"]:
        if model_type == "Random Forest":
            model = RandomForestRegressor(n_estimators=500, n_jobs=-1, random_state=42)
        elif model_type == "XGBoost":
            model = xgb.XGBRegressor(n_estimators=200, n_jobs=-1, random_state=42)
        else:  # Linear Regression
            model = LinearRegression()
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        st.success(f"{model_type} R^2 Score: {score:.4f}")
        # Persist for the Prediction page and the download button below.
        st.session_state.model = model
        st.session_state.is_cnn = False
        st.session_state.features = features
        st.session_state.target_column = target_column
    elif model_type == "1D CNN":
        # Standardize features and add a trailing channel axis for Conv1D:
        # (n_samples, n_features) -> (n_samples, n_features, 1).
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(features)
        X = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))
        X_train, X_test = X[:split_idx], X[split_idx:]
        model = Sequential([
            Input(shape=(X.shape[1], 1)),
            Conv1D(32, 3, activation='relu'),
            Flatten(),
            Dense(64, activation='relu'),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        model.fit(X_train, y_train, epochs=30, batch_size=16, verbose=0)
        loss, mae = model.evaluate(X_test, y_test, verbose=0)
        st.success(f"1D CNN MAE: {mae:.4f}")
        st.session_state.model = model
        st.session_state.scaler = scaler  # needed to scale inputs at prediction time
        st.session_state.is_cnn = True
        st.session_state.features = features
        st.session_state.target_column = target_column

    # Download model. Serialize the model held in session state (not a local
    # variable) so the downloaded artifact always matches what the Prediction
    # page will use, even across Streamlit reruns.
    if st.button("Download Trained Model"):
        if not st.session_state.get("is_cnn", False):
            joblib.dump(st.session_state.model, "trained_model.pkl")
            with open("trained_model.pkl", "rb") as f:
                st.download_button("Download sklearn model", f, file_name="trained_model.pkl")
        else:
            st.session_state.model.save("cnn_model.h5")
            with open("cnn_model.h5", "rb") as f:
                st.download_button("Download CNN model", f, file_name="cnn_model.h5")
# Page 3: Prediction
# Page 3: Prediction
def page_prediction(data):
    """Overlay the trained model's predictions on the original target curve.

    Requires a model previously stored in session state by the Trainer page;
    otherwise a warning is shown and the page exits early.
    """
    st.title("Prediction Comparison")
    if 'model' not in st.session_state:
        st.warning("Please train a model first.")
        return

    features = st.session_state.features
    target_column = st.session_state.target_column
    model = st.session_state.model

    if st.session_state.get("is_cnn", False):
        # Reapply the training-time scaling and restore the channel axis
        # expected by the Conv1D input layer.
        scaled = st.session_state.scaler.transform(features)
        cnn_input = scaled.reshape((scaled.shape[0], scaled.shape[1], 1))
        predicted = model.predict(cnn_input).flatten()
    else:
        predicted = model.predict(features)

    data_plot = data.copy()
    data_plot['Predicted'] = predicted
    depth_column = st.selectbox("Select depth column", data.columns, index=0)

    fig = go.Figure()
    for label, column in (('Original', target_column), ('Predicted', 'Predicted')):
        fig.add_trace(go.Scatter(x=data_plot[column], y=data_plot[depth_column],
                                 mode='lines+markers', name=label))
    fig.update_yaxes(autorange='reversed', title_text=depth_column)
    fig.update_layout(height=600, width=800, title="Original vs Predicted")
    st.plotly_chart(fig, use_container_width=True)
# Main navigation
# Main navigation
def main():
    """Route between the three app pages via a sidebar radio control."""
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Visualizer", "Trainer", "Prediction"])
    # Dispatch table keeps page routing declarative.
    pages = {
        "Visualizer": page_visualizer,
        "Trainer": page_trainer,
        "Prediction": page_prediction,
    }
    pages[page](data)


if __name__ == "__main__":
    main()