import streamlit as st
import streamlit.components.v1 as components
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import io
import lasio
import os
from typing import Optional, Tuple, Dict, Any, List
import datetime
import copy

# ------------------------------------
# CONFIGURATION & CONSTANTS
# ------------------------------------
st.set_page_config(
    page_title="Well Log Analysis Pro",
    layout="wide",
    page_icon="🛢️",
    initial_sidebar_state="expanded"
)


# ------------------------------------
# CLASS: DATA MANAGER
# ------------------------------------
class DataManager:
    """Handles data ingestion, cleaning, and export."""

    @staticmethod
    @st.cache_data(show_spinner=False)
    def load_las(file_bytes: bytes) -> Tuple[pd.DataFrame, lasio.LASFile]:
        """
        Robust LAS file loader that handles encoding issues.

        Tries a list of common encodings before parsing, then normalizes the
        depth track so downstream pages can always rely on a "Depth" column.

        Args:
            file_bytes: Raw bytes of the uploaded .las file.

        Returns:
            Tuple of (cleaned dataframe, raw lasio.LASFile object).

        Raises:
            ValueError: If the bytes cannot be decoded with any supported encoding.
        """
        str_data = None
        # Try common encodings; LAS files in the wild are frequently latin-1/cp1252.
        for encoding in ["utf-8", "latin-1", "cp1252"]:
            try:
                str_data = file_bytes.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if str_data is None:
            raise ValueError("Unable to decode LAS file with supported encodings.")

        file_buffer = io.StringIO(str_data)
        las = lasio.read(file_buffer)
        # las.df() indexes by the depth curve; reset_index() moves it into column 0.
        df = las.df().reset_index()

        # Standardize Depth Column by known mnemonic aliases.
        found_depth = False
        for col in df.columns:
            if col.lower() in ["depth", "dept", "depth.m", "depth.ft", "depta"]:
                df = df.rename(columns={col: "Depth"})
                found_depth = True
                break
        if not found_depth:
            # FIX: the previous fallback assigned df.index — which after
            # reset_index() is just row numbers 0..n-1, not depths — and only
            # when the index unit contained "m" (ft-indexed logs got nothing).
            # The former LAS depth index is always the FIRST column after
            # reset_index(), so rename that column regardless of unit.
            df = df.rename(columns={df.columns[0]: "Depth"})
        return df, las

    @staticmethod
    @st.cache_data(show_spinner=False)
    def clean_data(df: pd.DataFrame) -> pd.DataFrame:
        """Removes infinite values and standardizes NaNs.

        Rows with NaNs are intentionally kept here: for ML, rows with NaNs in
        feature columns are dropped later, during training/prediction only.
        """
        df = df.replace([np.inf, -np.inf], np.nan)
        return df

    @staticmethod
    def export_to_las(df: pd.DataFrame, original_las: Optional[lasio.LASFile] = None) -> bytes:
        """
        Converts DataFrame back to LAS format bytes for download.

        Args:
            df: DataFrame whose columns become LAS curves.
            original_las: If provided, its WELL/PARAM headers and curve units
                are preserved in the output.

        Returns:
            UTF-8 encoded LAS file content.
        """
        new_las = lasio.LASFile()
        if original_las:
            # NOTE: lasio's .header is a read-only property, so sections are
            # copied item by item. deepcopy avoids aliasing objects that live
            # in Streamlit session state.
            for item in original_las.well:
                new_las.well[item.mnemonic] = copy.deepcopy(item)
            for item in original_las.params:
                new_las.params[item.mnemonic] = copy.deepcopy(item)
            # ~Other section is typically free text; safe to share.
            new_las.other = original_las.other
        else:
            # No source header: stamp a creation date at minimum.
            new_las.well.DATE = str(datetime.datetime.now())

        # Add curves from the DataFrame, preserving units where known.
        for col in df.columns:
            unit = "UNKNOWN"
            if original_las:
                for curve in original_las.curves:
                    if curve.mnemonic == col:
                        unit = curve.unit
                        break
            new_las.append_curve(col, df[col].values, unit=unit)

        # Write to a string buffer then encode for st.download_button.
        output = io.StringIO()
        new_las.write(output)
        return output.getvalue().encode("utf-8")


# ------------------------------------
# CLASS: MODEL MANAGER
# ------------------------------------
class ModelManager:
    """Handles Machine Learning operations."""

    # Registry of supported regressors; keys are the UI labels.
    MODELS = {
        "Linear Regression": LinearRegression,
        "Decision Tree": DecisionTreeRegressor,
        "Random Forest": RandomForestRegressor,
        "Gradient Boosting": GradientBoostingRegressor,
        "SVR": SVR
    }

    @staticmethod
    def train(
        df: pd.DataFrame,
        target: str,
        features: List[str],
        algorithm: str,
        params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Trains a model and returns the artifact dictionary.

        Args:
            df: Source data; rows with NaN in target or features are dropped.
            target: Column to predict.
            features: Input columns.
            algorithm: Key into ModelManager.MODELS.
            params: Keyword arguments forwarded to the model constructor.

        Returns:
            Dict with keys: model, features, target, algorithm, r2_score
            (NOTE: computed on the training set, so it is optimistic),
            training_date.

        Raises:
            ValueError: If fewer than 50 complete rows are available.
        """
        # Prepare Data: drop rows where Target or any Feature is NaN.
        train_df = df.dropna(subset=[target] + features)
        if len(train_df) < 50:
            raise ValueError("Not enough data points to train (requires > 50).")

        X = train_df[features]
        y = train_df[target]

        model_class = ModelManager.MODELS[algorithm]
        model = model_class(**params)

        model.fit(X, y)
        score = model.score(X, y)

        return {
            "model": model,
            "features": features,
            "target": target,
            "algorithm": algorithm,
            "r2_score": score,
            "training_date": str(datetime.datetime.now())
        }
# ------------------------------------
# CLASS: VISUALIZER
# ------------------------------------
class Visualizer:
    """Handles Plotly visualizations."""

    @staticmethod
    def plot_well_logs(df: pd.DataFrame, x_cols: List[str], depth_col: str) -> go.Figure:
        """Build a multi-track log viewer: one column-subplot per curve,
        all sharing a reversed depth axis (depth increases downward)."""
        fig = make_subplots(
            rows=1,
            cols=len(x_cols),
            shared_yaxes=True,
            subplot_titles=x_cols,
        )
        for track, curve in enumerate(x_cols, start=1):
            trace = go.Scatter(x=df[curve], y=df[depth_col], mode="lines", name=curve)
            fig.add_trace(trace, row=1, col=track)
        # Reversed range so shallow depths sit at the top, as in standard log displays.
        fig.update_yaxes(title_text=depth_col, autorange="reversed", row=1, col=1)
        fig.update_layout(
            height=800,
            showlegend=False,
            title_text="Well Log Viewer",
            template="plotly_white",
        )
        return fig

    @staticmethod
    def plot_crossplot(y_true: pd.Series, y_pred: pd.Series, title: str) -> go.Figure:
        """Scatter of predicted vs. actual values with a dashed 1:1 reference line."""
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=y_true,
            y=y_pred,
            mode='markers',
            marker=dict(color='blue', opacity=0.5, size=6),
            name='Data'
        ))
        # 1:1 line spanning the combined extent of both series.
        lo = min(y_true.min(), y_pred.min())
        hi = max(y_true.max(), y_pred.max())
        fig.add_shape(
            type="line",
            x0=lo, y0=lo, x1=hi, y1=hi,
            line=dict(color="red", dash="dash", width=2)
        )
        fig.update_layout(
            title=title,
            xaxis_title="Actual",
            yaxis_title="Predicted",
            height=600,
            template="plotly_white"
        )
        return fig


# ------------------------------------
# MAIN APPLICATION LOGIC
# ------------------------------------
def main():
    """App entry point: seed session state, draw the sidebar, route to a page."""
    # --- Session State Initialization ---
    for key in ("data", "las_object"):
        if key not in st.session_state:
            st.session_state[key] = None

    # --- Sidebar ---
    with st.sidebar:
        st.title("Navigation")
        page = st.radio("Go to", ["Home", "Visualizer", "Trainer", "Prediction"])
        st.markdown("---")
        st.caption("Pro Version 2.0")

    # --- Page Routing (dispatch table; st.radio guarantees a valid key) ---
    routes = {
        "Home": render_home,
        "Visualizer": render_visualizer,
        "Trainer": render_trainer,
        "Prediction": render_prediction,
    }
    routes[page]()


# ------------------------------------
# PAGE RENDERERS
# ------------------------------------
def render_home():
    """Landing page: file upload (LAS/CSV), preview, and a quick-start guide."""
    st.title("🛢️ Well Log Analysis Pro")
    st.markdown("### Production-Grade Petrophysics ML Tool")
    col1, col2 = st.columns([2, 1])

    with col1:
        st.info("Upload standard `.LAS` files or `.CSV` data to begin.")
        uploaded_file = st.file_uploader("Upload File", type=["csv", "las"])
        if uploaded_file:
            try:
                if uploaded_file.name.lower().endswith(".las"):
                    bytes_data = uploaded_file.read()
                    df, las_obj = DataManager.load_las(bytes_data)
                    # Keep the LAS object so exports can preserve headers/units.
                    st.session_state.las_object = las_obj
                else:
                    df = pd.read_csv(uploaded_file)
                    st.session_state.las_object = None  # No LAS header for CSVs

                # Global clean (strip +/-inf) before storing in session state.
                df = DataManager.clean_data(df)
                st.session_state.data = df

                st.success(f"Loaded **{uploaded_file.name}** successfully!")
                st.write(f"**Shape:** {df.shape[0]} rows, {df.shape[1]} columns")
                st.dataframe(df.head(), use_container_width=True)
            except Exception as e:
                # Boundary handler: surface any parse/decode failure to the user.
                st.error(f"Failed to load file: {str(e)}")

    with col2:
        st.subheader("Quick Guide")
        st.markdown("""
        1. **Upload** data here.
        2. **Visualize** logs to check quality.
        3. **Train** a model on existing curves.
        4. **Predict** missing curves or export synthetic logs.
        """)


def render_visualizer():
    """Log viewer page: pick a depth column and curves, render multi-track plot."""
    if st.session_state.data is None:
        st.warning("Please upload data on the Home page first.")
        return

    st.title("📊 Log Visualizer")
    df = st.session_state.data
    columns = df.columns.tolist()

    c1, c2 = st.columns([1, 3])
    with c1:
        depth_col = st.selectbox("Depth Column", columns, index=0)
        options = [c for c in columns if c != depth_col]
        # FIX: defaults were sliced from the FULL column list, so if the user
        # selected a depth column other than the first one, the default could
        # contain the depth column itself — which is not in the options list
        # and makes st.multiselect raise. Slice the filtered options instead
        # (identical result when depth_col is the first column).
        x_cols = st.multiselect("Logs to Plot", options, default=options[:3])
    with c2:
        if x_cols:
            fig = Visualizer.plot_well_logs(df, x_cols, depth_col)
            st.plotly_chart(fig, use_container_width=True)


def render_trainer():
    """Training page: choose target/features/algorithm, train, and export model."""
    if st.session_state.data is None:
        st.warning("Please upload data first.")
        return

    st.title("🤖 Model Trainer")
    df = st.session_state.data
    columns = df.columns.tolist()

    c1, c2, c3 = st.columns(3)
    target = c1.selectbox("Target (Output)", columns)
    features = c2.multiselect("Features (Input)", [c for c in columns if c != target])
    algo = c3.selectbox("Algorithm", list(ModelManager.MODELS.keys()), index=2)

    with st.expander("Advanced Hyperparameters"):
        n_estimators = st.slider("Trees (RF/GB)", 10, 200, 100)
        max_depth = st.slider("Max Depth", 3, 20, 10)

    if st.button("Train Model", type="primary"):
        if not features:
            st.error("Select features!")
            return

        params: Dict[str, Any] = {}
        if algo in ["Random Forest", "Gradient Boosting"]:
            params = {"n_estimators": n_estimators, "max_depth": max_depth, "random_state": 42}
        elif algo == "Decision Tree":
            # FIX: the Max Depth slider was silently ignored for Decision Tree,
            # even though DecisionTreeRegressor accepts max_depth/random_state.
            params = {"max_depth": max_depth, "random_state": 42}

        with st.spinner("Training model..."):
            try:
                artifact = ModelManager.train(df, target, features, algo, params)
                # Persist to disk so the Prediction page can pick it up.
                joblib.dump(artifact, "well_model.pkl")
                st.success(f"Trained {algo} with R²: {artifact['r2_score']:.4f}")

                # Also offer the artifact as a downloadable .pkl.
                model_bytes = io.BytesIO()
                joblib.dump(artifact, model_bytes)
                st.download_button(
                    label="⬇️ Download Trained Model (.pkl)",
                    data=model_bytes.getvalue(),
                    file_name="well_log_model.pkl",
                    mime="application/octet-stream"
                )
            except Exception as e:
                st.error(f"Training failed: {e}")


def render_prediction():
    """Prediction page: apply the saved model to the loaded data and export."""
    st.title("🔮 Prediction & Export")
    if st.session_state.data is None:
        st.warning("Upload data first.")
        return

    if not os.path.exists("well_model.pkl"):
        st.warning("No model found. Train one in the 'Trainer' tab or upload a .pkl file.")
        uploaded_model = st.file_uploader("Upload Pre-trained Model (.pkl)", type="pkl")
        if uploaded_model:
            # SECURITY: joblib.load deserializes pickle data, which can execute
            # arbitrary code. Only load model files from trusted sources; for a
            # multi-user deployment this upload path should be removed or the
            # artifact stored in a safer format (e.g. ONNX).
            artifact = joblib.load(uploaded_model)
            joblib.dump(artifact, "well_model.pkl")
            st.rerun()
        return

    # Load the persisted model artifact produced by render_trainer.
    artifact = joblib.load("well_model.pkl")
    model = artifact['model']
    feats = artifact['features']
    target = artifact['target']
    st.info(f"Loaded Model: **{artifact['algorithm']}** (Predicting: `{target}`)")

    df = st.session_state.data.copy()

    # The current dataset must contain every feature the model was trained on.
    missing = [f for f in feats if f not in df.columns]
    if missing:
        st.error(f"Missing features in current dataset: {missing}")
        return

    mode = st.radio("Prediction Mode", ["Predict New Curve (Overwrite)", "Fill Gaps (Imputation)"])

    if st.button("Generate Prediction", type="primary"):
        X = df[feats]
        # Handle NaNs in input features by restricting to complete rows
        # (simple and safe; no imputation of the inputs themselves).
        valid_indices = X.dropna().index
        if len(valid_indices) == 0:
            st.error("Input features contain too many NaNs. Cannot predict.")
            return

        X_valid = X.loc[valid_indices]
        preds = model.predict(X_valid)
        output_col = f"PRED_{target}"

        if mode == "Predict New Curve (Overwrite)":
            # Full synthetic curve: NaN wherever inputs were incomplete.
            df[output_col] = np.nan
            df.loc[valid_indices, output_col] = preds
            display_col = output_col
        else:
            # Imputation Mode: only fill rows where the target is missing
            # but all input features are present.
            if target not in df.columns:
                st.error(f"Target column {target} not found for imputation.")
                return
            df[output_col] = df[target].copy()
            target_nans = df[df[target].isna()].index
            impute_indices = list(set(target_nans) & set(valid_indices))
            if impute_indices:
                X_impute = df.loc[impute_indices, feats]
                impute_preds = model.predict(X_impute)
                df.loc[impute_indices, output_col] = impute_preds
                st.success(f"Filled {len(impute_indices)} gaps.")
            else:
                st.warning("No gaps found that could be filled with available features.")
            display_col = output_col

        # Visual Comparison
        st.subheader("Result Preview")
        fig = go.Figure()
        if "Depth" in df.columns:
            fig.add_trace(go.Scatter(x=df[display_col], y=df["Depth"], name="Prediction",
                                     line=dict(color='red', dash='dot')))
            if target in df.columns:
                fig.add_trace(go.Scatter(x=df[target], y=df["Depth"], name="Original",
                                         line=dict(color='black', width=1)))
            fig.update_yaxes(autorange="reversed", title="Depth")
        else:
            # No depth track available: plot against row index instead.
            fig.add_trace(go.Scatter(y=df[display_col], mode='lines', name="Prediction"))
        st.plotly_chart(fig, use_container_width=True)

        # Export Options
        st.divider()
        st.subheader("Export Data")
        c1, c2 = st.columns(2)
        with c1:
            csv_data = df.to_csv(index=False).encode('utf-8')
            st.download_button("Download CSV", csv_data, "results.csv", "text/csv")
        with c2:
            try:
                las_bytes = DataManager.export_to_las(df, st.session_state.las_object)
                st.download_button("Download LAS", las_bytes, "results.las", "application/octet-stream")
            except Exception as e:
                # Best-effort: LAS export can fail (e.g. header issues) without
                # blocking the CSV download.
                st.error(f"LAS export unavailable: {e}")


if __name__ == "__main__":
    main()