import streamlit as st
import time
def go_to(page_name, from_callback=False):
    """
    Navigate to *page_name* by storing it in ``st.session_state.page``.

    Parameters
    ----------
    page_name : str
        Identifier of the page to switch to.
    from_callback : bool, optional
        Set to ``True`` when this is invoked from a Streamlit ``on_click``
        callback — Streamlit already reruns the script after callbacks,
        so an explicit ``st.rerun()`` would be redundant.
    """
    st.session_state.page = page_name
    if from_callback:
        # Callback path: Streamlit handles the rerun itself.
        return
    st.rerun()
def add_navigation(previous_page, next_page):
    """
    Render "Previous"/"Next" navigation buttons followed by a divider.

    Parameters
    ----------
    previous_page : str or None
        Target page for the "Previous" button; ``None`` hides the button.
    next_page : str or None
        Target page for the "Next" button; ``None`` hides the button.
    """
    left_col, _middle, right_col = st.columns([1, 4, 1])
    if previous_page is not None:
        with left_col:
            if st.button("Previous"):
                go_to(previous_page)
    if next_page is not None:
        with right_col:
            if st.button("Next"):
                go_to(next_page)
    st.markdown("---")
def add_fadein_text(paragraphs):
    """
    Reveal a sequence of paragraphs one after another.

    Parameters
    ----------
    paragraphs : sequence of 2-tuples
        Each item is ``(text, delay)``: ``text`` is the markdown/HTML to
        display and ``delay`` is presumably seconds to wait before revealing
        the next paragraph (the ``time.sleep`` call is currently commented
        out, so all paragraphs appear at once).

    NOTE(review): the CSS fade-in rules and the per-paragraph HTML wrapper
    appear to have been stripped from this file during extraction — the
    ``st.markdown`` payloads below are placeholders; restore the original
    markup for the animation to work. TODO confirm against the repository.
    """
    # Define CSS fade-in animation (original CSS content lost; empty
    # placeholder keeps the call structure intact).
    st.markdown("""
    """, unsafe_allow_html=True)
    # Create one placeholder per paragraph so each can be filled in turn.
    placeholders = [st.empty() for _ in paragraphs]
    # Reveal paragraphs sequentially.
    for i, p in enumerate(paragraphs):
        # Original single-quoted f-string was unterminated (SyntaxError);
        # reconstructed as a triple-quoted f-string with the same content.
        placeholders[i].markdown(f"""
{p[0]}
""", unsafe_allow_html=True)
        # time.sleep(p[1])  # delay (seconds) before showing next one
def add_instruction_text(text_to_display):
    """
    Render *text_to_display* (HTML permitted) followed by a divider.

    NOTE(review): an HTML wrapper around the text appears to have been
    stripped during extraction (the original f-string was unterminated,
    a SyntaxError) — TODO confirm the intended markup against the repo.
    """
    st.markdown(f"{text_to_display}", unsafe_allow_html=True)
    st.markdown("---")
def add_red_text(text_to_display):
    """
    Render *text_to_display*; the function name implies red styling.

    NOTE(review): the styling markup (presumably a red-colored ``<span>``
    or styled ``<div>``) appears to have been stripped during extraction —
    the original f-string was unterminated (SyntaxError). Reconstructed
    minimally; TODO restore the original HTML from the repository.
    """
    st.markdown(
        f"{text_to_display}",
        unsafe_allow_html=True,
    )
# Define pipeline stages
# pipeline_data maps each top-level ML-pipeline stage to:
#   - "explain_text": a markdown blurb describing that stage, and
#   - one nested dict per decision area, each carrying its own
#     "explain_text" plus a "sub_decisions" list of prompt questions.
# NOTE(review): the leading characters in the keys ("๐ฅ", "โ๏ธ", ...) look
# like mis-decoded emoji (mojibake) — confirm the source-file encoding
# before re-saving; keys are left byte-identical here because other code
# may look them up verbatim.
pipeline_data = {
"๐ฅ Data Collection": {
"explain_text": "**๐ฅ Data Collection:** Decisions about what data to collect and how.",
"๐ Data Sources": {
"explain_text": "**๐ Data Sources:** What data sources will be used to collect data?",
"sub_decisions": [
"Collect existing dataset or new sensor data?",
"Public datasets or Private datasets?",
"Design Web Scraping or use APIs?"
]
},
"๐ Data Usage": {
"explain_text": "**๐ Data Usage:** How should the data be used, given any license or permission constraints?",
"sub_decisions": [
"Ethical concerns to be addressed?",
"Commercial use policies?",
"Geographic limits?"
]
},
"๐งน Data Quality": {
"explain_text": "**๐งน Data Quality:** What kind of quality checks are done to decide data collection?",
"sub_decisions": [
"Missing value checks to see if critical field are affected?",
"Potential duplicates?",
"Format consistency and encoding issues?"
]
},
"๐ฒ Data Sampling": {
"explain_text": "**๐ฒ Data Sampling:** How to sample from a potentially bigger data source?",
"sub_decisions": [
"Random sampling/stratified sampling/cluster sampling?",
"Sample size?",
"Potential imbalance?",
"Additional synthetic data?"
]
},
"๐พ Data Storage": {
"explain_text": "**๐พ Data Storage:** How and where to store the data?",
"sub_decisions": [
"Backup frequency?",
"File format choice?"
]
},
},
"โ๏ธ Data Processing": {
"explain_text": "**โ๏ธ Data Processing:** Decisions about how to process and prepare the data.",
"๐งฝ Data Cleaning": {
"explain_text": "**๐งฝ Data Cleaning:** How should raw data be cleaned and standardized?",
"sub_decisions": [
"How to handle missing values?",
"How to detect/remove duplicates?",
"How to fix formatting errors?"
]
},
"๐ฏ Feature Selection": {
"explain_text": "**๐ฏ Feature Selection:** Which features should be included in the model?",
"sub_decisions": [
"Manual vs automated selection?",
"How to check for data leakage?",
"Should dimensionality reduction be applied?"
]
},
"๐ง Feature Engineering": {
"explain_text": "**๐ง Feature Engineering:** How to create or transform features for better performance?",
"sub_decisions": [
"What new features should be created?",
"How to combine existing features?",
"How to encode categorical variables?"
]
},
"๐จ Outlier Handling": {
"explain_text": "**๐จ Outlier Handling:** How to deal with unusual or extreme data points?",
"sub_decisions": [
"Which detection method to use (Z-score, IQR, clustering)?",
"Remove, cap, or keep outliers?"
]
},
"๐ Data Scaling": {
"explain_text": "**๐ Data Scaling:** How to scale or transform features before modeling?",
"sub_decisions": [
"Should Min-Max or Standard scaling be applied?",
"Is log or Box-Cox transformation needed?"
]
}
},
"๐ค Model Selection": {
"explain_text": "**๐ค Model Selection:** Decisions about which model to train and the hyperparameter choices.",
"๐๏ธ Model Architecture": {
"explain_text": "**๐๏ธ Model Architecture:** Which type of model is best suited to the problem?",
"sub_decisions": [
"Linear vs tree-based vs neural networks?",
"How interpretable should the model be?",
"What are computational constraints?"
]
},
"๐ Baseline Model": {
"explain_text": "**๐ Baseline Model:** What simple models can set a performance baseline?",
"sub_decisions": [
"Should a logistic regression or decision tree be used?",
"What baseline metric is most relevant?"
]
},
"๐ง Pre-trained Models": {
"explain_text": "**๐ง Pre-trained Models:** Can existing models be leveraged?",
"sub_decisions": [
"Which pre-trained models are relevant (image, NLP, tabular)?",
"Fine-tune or use as feature extractors?"
]
},
"โก Hyperparams": {
"explain_text": "**โก Hyperparams:** How to optimize model hyperparameters?",
"sub_decisions": [
"Grid search vs random search vs Bayesian?",
"How many trials and folds to run?",
"What budget or time limit applies?"
]
},
"๐ฆ Model Complexity": {
"explain_text": "**๐ฆ Model Complexity:** Is the model efficient enough for deployment?",
"sub_decisions": [
"How many parameters and FLOPs?",
"What is memory usage and latency?",
"Are there deployment constraints (edge vs cloud)?"
]
}
},
"๐๏ธ Model Training": {
"explain_text": "**๐๏ธ Model Training:** Decisions about the training algorithm used.",
"โ๏ธ Data Splitting": {
"explain_text": "**โ๏ธ Data Splitting:** How should data be divided for training and testing?",
"sub_decisions": [
"Train-test split ratio?",
"Cross-validation vs stratified split?"
]
},
"โ๏ธ Loss Function": {
"explain_text": "**โ๏ธ Loss Function:** Which loss function aligns with the task?",
"sub_decisions": [
"MSE vs MAE vs cross-entropy?",
"Is robustness to outliers needed?",
"Does it align with evaluation metrics?"
]
},
"๐ Optimization": {
"explain_text": "**๐ Optimization:** Which optimization algorithm should be used?",
"sub_decisions": [
"SGD vs Adam vs RMSProp?",
"What learning rate schedule?",
"What batch size?"
]
},
"๐ก๏ธ Regularization": {
"explain_text": "**๐ก๏ธ Regularization:** How to prevent overfitting?",
"sub_decisions": [
"L1 vs L2 regularization?",
"Dropout rate?",
"Should early stopping be applied?"
]
},
"๐ Training Monitoring": {
"explain_text": "**๐ Training Monitoring:** How to track and manage training progress?",
"sub_decisions": [
"Which metrics should be monitored?",
"How often to checkpoint models?"
]
}
},
"๐ Model Evaluation": {
"explain_text": "**๐ Model Evaluation:** Decisions about the evaluation criteria.",
"๐ Evaluation Metric": {
"explain_text": "**๐ Evaluation Metric:** Which metrics best reflect model performance?",
"sub_decisions": [
"Accuracy vs Precision/Recall/F1?",
"How to handle class imbalance?",
"Including probabilistic metrics (AUC, log loss)?"
]
},
"๐งช Test Data": {
"explain_text": "**๐งช Test Data:** How should testing be performed?",
"sub_decisions": [
"Hold-out set vs cross-validation?",
"An external test dataset?"
]
},
"โ๏ธ Fairness": {
"explain_text": "**โ๏ธ Fairness:** How to ensure fairness across groups?",
"sub_decisions": [
"Which fairness metric to use (demographic parity, equalized odds)?",
"How to detect bias in predictions?"
]
},
"๐ ๏ธ Robustness": {
"explain_text": "**๐ ๏ธ Robustness:** How reliable is the model under stress?",
"sub_decisions": [
"How does the model handle noisy inputs?",
"How to test against distribution shifts?"
]
},
"๐ Interpretability": {
"explain_text": "**๐ Interpretability:** How understandable are the model predictions?",
"sub_decisions": [
"Which methods to use (feature importance, SHAP, LIME)?",
"How stable are explanations?",
"Are explanations actionable for stakeholders?"
]
}
}
}