import streamlit as st
import time
def go_to(page_name, from_callback=False):
    """Navigate to another page by updating session state.

    Parameters
    ----------
    page_name : str
        Value stored into ``st.session_state.page``; the main script is
        expected to dispatch rendering on this key.
    from_callback : bool, optional
        Pass ``True`` when this is invoked from a widget ``on_click``
        callback: Streamlit already schedules a rerun after callbacks,
        so calling ``st.rerun()`` there is unnecessary.
    """
    st.session_state.page = page_name
    if not from_callback:
        # Outside a callback we must trigger the rerun ourselves so the
        # newly selected page renders immediately.
        st.rerun()
def add_navigation(previous_page, next_page):
    """Render Previous/Next navigation buttons followed by a divider.

    Parameters
    ----------
    previous_page : str | None
        Page key for the "Previous" button; ``None`` omits the button.
    next_page : str | None
        Page key for the "Next" button; ``None`` omits the button.
    """
    # Wide middle column pushes the two buttons to opposite edges.
    col1, col2, col3 = st.columns([1, 4, 1])
    if previous_page is not None:
        with col1:
            if st.button("Previous"):
                go_to(previous_page)
    if next_page is not None:
        with col3:
            if st.button("Next"):
                go_to(next_page)
    st.markdown("---")
def add_fadein_text(paragraphs):
    """Reveal paragraphs sequentially with a CSS fade-in effect.

    Parameters
    ----------
    paragraphs : list[tuple[str, float]]
        Each item is ``(html_text, delay_seconds)``; ``delay_seconds``
        is how long to wait after showing that paragraph before the
        next one appears.
    """
    # NOTE(review): the original CSS rules were lost when this file was
    # extracted; the keyframes below restore a conventional fade-in —
    # confirm against the intended styling.
    st.markdown(
        """
        <style>
        @keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } }
        .fade-in { animation: fadeIn 1s ease-in; }
        </style>
        """,
        unsafe_allow_html=True,
    )
    # One placeholder per paragraph so earlier paragraphs stay visible
    # while later ones are still hidden.
    placeholders = [st.empty() for _ in paragraphs]
    for placeholder, (text, delay) in zip(placeholders, paragraphs):
        # NOTE(review): the original HTML wrapper was stripped in
        # extraction; a .fade-in div matches the CSS above.
        placeholder.markdown(
            f'<div class="fade-in">{text}</div>',
            unsafe_allow_html=True,
        )
        time.sleep(delay)  # pause before revealing the next paragraph
def add_instruction_text(text_to_display):
    """Render instruction text (HTML allowed) followed by a divider.

    Parameters
    ----------
    text_to_display : str
        Markdown/HTML content to show above the divider.
    """
    # NOTE(review): any HTML wrapper the original had around the text
    # was lost in extraction; rendering the text directly preserves the
    # visible content — confirm against the intended styling.
    st.markdown(f"{text_to_display}", unsafe_allow_html=True)
    st.markdown("---")
def add_red_text(text_to_display):
    """Render text emphasized in red.

    Parameters
    ----------
    text_to_display : str
        Content to display; HTML is allowed.
    """
    # NOTE(review): the original inline markup was stripped during
    # extraction; a red span matches this function's name — confirm
    # against the intended styling.
    st.markdown(
        f'<span style="color:red">{text_to_display}</span>',
        unsafe_allow_html=True,
    )
# Define pipeline stages.
# Structure: stage name -> {"explain_text": str, <decision name>: {...}}.
# Each nested decision dict carries its own "explain_text" plus a
# "sub_decisions" list of question strings shown for that decision.
pipeline_data: dict[str, dict] = {
"Data Collection": {
"explain_text": "**Data Collection:** Decisions about what data to collect and how.",
"Data Sources": {
"explain_text": "**Data Sources:** What data sources will be used to collect data?",
"sub_decisions": ["Collect existing dataset or new sensor data?", "Public datasets or Private datasets?", "Design Web Scraping or use APIs?"]
},
"Data Usage": {
"explain_text": "**Data Usage:** How should the data be used, given any license or permission constraints?",
"sub_decisions": ["Ethical concerns to be addressed?", "Commercial use policies?", "Geographic limits?"]
},
"Data Quality": {
"explain_text": "**Data Quality:** What kind of quality checks are done to decide data collection?",
"sub_decisions": ["Missing value checks to see if critical field are affected?", "Potential duplicates?", "Format consistency and encoding issues?"]
},
"Data Sampling": {
"explain_text": "**Data Sampling:** How to sample from a potentially bigger data source?",
"sub_decisions": ["Random sampling/stratified sampling/cluster sampling?", "Sample size?", "Potential imbalance?", "Additional synthetic data?"]
},
"Data Storage": {
"explain_text": "**Data Storage:** How and where to store the data?",
"sub_decisions": ["Backup frequency?", "File format choice?"]
},
},
"Data Processing": {
"explain_text": "**Data Processing:** Decisions about how to process and prepare the data.",
"Data Cleaning": {
"explain_text": "**Data Cleaning:** How should raw data be cleaned and standardized?",
"sub_decisions": ["How to handle missing values?", "How to detect/remove duplicates?", "How to fix formatting errors?"]
},
"Feature Selection": {
"explain_text": "**Feature Selection:** Which features should be included in the model?",
"sub_decisions": ["Manual vs automated selection?", "How to check for data leakage?", "Should dimensionality reduction be applied?"]
},
"Feature Engineering": {
"explain_text": "**Feature Engineering:** How to create or transform features for better performance?",
"sub_decisions": ["What new features should be created?", "How to combine existing features?", "How to encode categorical variables?"]
},
"Outlier Handling": {
"explain_text": "**Outlier Handling:** How to deal with unusual or extreme data points?",
"sub_decisions": ["Which detection method to use (Z-score, IQR, clustering)?", "Remove, cap, or keep outliers?"]
},
"Data Scaling": {
"explain_text": "**Data Scaling:** How to scale or transform features before modeling?",
"sub_decisions": ["Should Min-Max or Standard scaling be applied?", "Is log or Box-Cox transformation needed?"]
}
},
"Model Selection": {
"explain_text": "**Model Selection:** Decisions about which model to train and the hyperparameter choices.",
"Model Architecture": {
"explain_text": "**Model Architecture:** Which type of model is best suited to the problem?",
"sub_decisions": ["Linear vs tree-based vs neural networks?", "How interpretable should the model be?", "What are computational constraints?"]
},
"Baseline Model": {
"explain_text": "**Baseline Model:** What simple models can set a performance baseline?",
"sub_decisions": ["Should a logistic regression or decision tree be used?", "What baseline metric is most relevant?"]
},
"Pre-trained Models": {
"explain_text": "**Pre-trained Models:** Can existing models be leveraged?",
"sub_decisions": ["Which pre-trained models are relevant (image, NLP, tabular)?", "Fine-tune or use as feature extractors?"]
},
"Hyperparameters": {
"explain_text": "**Hyperparameters:** How to optimize model hyperparameters?",
"sub_decisions": ["Grid search vs random search vs Bayesian?", "How many trials and folds to run?", "What budget or time limit applies?"]
},
"Model Complexity": {
"explain_text": "**Model Complexity:** Is the model efficient enough for deployment?",
"sub_decisions": ["How many parameters and FLOPs?", "What is memory usage and latency?", "Are there deployment constraints (edge vs cloud)?"]
}
},
"Model Training": {
"explain_text": "**Model Training:** Decisions about the training algorithm used.",
"Data Splitting": {
"explain_text": "**Data Splitting:** How should data be divided for training and testing?",
"sub_decisions": ["Train-test split ratio?", "Cross-validation vs stratified split?"]
},
"Loss Function": {
"explain_text": "**Loss Function:** Which loss function aligns with the task?",
"sub_decisions": ["MSE vs MAE vs cross-entropy?", "Is robustness to outliers needed?", "Does it align with evaluation metrics?"]
},
"Optimization Method": {
"explain_text": "**Optimization Method:** Which optimization algorithm should be used?",
"sub_decisions": ["SGD vs Adam vs RMSProp?", "What learning rate schedule?", "What batch size?"]
},
"Regularization": {
"explain_text": "**Regularization:** How to prevent overfitting?",
"sub_decisions": ["L1 vs L2 regularization?", "Dropout rate?", "Should early stopping be applied?"]
},
"Training Monitoring": {
"explain_text": "**Training Monitoring:** How to track and manage training progress?",
"sub_decisions": ["Which metrics should be monitored?", "How often to checkpoint models?"]
}
},
"Model Evaluation": {
"explain_text": "**Model Evaluation:** Decisions about the evaluation criteria.",
"Evaluation Metric": {
"explain_text": "**Evaluation Metric:** Which metrics best reflect model performance?",
"sub_decisions": ["Accuracy vs Precision/Recall/F1?", "How to handle class imbalance?", "Including probabilistic metrics (AUC, log loss)?"]
},
"Test Data": {
"explain_text": "**Test Data:** How should testing be performed?",
"sub_decisions": ["Hold-out set vs cross-validation?", "An external test dataset?"]
},
"Fairness": {
"explain_text": "**Fairness:** How to ensure fairness across groups?",
"sub_decisions": ["Which fairness metric to use (demographic parity, equalized odds)?", "How to detect bias in predictions?"]
},
"Robustness": {
"explain_text": "**Robustness:** How reliable is the model under stress?",
"sub_decisions": ["How does the model handle noisy inputs?", "How to test against distribution shifts?"]
},
"Interpretability": {
"explain_text": "**Interpretability:** How understandable are the model predictions?",
"sub_decisions": ["Which methods to use (feature importance, SHAP, LIME)?", "How stable are explanations?", "Are explanations actionable for stakeholders?"]
}
}
}