import streamlit as st
import time
def go_to(page_name, from_callback=False):
    """
    Navigate to *page_name* by storing it in ``st.session_state.page``.

    Parameters
    ----------
    page_name : str
        Identifier of the page to switch to.
    from_callback : bool, optional
        Set to ``True`` when this is invoked from a Streamlit ``on_click``
        callback — Streamlit already reruns the script after callbacks,
        so an explicit ``st.rerun()`` would be redundant.
    """
    st.session_state.page = page_name
    if from_callback:
        # Callback path: Streamlit handles the rerun itself.
        return
    st.rerun()
def add_navigation(previous_page, next_page):
    """
    Render "Previous"/"Next" navigation buttons followed by a divider.

    Parameters
    ----------
    previous_page : str or None
        Target page for the "Previous" button; ``None`` hides the button.
    next_page : str or None
        Target page for the "Next" button; ``None`` hides the button.
    """
    left_col, _middle, right_col = st.columns([1, 4, 1])
    if previous_page is not None:
        with left_col:
            if st.button("Previous"):
                go_to(previous_page)
    if next_page is not None:
        with right_col:
            if st.button("Next"):
                go_to(next_page)
    st.markdown("---")
def add_fadein_text(paragraphs):
    """
    Reveal a sequence of paragraphs one after another.

    Parameters
    ----------
    paragraphs : sequence of 2-tuples
        Each item is ``(text, delay)``: ``text`` is the markdown/HTML to
        display and ``delay`` is presumably seconds to wait before revealing
        the next paragraph (the ``time.sleep`` call is currently commented
        out, so all paragraphs appear at once).

    NOTE(review): the CSS fade-in rules and the per-paragraph HTML wrapper
    appear to have been stripped from this file during extraction — the
    ``st.markdown`` payloads below are placeholders; restore the original
    markup for the animation to work. TODO confirm against the repository.
    """
    # Define CSS fade-in animation (original CSS content lost; empty
    # placeholder keeps the call structure intact).
    st.markdown("""
    """, unsafe_allow_html=True)
    # Create one placeholder per paragraph so each can be filled in turn.
    placeholders = [st.empty() for _ in paragraphs]
    # Reveal paragraphs sequentially.
    for i, p in enumerate(paragraphs):
        # Original single-quoted f-string was unterminated (SyntaxError);
        # reconstructed as a triple-quoted f-string with the same content.
        placeholders[i].markdown(f"""
{p[0]}
""", unsafe_allow_html=True)
        # time.sleep(p[1])  # delay (seconds) before showing next one
def add_instruction_text(text_to_display):
    """
    Render *text_to_display* (HTML permitted) followed by a divider.

    NOTE(review): an HTML wrapper around the text appears to have been
    stripped during extraction (the original f-string was unterminated,
    a SyntaxError) — TODO confirm the intended markup against the repo.
    """
    st.markdown(f"{text_to_display}", unsafe_allow_html=True)
    st.markdown("---")
def add_red_text(text_to_display):
    """
    Render *text_to_display*; the function name implies red styling.

    NOTE(review): the styling markup (presumably a red-colored ``<span>``
    or styled ``<div>``) appears to have been stripped during extraction —
    the original f-string was unterminated (SyntaxError). Reconstructed
    minimally; TODO restore the original HTML from the repository.
    """
    st.markdown(
        f"{text_to_display}",
        unsafe_allow_html=True,
    )
# Define pipeline stages
# pipeline_data maps each top-level ML-pipeline stage to:
#   - "explain_text": a markdown blurb describing that stage, and
#   - one nested dict per decision area, each carrying its own
#     "explain_text" plus a "sub_decisions" list of prompt questions.
# NOTE(review): the leading characters in the keys ("๐ฅ", "โ๏ธ", ...) look
# like mis-decoded emoji (mojibake) — confirm the source-file encoding
# before re-saving; keys are left byte-identical here because other code
# may look them up verbatim.
pipeline_data = {
"๐ฅ Data Collection": {
"explain_text": "**๐ฅ Data Collection:** Decisions about what data to collect and how.",
"๐ Data Sources": {
"explain_text": "**๐ Data Sources:** What data sources will be used to collect data?",
"sub_decisions": [
"Collect existing dataset or new sensor data?",
"Public datasets or Private datasets?",
"Design Web Scraping or use APIs?"
]
},
"๐ Data Usage": {
"explain_text": "**๐ Data Usage:** How should the data be used, given any license or permission constraints?",
"sub_decisions": [
"Ethical concerns to be addressed?",
"Commercial use policies?",
"Geographic limits?"
]
},
"๐งน Data Quality": {
"explain_text": "**๐งน Data Quality:** What kind of quality checks are done to decide data collection?",
"sub_decisions": [
"Missing value checks to see if critical field are affected?",
"Potential duplicates?",
"Format consistency and encoding issues?"
]
},
"๐ฒ Data Sampling": {
"explain_text": "**๐ฒ Data Sampling:** How to sample from a potentially bigger data source?",
"sub_decisions": [
"Random sampling/stratified sampling/cluster sampling?",
"Sample size?",
"Potential imbalance?",
"Additional synthetic data?"
]
},
"๐พ Data Storage": {
"explain_text": "**๐พ Data Storage:** How and where to store the data?",
"sub_decisions": [
"Backup frequency?",
"File format choice?"
]
},
},
"โ๏ธ Data Processing": {
"explain_text": "**โ๏ธ Data Processing:** Decisions about how to process and prepare the data.",
"๐งฝ Data Cleaning": {
"explain_text": "**๐งฝ Data Cleaning:** How should raw data be cleaned and standardized?",
"sub_decisions": [
"How to handle missing values?",
"How to detect/remove duplicates?",
"How to fix formatting errors?"
]
},
"๐ฏ Feature Selection": {
"explain_text": "**๐ฏ Feature Selection:** Which features should be included in the model?",
"sub_decisions": [
"Manual vs automated selection?",
"How to check for data leakage?",
"Should dimensionality reduction be applied?"
]
},
"๐ง Feature Engineering": {
"explain_text": "**๐ง Feature Engineering:** How to create or transform features for better performance?",
"sub_decisions": [
"What new features should be created?",
"How to combine existing features?",
"How to encode categorical variables?"
]
},
"๐จ Outlier Handling": {
"explain_text": "**๐จ Outlier Handling:** How to deal with unusual or extreme data points?",
"sub_decisions": [
"Which detection method to use (Z-score, IQR, clustering)?",
"Remove, cap, or keep outliers?"
]
},
"๐ Data Scaling": {
"explain_text": "**๐ Data Scaling:** How to scale or transform features before modeling?",
"sub_decisions": [
"Should Min-Max or Standard scaling be applied?",
"Is log or Box-Cox transformation needed?"
]
}
},
"๐ค Model Selection": {
"explain_text": "**๐ค Model Selection:** Decisions about which model to train and the hyperparameter choices.",
"๐๏ธ Model Architecture": {
"explain_text": "**๐๏ธ Model Architecture:** Which type of model is best suited to the problem?",
"sub_decisions": [
"Linear vs tree-based vs neural networks?",
"How interpretable should the model be?",
"What are computational constraints?"
]
},
"๐ Baseline Model": {
"explain_text": "**๐ Baseline Model:** What simple models can set a performance baseline?",
"sub_decisions": [
"Should a logistic regression or decision tree be used?",
"What baseline metric is most relevant?"
]
},
"๐ง Pre-trained Models": {
"explain_text": "**๐ง Pre-trained Models:** Can existing models be leveraged?",
"sub_decisions": [
"Which pre-trained models are relevant (image, NLP, tabular)?",
"Fine-tune or use as feature extractors?"
]
},
"โก Hyperparams": {
"explain_text": "**โก Hyperparams:** How to optimize model hyperparameters?",
"sub_decisions": [
"Grid search vs random search vs Bayesian?",
"How many trials and folds to run?",
"What budget or time limit applies?"
]
},
"๐ฆ Model Complexity": {
"explain_text": "**๐ฆ Model Complexity:** Is the model efficient enough for deployment?",
"sub_decisions": [
"How many parameters and FLOPs?",
"What is memory usage and latency?",
"Are there deployment constraints (edge vs cloud)?"
]
}
},
"๐๏ธ Model Training": {
"explain_text": "**๐๏ธ Model Training:** Decisions about the training algorithm used.",
"โ๏ธ Data Splitting": {
"explain_text": "**โ๏ธ Data Splitting:** How should data be divided for training and testing?",
"sub_decisions": [
"Train-test split ratio?",
"Cross-validation vs stratified split?"
]
},
"โ๏ธ Loss Function": {
"explain_text": "**โ๏ธ Loss Function:** Which loss function aligns with the task?",
"sub_decisions": [
"MSE vs MAE vs cross-entropy?",
"Is robustness to outliers needed?",
"Does it align with evaluation metrics?"
]
},
"๐ Optimization": {
"explain_text": "**๐ Optimization:** Which optimization algorithm should be used?",
"sub_decisions": [
"SGD vs Adam vs RMSProp?",
"What learning rate schedule?",
"What batch size?"
]
},
"๐ก๏ธ Regularization": {
"explain_text": "**๐ก๏ธ Regularization:** How to prevent overfitting?",
"sub_decisions": [
"L1 vs L2 regularization?",
"Dropout rate?",
"Should early stopping be applied?"
]
},
"๐ Training Monitoring": {
"explain_text": "**๐ Training Monitoring:** How to track and manage training progress?",
"sub_decisions": [
"Which metrics should be monitored?",
"How often to checkpoint models?"
]
}
},
"๐ Model Evaluation": {
"explain_text": "**๐ Model Evaluation:** Decisions about the evaluation criteria.",
"๐ Evaluation Metric": {
"explain_text": "**๐ Evaluation Metric:** Which metrics best reflect model performance?",
"sub_decisions": [
"Accuracy vs Precision/Recall/F1?",
"How to handle class imbalance?",
"Including probabilistic metrics (AUC, log loss)?"
]
},
"๐งช Test Data": {
"explain_text": "**๐งช Test Data:** How should testing be performed?",
"sub_decisions": [
"Hold-out set vs cross-validation?",
"An external test dataset?"
]
},
"โ๏ธ Fairness": {
"explain_text": "**โ๏ธ Fairness:** How to ensure fairness across groups?",
"sub_decisions": [
"Which fairness metric to use (demographic parity, equalized odds)?",
"How to detect bias in predictions?"
]
},
"๐ ๏ธ Robustness": {
"explain_text": "**๐ ๏ธ Robustness:** How reliable is the model under stress?",
"sub_decisions": [
"How does the model handle noisy inputs?",
"How to test against distribution shifts?"
]
},
"๐ Interpretability": {
"explain_text": "**๐ Interpretability:** How understandable are the model predictions?",
"sub_decisions": [
"Which methods to use (feature importance, SHAP, LIME)?",
"How stable are explanations?",
"Are explanations actionable for stakeholders?"
]
}
}
}