import streamlit as st
import time  # used by the (currently commented-out) paragraph delay below


def go_to(page_name, from_callback=False):
    """
    Updates the session_state page and optionally triggers a rerun.
    - from_callback=True if called inside an on_click callback
    """
    st.session_state.page = page_name
    if not from_callback:
        # Button callbacks already get a rerun from Streamlit; only force
        # one when called from normal script flow.
        st.rerun()
        # st.stop()


def add_navigation(previous_page, next_page):
    """Render Previous/Next buttons (pass None to omit either) plus a divider.

    Parameters:
        previous_page: page name for the "Previous" button, or None to hide it.
        next_page: page name for the "Next" button, or None to hide it.
    """
    # Narrow button columns on each side, wide spacer in the middle.
    col1, col2, col3 = st.columns([1, 4, 1])
    if previous_page is not None:
        with col1:
            if st.button("Previous"):
                go_to(previous_page)
    if next_page is not None:
        with col3:
            if st.button("Next"):
                go_to(next_page)
    st.markdown("---")
    # st.divider()


def add_fadein_text(paragraphs):
    """Reveal each paragraph in *paragraphs* sequentially.

    Each entry is indexed as p[0] (text) and p[1] (delay in seconds,
    currently unused because the sleep is commented out).
    """
    # Define CSS fade-in animation
    # NOTE(review): the <style> markup appears to have been stripped from this
    # literal during extraction — restore the CSS from version control.
    st.markdown(""" """, unsafe_allow_html=True)

    # Create one placeholder for each paragraph
    placeholders = [st.empty() for _ in paragraphs]

    # Reveal paragraphs sequentially
    for i, p in enumerate(paragraphs):
        # NOTE(review): an HTML wrapper around {p[0]} appears lost
        # (unsafe_allow_html=True implies an HTML fragment) — confirm
        # against the original file.
        placeholders[i].markdown(f"""
{p[0]}
""", unsafe_allow_html=True)
        # time.sleep(p[1])  # delay (seconds) before showing next one


def add_instruction_text(text_to_display):
    """Show instruction text (rendered with HTML allowed) above a divider."""
    # NOTE(review): surrounding HTML markup appears stripped — confirm.
    st.markdown(f"""
{text_to_display}
""", unsafe_allow_html=True)
    st.markdown("---")


def add_red_text(text_to_display):
    """Show emphasized text rendered with HTML allowed."""
    # NOTE(review): the red-styling HTML appears stripped from this
    # literal — restore from version control.
    st.markdown(
        f"""
{text_to_display}
""",
        unsafe_allow_html=True,
    )


# Define pipeline stages
pipeline_data = {
    "📥 Data Collection": {
        "explain_text": "**📥 Data Collection:** Decisions about what data to collect and how.",
        "📊 Data Sources": {
            "explain_text": "**📊 Data Sources:** What data sources will be used to collect data?",
            "sub_decisions": [
                "Collect existing dataset or new sensor data?",
                "Public datasets or Private datasets?",
                "Design Web Scraping or use APIs?",
            ],
        },
        "📜 Data Usage": {
            "explain_text": "**📜 Data Usage:** How should the data be used, given any license or permission constraints?",
            "sub_decisions": [
                "Ethical concerns to be addressed?",
                "Commercial use policies?",
                "Geographic limits?",
            ],
        },
        "🧹 Data Quality": {
            "explain_text": "**🧹 Data Quality:** What kind of quality checks are done to decide data collection?",
            "sub_decisions": [
                "Missing value checks to see if critical field are affected?",
                "Potential duplicates?",
                "Format consistency and encoding issues?",
            ],
        },
        "🎲 Data Sampling": {
            "explain_text": "**🎲 Data Sampling:** How to sample from a potentially bigger data source?",
            "sub_decisions": [
                "Random sampling/stratified sampling/cluster sampling?",
                "Sample size?",
                "Potential imbalance?",
                "Additional synthetic data?",
            ],
        },
        "💾 Data Storage": {
            "explain_text": "**💾 Data Storage:** How and where to store the data?",
            "sub_decisions": [
                "Backup frequency?",
                "File format choice?",
            ],
        },
    },
    "⚙️ Data Processing": {
        "explain_text": "**⚙️ Data Processing:** Decisions about how to process and prepare the data.",
        "🧽 Data Cleaning": {
            "explain_text": "**🧽 Data Cleaning:** How should raw data be cleaned and standardized?",
            "sub_decisions": [
                "How to handle missing values?",
                "How to detect/remove duplicates?",
                "How to fix formatting errors?",
            ],
        },
        "🎯 Feature Selection": {
            "explain_text": "**🎯 Feature Selection:** Which features should be included in the model?",
            "sub_decisions": [
                "Manual vs automated selection?",
                "How to check for data leakage?",
                "Should dimensionality reduction be applied?",
            ],
        },
        "🔧 Feature Engineering": {
            "explain_text": "**🔧 Feature Engineering:** How to create or transform features for better performance?",
            "sub_decisions": [
                "What new features should be created?",
                "How to combine existing features?",
                "How to encode categorical variables?",
            ],
        },
        "🚨 Outlier Handling": {
            "explain_text": "**🚨 Outlier Handling:** How to deal with unusual or extreme data points?",
            "sub_decisions": [
                "Which detection method to use (Z-score, IQR, clustering)?",
                "Remove, cap, or keep outliers?",
            ],
        },
        "📏 Data Scaling": {
            "explain_text": "**📏 Data Scaling:** How to scale or transform features before modeling?",
            "sub_decisions": [
                "Should Min-Max or Standard scaling be applied?",
                "Is log or Box-Cox transformation needed?",
            ],
        },
    },
    "🤖 Model Selection": {
        "explain_text": "**🤖 Model Selection:** Decisions about which model to train and the hyperparameter choices.",
        "🏗️ Model Architecture": {
            "explain_text": "**🏗️ Model Architecture:** Which type of model is best suited to the problem?",
            "sub_decisions": [
                "Linear vs tree-based vs neural networks?",
                "How interpretable should the model be?",
                "What are computational constraints?",
            ],
        },
        "📉 Baseline Model": {
            "explain_text": "**📉 Baseline Model:** What simple models can set a performance baseline?",
            "sub_decisions": [
                "Should a logistic regression or decision tree be used?",
                "What baseline metric is most relevant?",
            ],
        },
        "🧠 Pre-trained Models": {
            "explain_text": "**🧠 Pre-trained Models:** Can existing models be leveraged?",
            "sub_decisions": [
                "Which pre-trained models are relevant (image, NLP, tabular)?",
                "Fine-tune or use as feature extractors?",
            ],
        },
        "⚡ Hyperparams": {
            "explain_text": "**⚡ Hyperparams:** How to optimize model hyperparameters?",
            "sub_decisions": [
                "Grid search vs random search vs Bayesian?",
                "How many trials and folds to run?",
                "What budget or time limit applies?",
            ],
        },
        "📦 Model Complexity": {
            "explain_text": "**📦 Model Complexity:** Is the model efficient enough for deployment?",
            "sub_decisions": [
                "How many parameters and FLOPs?",
                "What is memory usage and latency?",
                "Are there deployment constraints (edge vs cloud)?",
            ],
        },
    },
    "🏋️ Model Training": {
        "explain_text": "**🏋️ Model Training:** Decisions about the training algorithm used.",
        "✂️ Data Splitting": {
            "explain_text": "**✂️ Data Splitting:** How should data be divided for training and testing?",
            "sub_decisions": [
                "Train-test split ratio?",
                "Cross-validation vs stratified split?",
            ],
        },
        "⚖️ Loss Function": {
            "explain_text": "**⚖️ Loss Function:** Which loss function aligns with the task?",
            "sub_decisions": [
                "MSE vs MAE vs cross-entropy?",
                "Is robustness to outliers needed?",
                "Does it align with evaluation metrics?",
            ],
        },
        "🚀 Optimization": {
            "explain_text": "**🚀 Optimization:** Which optimization algorithm should be used?",
            "sub_decisions": [
                "SGD vs Adam vs RMSProp?",
                "What learning rate schedule?",
                "What batch size?",
            ],
        },
        "🛡️ Regularization": {
            "explain_text": "**🛡️ Regularization:** How to prevent overfitting?",
            "sub_decisions": [
                "L1 vs L2 regularization?",
                "Dropout rate?",
                "Should early stopping be applied?",
            ],
        },
        "📊 Training Monitoring": {
            "explain_text": "**📊 Training Monitoring:** How to track and manage training progress?",
            "sub_decisions": [
                "Which metrics should be monitored?",
                "How often to checkpoint models?",
            ],
        },
    },
    "📈 Model Evaluation": {
        "explain_text": "**📈 Model Evaluation:** Decisions about the evaluation criteria.",
        "📏 Evaluation Metric": {
            "explain_text": "**📏 Evaluation Metric:** Which metrics best reflect model performance?",
            "sub_decisions": [
                "Accuracy vs Precision/Recall/F1?",
                "How to handle class imbalance?",
                "Including probabilistic metrics (AUC, log loss)?",
            ],
        },
        "🧪 Test Data": {
            "explain_text": "**🧪 Test Data:** How should testing be performed?",
            "sub_decisions": [
                "Hold-out set vs cross-validation?",
                "An external test dataset?",
            ],
        },
        "⚖️ Fairness": {
            "explain_text": "**⚖️ Fairness:** How to ensure fairness across groups?",
            "sub_decisions": [
                "Which fairness metric to use (demographic parity, equalized odds)?",
                "How to detect bias in predictions?",
            ],
        },
        "🛠️ Robustness": {
            "explain_text": "**🛠️ Robustness:** How reliable is the model under stress?",
            "sub_decisions": [
                "How does the model handle noisy inputs?",
                "How to test against distribution shifts?",
            ],
        },
        "🔍 Interpretability": {
            "explain_text": "**🔍 Interpretability:** How understandable are the model predictions?",
            "sub_decisions": [
                "Which methods to use (feature importance, SHAP, LIME)?",
                "How stable are explanations?",
                "Are explanations actionable for stakeholders?",
            ],
        },
    },
}