| import streamlit as st |
| import time |
|
|
def go_to(page_name, from_callback=False):
    """Switch the app to *page_name* via st.session_state.

    Pass from_callback=True when invoked from a widget's on_click handler:
    Streamlit already schedules a rerun after callbacks, so calling
    st.rerun() there would be redundant.
    """
    st.session_state.page = page_name
    if from_callback:
        return
    st.rerun()
| |
|
|
def add_navigation(previous_page, next_page):
    """Render Previous/Next buttons in the outer columns, then a divider.

    Pass None for either page to omit that button.
    """
    nav_cols = st.columns([1, 4, 1])

    if previous_page is not None:
        with nav_cols[0]:
            if st.button("Previous"):
                go_to(previous_page)

    if next_page is not None:
        with nav_cols[2]:
            if st.button("Next"):
                go_to(next_page)

    st.markdown("---")
| |
|
|
def add_fadein_text(paragraphs):
    """Render paragraphs that fade in on a staggered schedule.

    paragraphs: sequence of pairs where item[0] is the HTML/text to show
    and item[1] is the CSS animation delay in seconds. The staggering is
    done entirely client-side via `animation-delay`; no server sleeps.
    """
    # Inject the keyframes + styling once for all paragraphs.
    st.markdown("""
    <style>
    @keyframes fadeIn {
        from {opacity: 0;}
        to {opacity: 1;}
    }
    .fade-in {
        animation: fadeIn 1.5s ease-in forwards;
        opacity: 0;
        margin-bottom: 1em;
        font-size: 24px;
        color: #A6C8FF;
        text-align: left;
    }
    </style>
    """, unsafe_allow_html=True)

    # One placeholder per paragraph, filled immediately with its delayed div.
    slots = [st.empty() for _ in paragraphs]
    for slot, item in zip(slots, paragraphs):
        slot.markdown(
            f"<div class='fade-in' style='animation-delay:{item[1]}s'>{item[0]}</div>",
            unsafe_allow_html=True,
        )
| |
|
|
def add_instruction_text(text_to_display):
    """Show centered gray instruction text followed by a horizontal rule."""
    html = f"<div style='text-align: center; font-size:18px; color:gray;'>{text_to_display}</div>"
    st.markdown(html, unsafe_allow_html=True)
    st.markdown("---")
|
|
def add_red_text(text_to_display):
    """Render centered red (warning/emphasis) text."""
    html = (
        "<div style='text-align:center; color:#c0392b; font-size:18px;'>"
        f"{text_to_display}<br></div>"
    )
    st.markdown(html, unsafe_allow_html=True)
|
|
| |
# Hierarchical description of an ML pipeline, used to drive the app's UI.
# Shape (two levels of nesting):
#   {
#     stage_label: {
#       "explain_text": str,            # markdown blurb for the stage
#       decision_label: {               # one entry per decision in the stage
#           "explain_text": str,        # markdown blurb for the decision
#           "sub_decisions": [str, ...] # concrete questions under it
#       },
#       ...
#     },
#     ...
#   }
# NOTE(review): the label prefixes (e.g. "π₯") look like mojibake — emoji
# bytes decoded under the wrong codepage. They are preserved verbatim here;
# confirm the file's source encoding before "fixing" any of these strings,
# since other code may key on them exactly as written.
pipeline_data = {
    # Stage 1: decisions made before any modeling happens.
    "π₯ Data Collection": {
        "explain_text": "**π₯ Data Collection:** Decisions about what data to collect and how.",
        "π Data Sources": {
            "explain_text": "**π Data Sources:** What data sources will be used to collect data?",
            "sub_decisions": [
                "Collect existing dataset or new sensor data?",
                "Public datasets or Private datasets?",
                "Design Web Scraping or use APIs?"
            ]
        },
        "π Data Usage": {
            "explain_text": "**π Data Usage:** How should the data be used, given any license or permission constraints?",
            "sub_decisions": [
                "Ethical concerns to be addressed?",
                "Commercial use policies?",
                "Geographic limits?"
            ]
        },
        "π§Ή Data Quality": {
            "explain_text": "**π§Ή Data Quality:** What kind of quality checks are done to decide data collection?",
            "sub_decisions": [
                "Missing value checks to see if critical field are affected?",
                "Potential duplicates?",
                "Format consistency and encoding issues?"
            ]
        },
        "π² Data Sampling": {
            "explain_text": "**π² Data Sampling:** How to sample from a potentially bigger data source?",
            "sub_decisions": [
                "Random sampling/stratified sampling/cluster sampling?",
                "Sample size?",
                "Potential imbalance?",
                "Additional synthetic data?"
            ]
        },
        "πΎ Data Storage": {
            "explain_text": "**πΎ Data Storage:** How and where to store the data?",
            "sub_decisions": [
                "Backup frequency?",
                "File format choice?"
            ]
        },
    },

    # Stage 2: preparing the collected data for modeling.
    "βοΈ Data Processing": {
        "explain_text": "**βοΈ Data Processing:** Decisions about how to process and prepare the data.",
        "π§½ Data Cleaning": {
            "explain_text": "**π§½ Data Cleaning:** How should raw data be cleaned and standardized?",
            "sub_decisions": [
                "How to handle missing values?",
                "How to detect/remove duplicates?",
                "How to fix formatting errors?"
            ]
        },
        "π― Feature Selection": {
            "explain_text": "**π― Feature Selection:** Which features should be included in the model?",
            "sub_decisions": [
                "Manual vs automated selection?",
                "How to check for data leakage?",
                "Should dimensionality reduction be applied?"
            ]
        },
        "π§ Feature Engineering": {
            "explain_text": "**π§ Feature Engineering:** How to create or transform features for better performance?",
            "sub_decisions": [
                "What new features should be created?",
                "How to combine existing features?",
                "How to encode categorical variables?"
            ]
        },
        "π¨ Outlier Handling": {
            "explain_text": "**π¨ Outlier Handling:** How to deal with unusual or extreme data points?",
            "sub_decisions": [
                "Which detection method to use (Z-score, IQR, clustering)?",
                "Remove, cap, or keep outliers?"
            ]
        },
        "π Data Scaling": {
            "explain_text": "**π Data Scaling:** How to scale or transform features before modeling?",
            "sub_decisions": [
                "Should Min-Max or Standard scaling be applied?",
                "Is log or Box-Cox transformation needed?"
            ]
        }
    },

    # Stage 3: choosing the model family and its complexity budget.
    "π€ Model Selection": {
        "explain_text": "**π€ Model Selection:** Decisions about which model to train and the hyperparameter choices.",
        "ποΈ Model Architecture": {
            "explain_text": "**ποΈ Model Architecture:** Which type of model is best suited to the problem?",
            "sub_decisions": [
                "Linear vs tree-based vs neural networks?",
                "How interpretable should the model be?",
                "What are computational constraints?"
            ]
        },
        "π Baseline Model": {
            "explain_text": "**π Baseline Model:** What simple models can set a performance baseline?",
            "sub_decisions": [
                "Should a logistic regression or decision tree be used?",
                "What baseline metric is most relevant?"
            ]
        },
        "π§ Pre-trained Models": {
            "explain_text": "**π§ Pre-trained Models:** Can existing models be leveraged?",
            "sub_decisions": [
                "Which pre-trained models are relevant (image, NLP, tabular)?",
                "Fine-tune or use as feature extractors?"
            ]
        },
        "β‘ Hyperparams": {
            "explain_text": "**β‘ Hyperparams:** How to optimize model hyperparameters?",
            "sub_decisions": [
                "Grid search vs random search vs Bayesian?",
                "How many trials and folds to run?",
                "What budget or time limit applies?"
            ]
        },
        "π¦ Model Complexity": {
            "explain_text": "**π¦ Model Complexity:** Is the model efficient enough for deployment?",
            "sub_decisions": [
                "How many parameters and FLOPs?",
                "What is memory usage and latency?",
                "Are there deployment constraints (edge vs cloud)?"
            ]
        }
    },

    # Stage 4: decisions made while fitting the chosen model.
    "ποΈ Model Training": {
        "explain_text": "**ποΈ Model Training:** Decisions about the training algorithm used.",
        "βοΈ Data Splitting": {
            "explain_text": "**βοΈ Data Splitting:** How should data be divided for training and testing?",
            "sub_decisions": [
                "Train-test split ratio?",
                "Cross-validation vs stratified split?"
            ]
        },
        "βοΈ Loss Function": {
            "explain_text": "**βοΈ Loss Function:** Which loss function aligns with the task?",
            "sub_decisions": [
                "MSE vs MAE vs cross-entropy?",
                "Is robustness to outliers needed?",
                "Does it align with evaluation metrics?"
            ]
        },
        "π Optimization": {
            "explain_text": "**π Optimization:** Which optimization algorithm should be used?",
            "sub_decisions": [
                "SGD vs Adam vs RMSProp?",
                "What learning rate schedule?",
                "What batch size?"
            ]
        },
        "π‘οΈ Regularization": {
            "explain_text": "**π‘οΈ Regularization:** How to prevent overfitting?",
            "sub_decisions": [
                "L1 vs L2 regularization?",
                "Dropout rate?",
                "Should early stopping be applied?"
            ]
        },
        "π Training Monitoring": {
            "explain_text": "**π Training Monitoring:** How to track and manage training progress?",
            "sub_decisions": [
                "Which metrics should be monitored?",
                "How often to checkpoint models?"
            ]
        }
    },

    # Stage 5: judging the trained model before/after deployment.
    "π Model Evaluation": {
        "explain_text": "**π Model Evaluation:** Decisions about the evaluation criteria.",
        "π Evaluation Metric": {
            "explain_text": "**π Evaluation Metric:** Which metrics best reflect model performance?",
            "sub_decisions": [
                "Accuracy vs Precision/Recall/F1?",
                "How to handle class imbalance?",
                "Including probabilistic metrics (AUC, log loss)?"
            ]
        },
        "π§ͺ Test Data": {
            "explain_text": "**π§ͺ Test Data:** How should testing be performed?",
            "sub_decisions": [
                "Hold-out set vs cross-validation?",
                "An external test dataset?"
            ]
        },
        "βοΈ Fairness": {
            "explain_text": "**βοΈ Fairness:** How to ensure fairness across groups?",
            "sub_decisions": [
                "Which fairness metric to use (demographic parity, equalized odds)?",
                "How to detect bias in predictions?"
            ]
        },
        "π οΈ Robustness": {
            "explain_text": "**π οΈ Robustness:** How reliable is the model under stress?",
            "sub_decisions": [
                "How does the model handle noisy inputs?",
                "How to test against distribution shifts?"
            ]
        },
        "π Interpretability": {
            "explain_text": "**π Interpretability:** How understandable are the model predictions?",
            "sub_decisions": [
                "Which methods to use (feature importance, SHAP, LIME)?",
                "How stable are explanations?",
                "Are explanations actionable for stakeholders?"
            ]
        }
    }
}
|
|