import streamlit as st
import time
def go_to(page_name, from_callback=False):
    """Navigate to *page_name* by storing it in ``st.session_state.page``.

    Pass ``from_callback=True`` when invoked from a widget's ``on_click``
    callback: in that case Streamlit reruns the script on its own, so we
    must not call ``st.rerun()`` ourselves.
    """
    st.session_state.page = page_name
    if from_callback:
        return
    st.rerun()
def add_navigation(previous_page, next_page):
    """Render Previous/Next buttons in the outer columns, then a divider.

    Either page may be ``None`` to suppress the corresponding button.
    Clicking a button navigates via :func:`go_to`.
    """
    left_col, _middle, right_col = st.columns([1, 4, 1])
    if previous_page is not None:
        with left_col:
            if st.button("Previous"):
                go_to(previous_page)
    if next_page is not None:
        with right_col:
            if st.button("Next"):
                go_to(next_page)
    st.markdown("---")
def add_fadein_text(paragraphs):
    """Render paragraphs that fade in via CSS animation.

    Each item of *paragraphs* is a ``(text, delay_seconds)`` pair; the delay
    is applied as the CSS ``animation-delay`` of that paragraph's div.
    """
    # Inject the fade-in keyframes and paragraph styling once.
    st.markdown("""
        <style>
        @keyframes fadeIn {
            from {opacity: 0;}
            to {opacity: 1;}
        }
        .fade-in {
            animation: fadeIn 1.5s ease-in forwards;
            opacity: 0;
            margin-bottom: 1em;
            font-size: 24px;
            color: #A6C8FF;
            text-align: left;
        }
        </style>
    """, unsafe_allow_html=True)

    # One placeholder per paragraph, filled in order; the per-paragraph
    # stagger comes from the CSS animation-delay, not from sleeping here.
    slots = [st.empty() for _ in paragraphs]
    for slot, para in zip(slots, paragraphs):
        slot.markdown(
            f"<div class='fade-in' style='animation-delay:{para[1]}s'>{para[0]}</div>",
            unsafe_allow_html=True,
        )
def add_instruction_text(text_to_display):
    """Show centered gray instruction text followed by a horizontal rule."""
    html = f"<div style='text-align: center; font-size:18px; color:gray;'>{text_to_display}</div>"
    st.markdown(html, unsafe_allow_html=True)
    st.markdown("---")
def add_red_text(text_to_display):
    """Show centered red (#c0392b) text, e.g. for warnings or emphasis."""
    html = f"<div style='text-align:center; color:#c0392b; font-size:18px;'>{text_to_display}<br></div>"
    st.markdown(html, unsafe_allow_html=True)
# Define pipeline stages
#
# Taxonomy of ML-pipeline decision points, rendered by the app's pages.
# Shape:
#   {stage_label: {
#       "explain_text": str,                 # markdown blurb for the stage
#       decision_label: {
#           "explain_text": str,             # markdown blurb for the decision
#           "sub_decisions": [str, ...],     # concrete questions under it
#       },
#       ...
#   }, ...}
#
# NOTE(review): the leading characters in the labels (e.g. "π₯") look like
# mis-encoded emoji (UTF-8 decoded with the wrong codec). They are dict keys
# and displayed text, so they are NOT corrected here — confirm against the
# original file and re-save it as UTF-8 if corruption is confirmed.
pipeline_data = {
    "π₯ Data Collection": {
        "explain_text": "**π₯ Data Collection:** Decisions about what data to collect and how.",
        "π Data Sources": {
            "explain_text": "**π Data Sources:** What data sources will be used to collect data?",
            "sub_decisions": [
                "Collect existing dataset or new sensor data?",
                "Public datasets or Private datasets?",
                "Design Web Scraping or use APIs?"
            ]
        },
        "π Data Usage": {
            "explain_text": "**π Data Usage:** How should the data be used, given any license or permission constraints?",
            "sub_decisions": [
                "Ethical concerns to be addressed?",
                "Commercial use policies?",
                "Geographic limits?"
            ]
        },
        "π§Ή Data Quality": {
            "explain_text": "**π§Ή Data Quality:** What kind of quality checks are done to decide data collection?",
            "sub_decisions": [
                "Missing value checks to see if critical field are affected?",
                "Potential duplicates?",
                "Format consistency and encoding issues?"
            ]
        },
        "π² Data Sampling": {
            "explain_text": "**π² Data Sampling:** How to sample from a potentially bigger data source?",
            "sub_decisions": [
                "Random sampling/stratified sampling/cluster sampling?",
                "Sample size?",
                "Potential imbalance?",
                "Additional synthetic data?"
            ]
        },
        "πΎ Data Storage": {
            "explain_text": "**πΎ Data Storage:** How and where to store the data?",
            "sub_decisions": [
                "Backup frequency?",
                "File format choice?"
            ]
        },
    },
    "βοΈ Data Processing": {
        "explain_text": "**βοΈ Data Processing:** Decisions about how to process and prepare the data.",
        "π§½ Data Cleaning": {
            "explain_text": "**π§½ Data Cleaning:** How should raw data be cleaned and standardized?",
            "sub_decisions": [
                "How to handle missing values?",
                "How to detect/remove duplicates?",
                "How to fix formatting errors?"
            ]
        },
        "π― Feature Selection": {
            "explain_text": "**π― Feature Selection:** Which features should be included in the model?",
            "sub_decisions": [
                "Manual vs automated selection?",
                "How to check for data leakage?",
                "Should dimensionality reduction be applied?"
            ]
        },
        "π§ Feature Engineering": {
            "explain_text": "**π§ Feature Engineering:** How to create or transform features for better performance?",
            "sub_decisions": [
                "What new features should be created?",
                "How to combine existing features?",
                "How to encode categorical variables?"
            ]
        },
        "π¨ Outlier Handling": {
            "explain_text": "**π¨ Outlier Handling:** How to deal with unusual or extreme data points?",
            "sub_decisions": [
                "Which detection method to use (Z-score, IQR, clustering)?",
                "Remove, cap, or keep outliers?"
            ]
        },
        "π Data Scaling": {
            "explain_text": "**π Data Scaling:** How to scale or transform features before modeling?",
            "sub_decisions": [
                "Should Min-Max or Standard scaling be applied?",
                "Is log or Box-Cox transformation needed?"
            ]
        }
    },
    "π€ Model Selection": {
        "explain_text": "**π€ Model Selection:** Decisions about which model to train and the hyperparameter choices.",
        "ποΈ Model Architecture": {
            "explain_text": "**ποΈ Model Architecture:** Which type of model is best suited to the problem?",
            "sub_decisions": [
                "Linear vs tree-based vs neural networks?",
                "How interpretable should the model be?",
                "What are computational constraints?"
            ]
        },
        "π Baseline Model": {
            "explain_text": "**π Baseline Model:** What simple models can set a performance baseline?",
            "sub_decisions": [
                "Should a logistic regression or decision tree be used?",
                "What baseline metric is most relevant?"
            ]
        },
        "π§ Pre-trained Models": {
            "explain_text": "**π§ Pre-trained Models:** Can existing models be leveraged?",
            "sub_decisions": [
                "Which pre-trained models are relevant (image, NLP, tabular)?",
                "Fine-tune or use as feature extractors?"
            ]
        },
        "β‘ Hyperparams": {
            "explain_text": "**β‘ Hyperparams:** How to optimize model hyperparameters?",
            "sub_decisions": [
                "Grid search vs random search vs Bayesian?",
                "How many trials and folds to run?",
                "What budget or time limit applies?"
            ]
        },
        "π¦ Model Complexity": {
            "explain_text": "**π¦ Model Complexity:** Is the model efficient enough for deployment?",
            "sub_decisions": [
                "How many parameters and FLOPs?",
                "What is memory usage and latency?",
                "Are there deployment constraints (edge vs cloud)?"
            ]
        }
    },
    "ποΈ Model Training": {
        "explain_text": "**ποΈ Model Training:** Decisions about the training algorithm used.",
        "βοΈ Data Splitting": {
            "explain_text": "**βοΈ Data Splitting:** How should data be divided for training and testing?",
            "sub_decisions": [
                "Train-test split ratio?",
                "Cross-validation vs stratified split?"
            ]
        },
        "βοΈ Loss Function": {
            "explain_text": "**βοΈ Loss Function:** Which loss function aligns with the task?",
            "sub_decisions": [
                "MSE vs MAE vs cross-entropy?",
                "Is robustness to outliers needed?",
                "Does it align with evaluation metrics?"
            ]
        },
        "π Optimization": {
            "explain_text": "**π Optimization:** Which optimization algorithm should be used?",
            "sub_decisions": [
                "SGD vs Adam vs RMSProp?",
                "What learning rate schedule?",
                "What batch size?"
            ]
        },
        "π‘οΈ Regularization": {
            "explain_text": "**π‘οΈ Regularization:** How to prevent overfitting?",
            "sub_decisions": [
                "L1 vs L2 regularization?",
                "Dropout rate?",
                "Should early stopping be applied?"
            ]
        },
        "π Training Monitoring": {
            "explain_text": "**π Training Monitoring:** How to track and manage training progress?",
            "sub_decisions": [
                "Which metrics should be monitored?",
                "How often to checkpoint models?"
            ]
        }
    },
    "π Model Evaluation": {
        "explain_text": "**π Model Evaluation:** Decisions about the evaluation criteria.",
        "π Evaluation Metric": {
            "explain_text": "**π Evaluation Metric:** Which metrics best reflect model performance?",
            "sub_decisions": [
                "Accuracy vs Precision/Recall/F1?",
                "How to handle class imbalance?",
                "Including probabilistic metrics (AUC, log loss)?"
            ]
        },
        "π§ͺ Test Data": {
            "explain_text": "**π§ͺ Test Data:** How should testing be performed?",
            "sub_decisions": [
                "Hold-out set vs cross-validation?",
                "An external test dataset?"
            ]
        },
        "βοΈ Fairness": {
            "explain_text": "**βοΈ Fairness:** How to ensure fairness across groups?",
            "sub_decisions": [
                "Which fairness metric to use (demographic parity, equalized odds)?",
                "How to detect bias in predictions?"
            ]
        },
        "π οΈ Robustness": {
            "explain_text": "**π οΈ Robustness:** How reliable is the model under stress?",
            "sub_decisions": [
                "How does the model handle noisy inputs?",
                "How to test against distribution shifts?"
            ]
        },
        "π Interpretability": {
            "explain_text": "**π Interpretability:** How understandable are the model predictions?",
            "sub_decisions": [
                "Which methods to use (feature importance, SHAP, LIME)?",
                "How stable are explanations?",
                "Are explanations actionable for stakeholders?"
            ]
        }
    }
}
|