File size: 11,965 Bytes
ae420f7 a3309b8 ae420f7 ee7a14d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 |
"""
Batch Analysis page for Smartwatch Normative Z-Score Calculator.
Upload multiple patient records for bulk z-score analysis.
"""
import streamlit as st
import pandas as pd
import sys
import os
from io import BytesIO
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS
import normalizer_model
# Configure the Streamlit page: title, icon and wide layout.
st.set_page_config(
    layout="wide",
    page_icon="📊",
    page_title="Batch Analysis - Smartwatch Z-Score Calculator",
)

# Normative summary table: a CSV located in the directory one level above
# the directory that contains this file.
DATA_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "Table_1_summary_measure.csv",
)
@st.cache_data
def _load_normative_table_cached(path):
    """Load the normative table stored at *path* and memoize the result.

    Errors are deliberately allowed to propagate: the cache only stores
    return values, so a failed load is not memoized and the next script run
    tries again.
    """
    return normalizer_model.load_normative_table(path)


def get_normative_data():
    """Return the normative table read from ``DATA_PATH``.

    Returns:
        Whatever ``normalizer_model.load_normative_table`` returns, or
        ``None`` when loading raised. In that case the error is reported on
        the page via ``st.error``.
    """
    try:
        return _load_normative_table_cached(DATA_PATH)
    except Exception as e:
        # Handle the failure outside the cached function; returning None from
        # inside it would cache the failure for every subsequent rerun.
        st.error(f"Could not load normative data: {e}")
        return None


# The original function was the cached object itself; keep its ``clear``
# handle reachable under the same name for any caller that relied on it.
get_normative_data.clear = _load_normative_table_cached.clear
# Fetch the normative table for this run; None means it could not be loaded.
normative_df = get_normative_data()

# Page heading and a short description of what the page does.
st.title("📊 Batch Analysis")
st.markdown("**Upload multiple patient records for bulk smartwatch biomarker analysis**")
st.info(
    "Upload an Excel or CSV file with patient data. Each row will be analyzed and "
    "z-scores will be calculated for all available biomarkers."
)

# Two columns: the first holds the template download, the second the upload.
col1, col2 = st.columns(2)

with col1:
    st.subheader("📥 Download Template")
    st.markdown("Use this template to prepare your data in the correct format.")

    template_df = get_batch_template_df()

    # Write the template into an in-memory .xlsx file for the download button.
    template_buffer = BytesIO()
    with pd.ExcelWriter(template_buffer, engine='xlsxwriter') as writer:
        template_df.to_excel(writer, index=False, sheet_name='Patient Data')
        sheet = writer.sheets['Patient Data']
        # Orange-themed header format
        header_style = writer.book.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1,
        })
        # Re-write each header cell with the themed format and set the
        # column width to 18.
        for position, column_name in enumerate(template_df.columns.values):
            sheet.write(0, position, column_name, header_style)
            sheet.set_column(position, position, 18)

    st.download_button(
        label="⬇️ Download Excel Template",
        data=template_buffer.getvalue(),
        file_name="smartwatch_zscore_template.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    st.markdown("#### Required Columns:")
    st.markdown("""
| Column | Description | Example |
|--------|-------------|---------|
| patient_id | Unique identifier | P001 |
| age | Age in years | 45 |
| gender | Man/Woman | Man |
| region | Geographic region | Western Europe |
| bmi | Body Mass Index | 24.5 |
""")

    st.markdown("#### Biomarker Columns (optional):")
    # One markdown table row per known biomarker code; the label falls back
    # to the code itself when no label is registered.
    table_header = "| Column | Description |\n|--------|-------------|\n"
    table_rows = "".join(
        f"| {code} | {BIOMARKER_LABELS.get(code, code)} |\n"
        for code in AVAILABLE_BIOMARKERS
    )
    st.markdown(table_header + table_rows)
    st.markdown("*Note: Include only the biomarkers you have data for. Leave cells blank if not measured.*")
# Bind ``df`` before the upload widget runs so that later code can test
# ``df is not None`` even when no file has been uploaded in this run.
df = None

with col2:
    st.subheader("📤 Upload Data")
    uploaded_file = st.file_uploader(
        "Choose an Excel or CSV file",
        type=['xlsx', 'xls', 'csv'],
        help="Upload a file with patient data following the template format"
    )

    if uploaded_file is not None:
        try:
            # Compare the extension case-insensitively: a file named
            # "DATA.CSV" must go to the CSV reader, not the Excel reader.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            st.success(f"✅ Loaded {len(df)} patient records")

            # Detect available biomarkers in the uploaded data
            detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]
            if detected_biomarkers:
                st.markdown(f"**Detected biomarkers:** {', '.join([BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers])}")
            else:
                st.warning("No recognized biomarker columns found. Please check your column names.")

            with st.expander("Preview uploaded data"):
                st.dataframe(df, use_container_width=True)
        except Exception as e:
            # Any read or preview failure is shown to the user; ``df`` is
            # reset so the processing section is skipped.
            st.error(f"Error reading file: {str(e)}")
            df = None
st.markdown("---")


def highlight_interpretation(val):
    """Return the CSS background rule for one interpretation cell.

    Args:
        val: Cell value from an ``*_interpretation`` column.

    Returns:
        A ``background-color: ...`` string chosen by case-insensitive keyword
        match, or ``''`` for missing values, the placeholders ``'N/A'`` and
        ``'No data'``, and texts that match no keyword.
    """
    if pd.isna(val) or val == 'N/A' or val == 'No data':
        return ''
    val_str = str(val).lower()
    # "below average" / "above average" also contain "average", so plain
    # "average" only counts when neither qualifier is present.
    if 'average' in val_str and 'below' not in val_str and 'above' not in val_str:
        return 'background-color: #90EE90'  # Light green
    elif 'below' in val_str:
        return 'background-color: #87CEEB'  # Sky blue
    elif 'above' in val_str:
        return 'background-color: #FFD700'  # Gold
    elif 'very low' in val_str:
        return 'background-color: #ADD8E6'  # Light blue
    elif 'very high' in val_str:
        return 'background-color: #FF6B6B'  # Red
    return ''


# Processing section
# ``'df' in dir()`` tolerates ``df`` not having been bound by the upload
# section (e.g. when no file was uploaded in this run).
if uploaded_file is not None and 'df' in dir() and df is not None and normative_df is not None:
    # Biomarker selection
    st.subheader("Select Biomarkers to Analyze")
    detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

    if detected_biomarkers:
        selected_biomarkers = st.multiselect(
            "Choose biomarkers to include in analysis",
            options=detected_biomarkers,
            default=detected_biomarkers,
            format_func=lambda x: BIOMARKER_LABELS.get(x, x)
        )

        # NOTE(review): everything below is rendered only on the run in which
        # the button reports a click; a later rerun (for instance after using
        # the download button) would presumably drop the results. Confirm
        # whether they should be kept in st.session_state.
        if st.button("🔬 Process Batch Data", type="primary"):
            if not selected_biomarkers:
                st.error("Please select at least one biomarker to analyze.")
            else:
                with st.spinner("Processing patient data..."):
                    results_df = process_batch_data(df, normative_df, selected_biomarkers)
                st.success("✅ Processing complete!")

                # Results section
                st.subheader("Results")

                # Build display columns dynamically: demographic columns
                # first, then per biomarker the raw value and its derived
                # columns, keeping only those present in the results.
                base_cols = ['patient_id', 'age', 'gender', 'region', 'bmi']
                display_cols = [c for c in base_cols if c in results_df.columns]
                for bm in selected_biomarkers:
                    candidates = (bm, f'{bm}_z', f'{bm}_percentile', f'{bm}_interpretation')
                    display_cols.extend(c for c in candidates if c in results_df.columns)

                # Apply styling to interpretation columns
                interp_cols = [c for c in display_cols if 'interpretation' in c]
                if interp_cols:
                    styler = results_df[display_cols].style
                    # pandas 2.1 renamed Styler.applymap to Styler.map; use
                    # the new name when available, else fall back.
                    style_cells = getattr(styler, 'map', None) or styler.applymap
                    styled_df = style_cells(highlight_interpretation, subset=interp_cols)
                    st.dataframe(styled_df, use_container_width=True)
                else:
                    st.dataframe(results_df[display_cols], use_container_width=True)

                # Summary Statistics
                st.subheader("Summary Statistics")

                # Lay the biomarkers out in rows of at most three columns so
                # that every selected biomarker gets a summary, not only the
                # first three. ``selected_biomarkers`` is non-empty here.
                per_row = min(len(selected_biomarkers), 3)
                for start in range(0, len(selected_biomarkers), per_row):
                    row_biomarkers = selected_biomarkers[start:start + per_row]
                    for column, bm in zip(st.columns(per_row), row_biomarkers):
                        with column:
                            st.markdown(f"**{BIOMARKER_LABELS.get(bm, bm)}**")

                            z_col = f'{bm}_z'
                            if z_col in results_df.columns:
                                # Filter out non-numeric values
                                z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna()
                                if len(z_values) > 0:
                                    st.metric("Mean Z-Score", f"{z_values.mean():.2f}")
                                    st.metric("Patients Analyzed", len(z_values))

                                    # Distribution of interpretations
                                    # NOTE(review): nesting level reconstructed
                                    # from a flattened source; confirm the chart
                                    # belongs under the z-score checks.
                                    interp_col = f'{bm}_interpretation'
                                    if interp_col in results_df.columns:
                                        interp_counts = results_df[interp_col].value_counts()
                                        st.bar_chart(interp_counts)

                # Export Results
                st.subheader("📥 Export Results")

                # Write the full results table into an in-memory .xlsx file.
                output = BytesIO()
                with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                    results_df.to_excel(writer, index=False, sheet_name='Results')
                    workbook = writer.book
                    worksheet = writer.sheets['Results']

                    # Orange-themed header
                    header_format = workbook.add_format({
                        'bold': True,
                        'bg_color': '#e67e22',
                        'font_color': 'white',
                        'border': 1
                    })
                    # Re-write each header cell with the themed format and
                    # set the column width to 18.
                    for col_num, value in enumerate(results_df.columns):
                        worksheet.write(0, col_num, value, header_format)
                        worksheet.set_column(col_num, col_num, 18)

                st.download_button(
                    label="⬇️ Download Results as Excel",
                    data=output.getvalue(),
                    file_name="smartwatch_zscore_results.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
    else:
        st.warning(
            "No recognized biomarker columns found in your data. "
            "Please ensure your columns match the template format."
        )
# Z-Score Classification Guide
st.markdown("---")
with st.expander("📊 Z-Score Classification Guide"):
    # Blank lines separate the heading, table, list and closing note; without
    # them a GFM table absorbs the following text line as an extra row and
    # the final note becomes a continuation of the last list item.
    st.markdown("""
**How to interpret Z-Scores:**

| Z-Score Range | Classification | Percentile Range |
|:-------------:|:--------------:|:----------------:|
| z < -2.0 | Very Low | < 2.3% |
| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |
| **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** |
| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |
| z ≥ 2.0 | Very High | > 97.7% |

**Context matters:**

- For **steps, sleep duration, and active minutes**: Higher values are generally better ✓
- For **heart rate**: Lower resting values are generally better ✓

*A z-score of 0 means you are exactly at the population average for your demographic group.*
""")

# Footer
st.markdown("---")
st.markdown(
    "*Batch analysis calculates z-scores relative to the Withings normative population, "
    "stratified by region, gender, age group, and BMI category.*"
)
st.markdown(
    "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026."
)
|