Spaces:

MasanneckLab
/

Withings_Normalization_App

Running

File size: 11,965 Bytes

"""
Batch Analysis page for Smartwatch Normative Z-Score Calculator.

Upload multiple patient records for bulk z-score analysis.
"""
import streamlit as st
import pandas as pd
import sys
import os
from io import BytesIO

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS
import normalizer_model

# Page-level settings: browser-tab title, tab icon, and full-width layout.
_PAGE_SETTINGS = {
    "page_title": "Batch Analysis - Smartwatch Z-Score Calculator",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# Location of the normative table: the CSV sits in the parent of the
# directory that contains this file.
_this_file = os.path.abspath(__file__)
_app_root = os.path.dirname(os.path.dirname(_this_file))
DATA_PATH = os.path.join(_app_root, "Table_1_summary_measure.csv")

@st.cache_data
def _load_normative_table_cached(path):
    """Load the normative table stored at *path*.

    Only this raising loader is wrapped in ``st.cache_data``. Any exception
    from ``normalizer_model.load_normative_table`` propagates to the caller,
    so a failed load does not produce a return value that could be cached.
    """
    return normalizer_model.load_normative_table(path)


def get_normative_data():
    """Return the normative table read from ``DATA_PATH``, or ``None`` on failure.

    Error handling lives outside the cached loader on purpose: previously the
    ``None`` returned after a failure was itself cached, so the load was not
    retried on later reruns. Now every rerun retries until a load succeeds,
    and successful loads are still served from the cache.

    Returns:
        Whatever ``normalizer_model.load_normative_table`` returns for
        ``DATA_PATH``, or ``None`` if loading raised (an ``st.error`` message
        is shown in that case).
    """
    try:
        return _load_normative_table_cached(DATA_PATH)
    except Exception as e:  # page boundary: report the problem and degrade gracefully
        st.error(f"Could not load normative data: {e}")
        return None

# Normative reference table for this run; None when loading failed.
normative_df = get_normative_data()

# Heading and one-line tagline.
_PAGE_HEADING = "📊 Batch Analysis"
_PAGE_TAGLINE = "**Upload multiple patient records for bulk smartwatch biomarker analysis**"
st.title(_PAGE_HEADING)
st.markdown(_PAGE_TAGLINE)

# Short usage hint shown in an info box.
_USAGE_HINT = (
    "Upload an Excel or CSV file with patient data. Each row will be analyzed and "
    "z-scores will be calculated for all available biomarkers."
)
st.info(_USAGE_HINT)

# Two columns: the first holds the template download, the second the upload.
col1, col2 = st.columns(2)

with col1:
    st.subheader("📥 Download Template")
    st.markdown("Use this template to prepare your data in the correct format.")

    template_df = get_batch_template_df()

    # Write the template into an in-memory .xlsx so it can be served as a download.
    template_buffer = BytesIO()
    with pd.ExcelWriter(template_buffer, engine='xlsxwriter') as xlsx:
        template_df.to_excel(xlsx, index=False, sheet_name='Patient Data')
        sheet = xlsx.sheets['Patient Data']

        # Orange-themed header format
        header_style = xlsx.book.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1
        })
        # Re-write every header cell with the format and set its column width.
        for position, column_name in enumerate(template_df.columns.values):
            sheet.write(0, position, column_name, header_style)
            sheet.set_column(position, position, 18)

    st.download_button(
        label="⬇️ Download Excel Template",
        data=template_buffer.getvalue(),
        file_name="smartwatch_zscore_template.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

    st.markdown("#### Required Columns:")
    st.markdown("""
    | Column | Description | Example |
    |--------|-------------|---------|
    | patient_id | Unique identifier | P001 |
    | age | Age in years | 45 |
    | gender | Man/Woman | Man |
    | region | Geographic region | Western Europe |
    | bmi | Body Mass Index | 24.5 |
    """)

    st.markdown("#### Biomarker Columns (optional):")
    # One markdown table row per known biomarker code, labelled via
    # BIOMARKER_LABELS (falling back to the code itself).
    table_header = "| Column | Description |\n|--------|-------------|\n"
    table_rows = "".join(
        f"| {code} | {BIOMARKER_LABELS.get(code, code)} |\n"
        for code in AVAILABLE_BIOMARKERS
    )
    st.markdown(table_header + table_rows)

    st.markdown("*Note: Include only the biomarkers you have data for. Leave cells blank if not measured.*")

with col2:
    st.subheader("📤 Upload Data")

    uploaded_file = st.file_uploader(
        "Choose an Excel or CSV file",
        type=['xlsx', 'xls', 'csv'],
        help="Upload a file with patient data following the template format"
    )

    # Always bind ``df`` so code further down can simply test it against None,
    # even when nothing has been uploaded yet.
    df = None

    if uploaded_file is not None:
        try:
            # Compare the extension case-insensitively so a file named e.g.
            # "DATA.CSV" is parsed as CSV instead of being handed to the
            # Excel reader.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            st.success(f"✅ Loaded {len(df)} patient records")

            # Detect available biomarkers in the uploaded data
            detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

            if detected_biomarkers:
                detected_labels = ', '.join(BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers)
                st.markdown(f"**Detected biomarkers:** {detected_labels}")
            else:
                st.warning("No recognized biomarker columns found. Please check your column names.")

            with st.expander("Preview uploaded data"):
                st.dataframe(df, use_container_width=True)

        except Exception as e:
            # Page boundary: show the problem to the user and make sure no
            # partially handled frame is left for the processing section.
            st.error(f"Error reading file: {str(e)}")
            df = None

st.markdown("---")


def highlight_interpretation(val):
    """Return the CSS rule used to shade one interpretation cell.

    Args:
        val: A cell value taken from an ``*_interpretation`` column.

    Returns:
        A ``background-color`` CSS declaration, or ``''`` when the value is
        missing, is one of the placeholders ``'N/A'`` / ``'No data'``, or
        contains none of the recognised keywords.
    """
    if pd.isna(val) or val == 'N/A' or val == 'No data':
        return ''
    val_str = str(val).lower()
    # Plain "average" is tested first and excludes "below"/"above", because
    # "below average" and "above average" also contain the word "average".
    if 'average' in val_str and 'below' not in val_str and 'above' not in val_str:
        return 'background-color: #90EE90'  # Green
    elif 'below' in val_str:
        return 'background-color: #87CEEB'  # Light blue
    elif 'above' in val_str:
        return 'background-color: #FFD700'  # Gold
    elif 'very low' in val_str:
        return 'background-color: #ADD8E6'  # Light blue
    elif 'very high' in val_str:
        return 'background-color: #FF6B6B'  # Red
    return ''


# Processing section
# ``df`` is only bound once the upload section has handled a file; look it up
# explicitly instead of probing the namespace with ``dir()``.
batch_df = globals().get("df")

if uploaded_file is not None and batch_df is not None and normative_df is not None:

    # Biomarker selection
    st.subheader("Select Biomarkers to Analyze")
    detected_biomarkers = [col for col in batch_df.columns if col in AVAILABLE_BIOMARKERS]

    if detected_biomarkers:
        selected_biomarkers = st.multiselect(
            "Choose biomarkers to include in analysis",
            options=detected_biomarkers,
            default=detected_biomarkers,
            format_func=lambda x: BIOMARKER_LABELS.get(x, x)
        )

        # NOTE(review): st.button is presumably True only on the rerun caused
        # by the click, so pressing the download button further down likely
        # hides these results again — confirm, and consider keeping the
        # results in st.session_state.
        if st.button("🔬 Process Batch Data", type="primary"):
            if not selected_biomarkers:
                st.error("Please select at least one biomarker to analyze.")
            else:
                with st.spinner("Processing patient data..."):
                    results_df = process_batch_data(batch_df, normative_df, selected_biomarkers)

                st.success("✅ Processing complete!")

                # Results section
                st.subheader("Results")

                # Build display columns dynamically: demographics first, then
                # for every selected biomarker its raw value, z-score,
                # percentile and interpretation — each only if present.
                base_cols = ['patient_id', 'age', 'gender', 'region', 'bmi']
                display_cols = [c for c in base_cols if c in results_df.columns]
                for bm in selected_biomarkers:
                    candidates = (bm, f'{bm}_z', f'{bm}_percentile', f'{bm}_interpretation')
                    display_cols.extend(c for c in candidates if c in results_df.columns)

                # Apply styling to interpretation columns
                interp_cols = [c for c in display_cols if 'interpretation' in c]
                if interp_cols:
                    styler = results_df[display_cols].style
                    # Styler.applymap is deprecated in favour of Styler.map
                    # (pandas 2.1+); prefer the new name and fall back to the
                    # old one on older pandas.
                    style_cells = getattr(styler, 'map', None) or styler.applymap
                    styled_df = style_cells(highlight_interpretation, subset=interp_cols)
                    st.dataframe(styled_df, use_container_width=True)
                else:
                    st.dataframe(results_df[display_cols], use_container_width=True)

                # Summary Statistics
                st.subheader("Summary Statistics")

                # At most three columns side by side. Every selected biomarker
                # is shown: the fourth and later ones wrap around and stack
                # below the earlier entries instead of being dropped.
                # ``selected_biomarkers`` is non-empty in this branch.
                n_summary_cols = min(len(selected_biomarkers), 3)
                cols = st.columns(n_summary_cols)

                for idx, bm in enumerate(selected_biomarkers):
                    with cols[idx % n_summary_cols]:
                        st.markdown(f"**{BIOMARKER_LABELS.get(bm, bm)}**")
                        z_col = f'{bm}_z'
                        if z_col not in results_df.columns:
                            continue

                        # Filter out non-numeric values
                        z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna()
                        if z_values.empty:
                            continue

                        st.metric("Mean Z-Score", f"{z_values.mean():.2f}")
                        st.metric("Patients Analyzed", len(z_values))

                        # Distribution of interpretations
                        interp_col = f'{bm}_interpretation'
                        if interp_col in results_df.columns:
                            interp_counts = results_df[interp_col].value_counts()
                            st.bar_chart(interp_counts)

                # Export Results
                st.subheader("📥 Export Results")

                # Write the full result frame (not just the displayed columns)
                # into an in-memory .xlsx for download.
                output = BytesIO()
                with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                    results_df.to_excel(writer, index=False, sheet_name='Results')
                    workbook = writer.book
                    worksheet = writer.sheets['Results']

                    # Orange-themed header
                    header_format = workbook.add_format({
                        'bold': True,
                        'bg_color': '#e67e22',
                        'font_color': 'white',
                        'border': 1
                    })
                    for col_num, value in enumerate(results_df.columns.values):
                        worksheet.write(0, col_num, value, header_format)
                        worksheet.set_column(col_num, col_num, 18)

                st.download_button(
                    label="⬇️ Download Results as Excel",
                    data=output.getvalue(),
                    file_name="smartwatch_zscore_results.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
    else:
        st.warning(
            "No recognized biomarker columns found in your data. "
            "Please ensure your columns match the template format."
        )

# Z-Score Classification Guide
# Reference table mapping z-score ranges to classifications and percentiles,
# shown inside a collapsible expander.
_CLASSIFICATION_GUIDE_MD = """
    **How to interpret Z-Scores:**
    
    | Z-Score Range | Classification | Percentile Range |
    |:-------------:|:--------------:|:----------------:|
    | z < -2.0 | Very Low | < 2.3% |
    | -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |
    | **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** |
    | 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |
    | z ≥ 2.0 | Very High | > 97.7% |
    
    **Context matters:**
    - For **steps, sleep duration, and active minutes**: Higher values are generally better ✓
    - For **heart rate**: Lower resting values are generally better ✓
    
    *A z-score of 0 means you are exactly at the population average for your demographic group.*
    """

st.markdown("---")
with st.expander("📊 Z-Score Classification Guide"):
    st.markdown(_CLASSIFICATION_GUIDE_MD)

# Footer
# Methodology note followed by the attribution line.
_METHOD_NOTE = (
    "*Batch analysis calculates z-scores relative to the Withings normative population, "
    "stratified by region, gender, age group, and BMI category.*"
)
_ATTRIBUTION = "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026."

st.markdown("---")
st.markdown(_METHOD_NOTE)
st.markdown(_ATTRIBUTION)