Spaces:
No application file
No application file
:sparkles: New App From Computer
Browse files- app.py +100 -0
- final_data/Metabolic_Panel.csv +29 -0
- final_data/Serum_Cardiovascular.csv +29 -0
- requirements.txt +5 -3
- scripts/featureEngineering.py +63 -0
- scripts/graphMaking.py +256 -0
- scripts/stats.py +205 -0
app.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Import modules
|
| 6 |
+
from scripts.featureEngineering import add_flight_day
|
| 7 |
+
from scripts.stats import tidy_from_wide, analyze_r1_vs_L
|
| 8 |
+
from scripts.graphMaking import make_figure
|
| 9 |
+
|
| 10 |
+
# Load Data
|
| 11 |
+
def list_final_data(folder="final_data"):
    """Return the names of the CSV files found directly inside ``folder``.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        File names (not full paths) ending in ".csv", sorted alphabetically so
        the order is stable across platforms. An empty list is returned when
        the folder does not exist, so callers can report "no data" instead of
        crashing.
    """
    try:
        entries = os.listdir(folder)
    except FileNotFoundError:
        # A missing data folder is treated the same as an empty one.
        return []
    return sorted(name for name in entries if name.endswith(".csv"))
|
| 14 |
+
|
| 15 |
+
def load_final_data(fname, folder="final_data"):
    """Read one CSV file from the data folder into a pandas DataFrame.

    Args:
        fname: File name of the CSV, relative to ``folder``.
        folder: Directory that contains the file.

    Returns:
        The parsed file, read with pandas' default CSV settings.
    """
    return pd.read_csv(os.path.join(folder, fname))
|
| 19 |
+
|
| 20 |
+
# Main App
|
| 21 |
+
def main():
    """Render the Streamlit dashboard.

    Flow: pick a CSV from the sidebar, add the numeric flight-day column,
    reshape to tidy format, compute the R+1-vs-pre-launch statistics, then
    plot the chosen analyte with the chosen participant filter and error band.
    """
    st.title("Astronaut Biochemistry Dashboard")

    # 1. Sidebar file selection
    st.sidebar.header("Data Selection")
    csv_files = list_final_data()

    if not csv_files:
        st.error("No CSV files found in final_data/")
        return

    selected_file = st.sidebar.selectbox("Choose dataset", csv_files)
    df_raw = load_final_data(selected_file)
    st.write(f"Loaded file: **{selected_file}**")

    # 2. Clean with feature engineering
    df_clean = add_flight_day(df_raw)

    # 3. Transform to tidy format + run stats
    tidy_df = tidy_from_wide(df_clean)
    stats_df = analyze_r1_vs_L(tidy_df)

    # 4. Sidebar user selections
    st.sidebar.header("Plot Controls")

    # Compute the analyte list once; default to sodium when it is available.
    analyte_options = tidy_df["analyte"].unique().tolist()
    default_index = analyte_options.index("sodium") if "sodium" in analyte_options else 0
    analyte = st.sidebar.selectbox(
        "Select Analyte",
        options=analyte_options,
        index=default_index,
    )

    # Keep the raw IDs as option values so the selection matches the
    # 'astronautID' column exactly when make_figure filters on it; upper-case
    # is applied for display (and sort order) only.
    astronaut_ids = sorted(
        tidy_df["astronautID"].unique().tolist(),
        key=lambda a: str(a).upper(),
    )
    astronauts = st.sidebar.multiselect(
        "Select Astronauts",
        options=astronaut_ids,
        default=[],
        format_func=lambda a: str(a).upper(),
    )

    sex_filter = st.sidebar.radio(
        "Sex Filter",
        ["All", "Male", "Female"],
        index=0,
    )

    show_error = st.sidebar.radio(
        "Error Band",
        ["None", "within", "group"],
        index=0,
    )
    # The radio returns the string "None"; make_figure expects a real None.
    show_error = None if show_error == "None" else show_error

    # Unify filters: Astronauts take priority, else fall back to sex filter
    if astronauts:
        astronaut_filter = astronauts
    elif sex_filter != "All":
        astronaut_filter = sex_filter
    else:
        astronaut_filter = None

    # 5. Generate figure
    if analyte:
        fig = make_figure(
            tidy_df=tidy_df,
            stats_df=stats_df,
            analytes=[analyte],
            astronaut_filter=astronaut_filter,
            show_error=show_error,
        )
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("Please select at least one analyte to plot.")

    # 6. Optional: preview data
    with st.expander("Preview Data"):
        st.dataframe(tidy_df.head(20))


if __name__ == "__main__":
    main()
|
final_data/Metabolic_Panel.csv
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Sample Name,albumin_value_gram_per_deciliter,albumin_range_min_gram_per_deciliter,albumin_range_max_gram_per_deciliter,albumin_to_globulin_ratio_value,albumin_to_globulin_ratio_range_min,albumin_to_globulin_ratio_range_max,alkaline_phosphatase_value_units_per_liter,alkaline_phosphatase_range_min_units_per_liter,alkaline_phosphatase_range_max_units_per_liter,alt_value_units_per_liter,alt_range_min_units_per_liter,alt_max_units_per_liter,ast_value_units_per_liter,ast_range_min_units_per_liter,ast_max_units_per_liter,total_bilirubin_value_milligram_per_deciliter,total_bilirubin_range_min_milligram_per_deciliter,total_bilirubin_range_max_milligram_per_deciliter,bun_to_creatinine_ratio_value,bun_to_creatinine_ratio_range_min,bun_to_creatinine_ratio_range_max,calcium_value_milligram_per_deciliter,calcium_range_min_milligram_per_deciliter,calcium_range_max_milligram_per_deciliter,carbon_dioxide_value_millimol_per_liter,carbon_dioxide_range_min_millimol_per_liter,carbon_dioxide_range_max_millimol_per_liter,chloride_value_millimol_per_liter,chloride_range_min_millimol_per_liter,chloride_range_max_millimol_per_liter,creatinine_value_milligram_per_deciliter,creatinine_range_min_milligram_per_deciliter,creatinine_range_max_milligram_per_deciliter,egfr_african_american_value_milliliter_per_minute_per_1.73_meter_squared,egfr_african_american_range_min_milliliter_per_minute_per_1.73_meter_squared,egfr_african_american_range_max_milliliter_per_minute_per_1.73_meter_squared,egfr_non_african_american_value_milliliter_per_minute_per_1.73_meter_squared,egfr_non_african_american_range_min_milliliter_per_minute_per_1.73_meter_squared,egfr_non_african_american_range_max_milliliter_per_minute_per_1.73_meter_squared,globulin_value_gram_per_deciliter,globulin_range_min_gram_per_deciliter,globulin_range_max_gram_per_deciliter,glucose_value_milligram_per_deciliter,glucose_range_min_milligram_per_deciliter,glucose_range_max_milligram_per_deciliter,potassium_value_millimol_per_liter,potassium_ra
nge_min_millimol_per_liter,potassium_range_max_millimol_per_liter,total_protein_value_gram_per_deciliter,total_protein_range_min_gram_per_deciliter,total_protein_range_max_gram_per_deciliter,sodium_value_millimol_per_liter,sodium_range_min_millimol_per_liter,sodium_range_max_millimol_per_liter,urea_nitrogen_bun_value_milligram_per_deciliter,urea_nitrogen_bun_range_min_milligram_per_deciliter,urea_nitrogen_bun_range_max_milligram_per_deciliter,astronautID,timepoint
|
| 2 |
+
C001_serum_L-3,4.9,3.6,5.1,2.0,1,2.5,45,36,130,13,9,46,15,10,40,1.0,0.2,1.2,15.0,6.0,22.0,9.6,8.6,10.3,26,20,32,102,98,110,1.37,0.6,1.35,75,60,,65,60,,2.5,1.9,3.7,69,65,99,3.9,3.5,5.3,7.4,6.1,8.1,142,135,146,20,7,25,C001,L-3
|
| 3 |
+
C001_serum_L-44,5.0,3.6,5.1,1.9,1,2.5,46,36,130,16,9,46,18,10,40,0.9,0.2,1.2,,6.0,22.0,10.0,8.6,10.3,26,20,32,101,98,110,1.1,0.6,1.35,98,60,,85,60,,2.7,1.9,3.7,80,65,99,4.2,3.5,5.3,7.7,6.1,8.1,139,135,146,19,7,25,C001,L-44
|
| 4 |
+
C001_serum_L-92,5.0,3.6,5.1,2.2,1,2.5,51,36,130,15,9,46,15,10,40,0.7,0.2,1.2,,6.0,22.0,9.7,8.6,10.3,23,20,32,104,98,110,1.24,0.6,1.35,85,60,,73,60,,2.3,1.9,3.7,83,65,99,4.6,3.5,5.3,7.3,6.1,8.1,141,135,146,25,7,25,C001,L-92
|
| 5 |
+
C001_serum_R+1,4.9,3.6,5.1,1.9,1,2.5,52,36,130,9,9,46,16,10,40,1.2,0.2,1.2,,6.0,22.0,9.7,8.6,10.3,24,20,32,102,98,110,1.12,0.6,1.35,96,60,,83,60,,2.6,1.9,3.7,90,65,99,4.4,3.5,5.3,7.5,6.1,8.1,138,135,146,18,7,25,C001,R+1
|
| 6 |
+
C001_serum_R+194,4.6,3.6,5.1,1.9,1,2.5,50,36,130,15,9,46,12,10,40,0.8,0.2,1.2,22.0,6.0,22.0,9.4,8.6,10.3,23,20,32,102,98,110,1.29,0.6,1.35,80,60,,69,60,,2.4,1.9,3.7,87,65,99,4.3,3.5,5.3,7.0,6.1,8.1,137,135,146,28,7,25,C001,R+194
|
| 7 |
+
C001_serum_R+45,4.9,3.6,5.1,2.1,1,2.5,46,36,130,11,9,46,13,10,40,,0.2,1.2,,6.0,22.0,9.6,8.6,10.3,28,20,32,100,98,110,1.11,0.6,1.35,97,60,,84,60,,2.3,1.9,3.7,84,65,99,4.0,3.5,5.3,7.2,6.1,8.1,138,135,146,15,7,25,C001,R+45
|
| 8 |
+
C001_serum_R+82,5.1,3.6,5.1,1.9,1,2.5,49,36,130,15,9,46,16,10,40,1.2,0.2,1.2,,6.0,22.0,10.1,8.6,10.3,27,20,32,101,98,110,1.25,0.6,1.35,84,60,,73,60,,2.7,1.9,3.7,91,65,99,4.3,3.5,5.3,7.8,6.1,8.1,140,135,146,22,7,25,C001,R+82
|
| 9 |
+
C002_serum_L-3,4.3,3.6,5.1,1.6,1,2.5,68,37,153,19,6,29,40,10,35,0.3,0.2,1.2,,6.0,22.0,9.3,8.6,10.4,24,20,32,106,98,110,0.96,0.5,1.05,79,60,,68,60,,2.7,1.9,3.7,78,65,99,3.8,3.5,5.3,7.0,6.1,8.1,143,135,146,17,7,25,C002,L-3
|
| 10 |
+
C002_serum_L-44,4.6,3.6,5.1,1.5,1,2.5,78,37,153,24,6,29,31,10,35,0.4,0.2,1.2,,6.0,22.0,9.7,8.6,10.4,26,20,32,103,98,110,0.88,0.5,1.05,88,60,,76,60,,3.0,1.9,3.7,72,65,99,4.0,3.5,5.3,7.6,6.1,8.1,141,135,146,16,7,25,C002,L-44
|
| 11 |
+
C002_serum_L-92,4.9,3.6,5.1,1.6,1,2.5,73,37,153,16,6,29,19,10,35,0.5,0.2,1.2,,6.0,22.0,10.2,8.6,10.4,23,20,32,103,98,110,0.83,0.5,1.05,95,60,,82,60,,3.0,1.9,3.7,69,65,99,3.9,3.5,5.3,7.9,6.1,8.1,142,135,146,14,7,25,C002,L-92
|
| 12 |
+
C002_serum_R+1,4.4,3.6,5.1,1.5,1,2.5,71,37,153,16,6,29,23,10,35,0.4,0.2,1.2,,6.0,22.0,9.2,8.6,10.4,22,20,32,103,98,110,0.95,0.5,1.05,80,60,,69,60,,3.0,1.9,3.7,83,65,99,3.5,3.5,5.3,7.4,6.1,8.1,137,135,146,20,7,25,C002,R+1
|
| 13 |
+
C002_serum_R+194,4.3,3.6,5.1,1.4,1,2.5,66,37,153,17,6,29,24,10,35,0.3,0.2,1.2,,6.0,22.0,9.5,8.6,10.4,20,20,32,104,98,110,1.02,0.5,1.05,73,60,,63,60,,3.0,1.9,3.7,84,65,99,4.4,3.5,5.3,7.3,6.1,8.1,138,135,146,18,7,25,C002,R+194
|
| 14 |
+
C002_serum_R+45,4.2,3.6,5.1,1.6,1,2.5,65,37,153,14,6,29,17,10,35,0.5,0.2,1.2,,6.0,22.0,9.3,8.6,10.4,21,20,32,104,98,110,1.02,0.5,1.05,74,60,,64,60,,2.6,1.9,3.7,44,65,99,4.1,3.5,5.3,6.8,6.1,8.1,140,135,146,19,7,25,C002,R+45
|
| 15 |
+
C002_serum_R+82,4.5,3.6,5.1,1.6,1,2.5,65,37,153,17,6,29,21,10,35,0.5,0.2,1.2,,,,9.7,8.6,10.4,25,20,32,105,98,110,0.82,0.5,1.05,96,60,,83,60,,2.8,1.9,3.7,77,65,99,3.7,3.5,5.3,7.3,6.1,8.1,141,135,146,16,7,25,C002,R+82
|
| 16 |
+
C003_serum_L-3,4.2,3.6,5.1,1.6,1,2.5,31,36,130,13,9,46,13,10,40,0.2,0.2,1.2,24.0,6.0,22.0,9.5,8.6,10.3,23,20,32,101,98,110,1.08,0.6,1.35,80,60,,69,60,,2.6,1.9,3.7,75,65,99,3.3,3.5,5.3,6.8,6.1,8.1,138,135,146,26,7,25,C003,L-3
|
| 17 |
+
C003_serum_L-44,4.9,3.6,5.1,1.3,1,2.5,41,36,130,13,9,46,17,10,40,0.4,0.2,1.2,,6.0,22.0,10.3,8.6,10.3,21,20,32,99,98,110,1.03,0.6,1.35,85,60,,73,60,,3.9,1.9,3.7,62,65,99,3.5,3.5,5.3,8.8,6.1,8.1,139,135,146,19,7,25,C003,L-44
|
| 18 |
+
C003_serum_L-92,4.4,3.6,5.1,1.6,1,2.5,33,36,130,12,9,46,15,10,40,0.3,0.2,1.2,,6.0,22.0,9.3,8.6,10.3,20,20,32,102,98,110,1.0,0.6,1.35,88,60,,76,60,,2.7,1.9,3.7,77,65,99,3.7,3.5,5.3,7.1,6.1,8.1,139,135,146,17,7,25,C003,L-92
|
| 19 |
+
C003_serum_R+1,4.2,3.6,5.1,1.4,1,2.5,41,31,125,19,6,29,18,10,30,0.3,0.2,1.2,,6.0,22.0,9.4,8.6,10.2,20,20,32,103,98,110,0.89,0.5,1.1,102,60,,88,60,,2.9,1.9,3.7,103,65,99,3.0,3.5,5.3,7.1,6.1,8.1,137,135,146,21,7,25,C003,R+1
|
| 20 |
+
C003_serum_R+194,4.4,3.6,5.1,1.5,1,2.5,43,31,125,10,6,29,15,10,30,0.3,0.2,1.2,25.0,6.0,22.0,9.6,8.6,10.2,22,20,32,101,98,110,1.1,0.5,1.1,78,60,,67,60,,2.9,1.9,3.7,73,65,99,3.8,3.5,5.3,7.3,6.1,8.1,137,135,146,28,7,25,C003,R+194
|
| 21 |
+
C003_serum_R+45,4.5,3.6,5.1,1.6,1,2.5,51,31,125,17,6,29,19,10,30,0.7,0.2,1.2,,6.0,22.0,9.6,8.6,10.2,23,20,32,97,98,110,0.94,0.5,1.1,95,60,,82,60,,2.9,1.9,3.7,36,65,99,4.4,3.5,5.3,7.4,6.1,8.1,136,135,146,19,7,25,C003,R+45
|
| 22 |
+
C003_serum_R+82,4.8,3.6,5.1,1.8,1,2.5,50,31,125,12,6,29,15,10,30,0.5,0.2,1.2,,6.0,22.0,10.0,8.6,10.2,23,20,32,102,98,110,0.89,0.5,1.1,101,60,,87,60,,2.7,1.9,3.7,80,65,99,3.7,3.5,5.3,7.5,6.1,8.1,138,135,146,23,7,25,C003,R+82
|
| 23 |
+
C004_serum_L-3,4.6,3.6,5.1,1.7,1,2.5,55,36,130,37,9,46,39,10,40,0.5,0.2,1.2,,6.0,22.0,10.2,8.6,10.3,24,20,32,103,98,110,1.13,0.6,1.35,92,60,,80,60,,2.7,1.9,3.7,56,65,99,3.7,3.5,5.3,7.3,6.1,8.1,141,135,146,19,7,25,C004,L-3
|
| 24 |
+
C004_serum_L-44,4.4,3.6,5.1,1.5,1,2.5,56,36,130,35,9,46,44,10,40,0.7,0.2,1.2,,6.0,22.0,10.1,8.6,10.3,24,20,32,103,98,110,1.08,0.6,1.35,98,60,,85,60,,2.9,1.9,3.7,83,65,99,4.2,3.5,5.3,7.3,6.1,8.1,140,135,146,19,7,25,C004,L-44
|
| 25 |
+
C004_serum_L-92,4.6,3.6,5.1,1.6,1,2.5,58,36,130,37,9,46,36,10,40,0.7,0.2,1.2,,6.0,22.0,10.0,8.6,10.3,23,20,32,102,98,110,1.14,0.6,1.35,92,60,,79,60,,2.8,1.9,3.7,100,65,99,4.2,3.5,5.3,7.4,6.1,8.1,138,135,146,22,7,25,C004,L-92
|
| 26 |
+
C004_serum_R+1,4.5,3.6,5.1,1.6,1,2.5,62,36,130,40,9,46,48,10,40,0.4,0.2,1.2,23.0,6.0,22.0,9.8,8.6,10.3,23,20,32,105,98,110,1.15,0.6,1.35,90,60,,78,60,,2.9,1.9,3.7,97,65,99,4.0,3.5,5.3,7.4,6.1,8.1,140,135,146,26,7,25,C004,R+1
|
| 27 |
+
C004_serum_R+194,4.3,3.6,5.1,1.5,1,2.5,57,36,130,43,9,46,38,10,40,0.5,0.2,1.2,21.0,6.0,22.0,9.8,8.6,10.3,19,20,32,105,98,110,1.31,0.6,1.35,77,60,,67,60,,2.9,1.9,3.7,84,65,99,3.8,3.5,5.3,7.2,6.1,8.1,141,135,146,27,7,25,C004,R+194
|
| 28 |
+
C004_serum_R+45,4.5,3.6,5.1,1.6,1,2.5,60,36,130,32,9,46,33,10,40,0.7,0.2,1.2,,6.0,22.0,9.8,8.6,10.3,24,20,32,102,98,110,1.08,0.6,1.35,98,60,,84,60,,2.8,1.9,3.7,43,65,99,5.9,3.5,5.3,7.3,6.1,8.1,139,135,146,21,7,25,C004,R+45
|
| 29 |
+
C004_serum_R+82,4.6,3.6,5.1,1.6,1,2.5,54,36,130,23,9,46,25,10,40,1.0,0.2,1.2,,6.0,22.0,10.0,8.6,10.3,24,20,32,103,98,110,1.11,0.6,1.35,94,60,,81,60,,2.8,1.9,3.7,80,65,99,4.2,3.5,5.3,7.4,6.1,8.1,140,135,146,16,7,25,C004,R+82
|
final_data/Serum_Cardiovascular.csv
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Sample Name,a2_macroglobulin_concentration_nanogram_per_milliliter,a2_macroglobulin_percent,agp_concentration_nanogram_per_milliliter,agp_percent,crp_concentration_picogram_per_milliliter,crp_percent,fetuin_a36_concentration_nanogram_per_milliliter,fetuin_a36_percent,fibrinogen_concentration_nanogram_per_milliliter,fibrinogen_percent,haptoglobin_concentration_nanogram_per_milliliter,haptoglobin_percent,l_selectin_concentration_picogram_per_milliliter,l_selectin_percent,pf4_concentration_nanogram_per_milliliter,pf4_percent,sap_concentration_picogram_per_milliliter,sap_percent,astronautID,timepoint
|
| 2 |
+
C001_serum_L-3,1002300.0,68.70291244,1214100.0,56.50502833,5131700,28.97219478,155302.07,63.4017261,2780.38,65.8613441,1305900.0,22.43495888,561452.93,57.94458721,8602.35,74.65951867,6057300,66.13248223,C001,L-3
|
| 3 |
+
C001_serum_L-44,657943.56,45.0989113,1745700.0,81.24604889,2191000,12.36979534,194978.74,79.59963875,5707.32,135.1943858,122441.48,2.10350683,675114.37,69.67498324,3903.78,33.88078092,5428000,59.2619011,C001,L-44
|
| 4 |
+
C001_serum_L-92,903742.69,61.94727616,1048700.0,48.80720139,18385000,103.7967537,144424.41,58.9609455,3208.77,76.00900061,1049100.0,18.02321415,515957.2,53.24921356,6973.32,60.52121975,4760000,51.96880052,C001,L-92
|
| 5 |
+
C001_serum_R+1,1048700.0,71.88341243,1034400.0,48.1416698,10637600,60.05702188,163848.05,66.8906035,2604.7,61.69985505,853104.4,14.65607024,596429.89,61.55437425,8143.69,70.6788233,6183500,67.51031051,C001,R+1
|
| 6 |
+
C001_serum_R+194,1651200.0,113.1819306,2595800.0,120.8102731,15415000,87.02893437,273640.25,111.713026,4587.13,108.6594449,55113000.0,946.824327,762532.23,78.6969182,11371.31,98.69123335,7739400,84.49733924,C001,R+194
|
| 7 |
+
C001_serum_R+45,1130800.0,77.51097814,1285200.0,59.81407002,6713900,37.90486944,159096.21,64.95067535,2732.11,64.7179295,1864800.0,32.03668835,568449.05,58.66661975,6374.28,55.32217088,6670900,72.83165366,C001,R+45
|
| 8 |
+
C001_serum_R+82,2993600.0,205.1970854,2455500.0,114.280617,25030000,141.3126323,300149.84,122.5355074,2513.02,59.52814901,13593000.0,233.5235439,1266600.0,130.7190866,23891.75,207.355729,13775000,150.3929049,C001,R+82
|
| 9 |
+
C002_serum_L-3,1892200.0,129.7013378,3653700.0,170.0456486,11967000,67.56245589,294376.04,120.1783663,6533.18,154.7572692,3825900.0,65.72778097,1336800.0,137.9640573,15879.14,137.8145448,11733000,128.0987262,C002,L-3
|
| 10 |
+
C002_serum_L-44,734243.34,50.32889943,1963300.0,91.37329884,1568500,8.855328158,190495.51,77.76937003,4027.43,95.40133114,80330.74,1.380057316,893494.17,92.21280733,3335.42,28.94800279,4509000,49.2284289,C002,L-44
|
| 11 |
+
C002_serum_L-92,2036900.0,139.6198367,2932000.0,136.4572466,8927100,50.4,276877.57,113.0346547,3657.0,86.62662492,3204700.0,55.05575673,1285900.0,132.7109375,13652.06,118.4857892,10983000,119.9103647,C002,L-92
|
| 12 |
+
C002_serum_R+1,913857.15,62.64057444,942754.34,43.87641932,9311500,52.57021877,125785.45,51.35163136,1999.88,47.37294357,568223.15,9.76189831,629653.79,64.98323724,5951.22,51.65044676,5592700,61.06006527,C002,R+1
|
| 13 |
+
C002_serum_R+194,2342900.0,160.5946858,2220700.0,103.3528675,21462000,121.1686662,241485.24,98.58581436,2852.5,67.56971495,6036000.0,103.6966167,1077400.0,111.1927553,21024.9,182.4744301,11337000,123.7752713,C002,R+194
|
| 14 |
+
C002_serum_R+45,1820800.0,124.8072064,2479200.0,115.3836309,3677200,20.76047989,270655.44,110.4944839,4074.55,96.51750466,3442000.0,59.13249748,1247300.0,128.7272357,14071.86,122.1292198,10765000,117.530281,C002,R+45
|
| 15 |
+
C002_serum_R+82,1403700.0,96.21697914,2968900.0,138.1745973,8633700,48.7435427,253558.23,103.5145858,3279.23,77.67804955,17907000.0,307.6367322,1114100.0,114.9803682,12168.67,105.6114951,10210000,111.4708936,C002,R+82
|
| 16 |
+
C003_serum_L-3,2948400.0,202.0988397,1616800.0,75.24695643,42838000,241.8517996,373933.42,152.6574905,4609.49,109.1891062,6263200.0,107.5998426,1128100.0,116.4252341,26633.16,231.1483381,18242000,199.1627855,C003,L-3
|
| 17 |
+
C003_serum_L-44,531433.88,36.4272726,1068100.0,49.71009041,9383100,52.97445307,188579.51,76.98716727,3723.84,88.20992369,203473.61,3.495613809,847821.61,87.49918398,2371.07,20.57844019,5319800,58.08059349,C003,L-44
|
| 18 |
+
C003_serum_L-92,3030400.0,207.7195509,1850900.0,86.14212745,75749000,427.6584333,357755.04,146.0527027,4153.93,98.39784961,17907000.0,307.6367322,1152700.0,118.96407,21999.42,190.9322578,16583000,181.0501301,C003,L-92
|
| 19 |
+
C003_serum_R+1,3051700.0,209.1795649,1559000.0,72.55690567,83617000,472.0790402,334021.3,136.3634559,4531.17,107.3338704,5442300.0,93.497034,1056400.0,109.0254564,28157.23,244.3756926,16745000,182.8188161,C003,R+1
|
| 20 |
+
C003_serum_R+194,2025600.0,138.8452753,2912600.0,135.5543576,9379400,52.95356387,295881.1,120.7928037,4334.63,102.6782519,2571500.0,44.1775762,1398200.0,144.3008265,18173.95,157.7311269,12038000,131.4286598,C003,R+194
|
| 21 |
+
C003_serum_R+45,2895000.0,198.4385229,3046500.0,141.7861534,39190000,221.256175,271898.71,111.0020461,4491.95,106.4048312,14476000.0,248.6932114,1317500.0,135.9722063,25381.08,220.2815761,13993000,152.7729886,C003,R+45
|
| 22 |
+
C003_serum_R+82,2127300.0,145.8163281,3249500.0,151.2339095,12680000,71.58786168,316530.95,129.2230593,4447.08,105.3419555,11995000.0,206.0703972,1058100.0,109.2009044,21375.52,185.5174498,13595000,148.4276981,C003,R+82
|
| 23 |
+
C004_serum_L-3,1780800.0,122.0653961,3606700.0,167.8582371,13742000,77.58362738,329448.71,134.4967062,5428.66,128.5935175,17907000.0,307.6367322,1364800.0,140.8537892,19678.31,170.7874189,12111000,132.2256603,C004,L-3
|
| 24 |
+
C004_serum_L-44,634017.94,43.45892349,2165100.0,100.7652062,1289600,7.280733945,180155.97,73.54827572,3602.38,85.3327922,73870.48,1.269072043,789635.95,81.49414978,2156.35,18.71489222,4048000,44.19531608,C004,L-44
|
| 25 |
+
C004_serum_L-92,1354300.0,92.83084338,2918800.0,135.8429097,21378000,120.6944248,253064.34,103.3129563,3226.41,76.42685504,17907000.0,307.6367322,1075600.0,111.0069868,13080.91,113.5287967,10138000,110.6848109,C004,L-92
|
| 26 |
+
C004_serum_R+1,1651200.0,113.1819306,3338700.0,155.3853374,17701000,99.9350741,280617.78,114.5615871,4187.89,99.20229046,17907000.0,307.6367322,1119400.0,115.5273532,14317.3,124.2593857,12329000,134.605744,C004,R+1
|
| 27 |
+
C004_serum_R+194,1568600.0,107.5200922,4564200.0,212.4209293,46006000,259.7374735,285112.88,116.3967018,4390.9,104.0111696,17907000.0,307.6367322,1250800.0,129.0884521,15624.9,135.608004,13414000,146.4515736,C004,R+194
|
| 28 |
+
C004_serum_R+45,1526000.0,104.6000642,3484600.0,162.1756212,22407000,126.5038814,287113.08,117.213279,2840.5,67.28546023,17907000.0,307.6367322,1256100.0,129.6354371,15095.42,131.0126641,11850000,129.3761105,C004,R+45
|
| 29 |
+
C004_serum_R+82,1785900.0,122.4149769,2651400.0,123.3979344,7902300,44.61425547,245913.55,100.3936621,3553.01,84.16332092,2919100.0,50.14923689,1193000.0,123.1232199,13342.85,115.8021655,9482000,103.5227241,C004,R+82
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
-
|
| 2 |
-
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==2.3.3
|
| 2 |
+
pandas==2.3.2
|
| 3 |
+
plotly==6.3.0
|
| 4 |
+
scipy==1.16.2
|
| 5 |
+
streamlit==1.50.0
|
scripts/featureEngineering.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
def parse_timepoint(timepoint: str) -> float:
    """
    Convert timepoint strings like 'L-3', 'L0', 'R+0', 'R+1' into numeric flight days
    on a stretched scale.

    In particular, the 3 days of flight are stretched into 30 days so that the
    pre-launch and post-return samples are visibly separated; the in-flight
    span on the final chart is therefore artificial.

    Convention:
        L-0 -> 0   (launch day = Flight Day 0)
        L-3 -> -3  (3 days before launch)
        R+0 -> 30  (last day in space, stretched to day 30)
        R+1 -> 31  (first recovery day)
        R+N -> N+30 (general rule for post-launch days)

    The label is stripped and upper-cased first, and the sign characters are
    ignored: the L/R prefix alone decides the direction.

    Returns:
        An int flight day for a recognised label, or ``np.nan`` when the label
        does not start with L/R or its remainder is not a whole number
        (e.g. 'LAUNCH', 'R+1.5', None).
    """
    label = str(timepoint).strip().upper()

    if label.startswith("L"):    # Pre-launch: counted backwards from launch
        prefix, sign, offset = "L", -1, 0
    elif label.startswith("R"):  # Return / post-flight: shifted past the stretched flight
        prefix, sign, offset = "R", 1, 30
    else:
        return np.nan

    digits = label.replace(prefix, "").replace("+", "").replace("-", "")
    try:
        number = int(digits or "0")
    except ValueError:
        # Malformed label: report "unknown" like any other unrecognised value
        # instead of aborting the whole column conversion.
        return np.nan
    return sign * number + offset
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def add_flight_day(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a numeric 'flight_day' column derived from 'timepoint'.

    Each 'timepoint' label is converted with ``parse_timepoint``. The
    'Sample Name' column is removed from the result when it exists, since it
    is redundant. The input frame is left untouched.

    Raises:
        ValueError: if ``df`` has no 'timepoint' column.
    """
    if "timepoint" not in df.columns:
        raise ValueError("DataFrame must contain a 'timepoint' column")

    result = df.copy()

    # numeric mission-day scale
    result["flight_day"] = result["timepoint"].apply(parse_timepoint)

    # 'Sample Name' is redundant, so it is dropped when present
    has_sample_name = "Sample Name" in result.columns
    return result.drop(columns=["Sample Name"]) if has_sample_name else result
|
| 46 |
+
|
| 47 |
+
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` extended with the derived Anion Gap feature.

    Anion Gap = Sodium − Chloride − Carbon Dioxide

    The feature is only added when 'sodium_value', 'chloride_value' and
    'carbon_dioxide_value' are all present; otherwise the copy is returned
    unchanged. The matching range columns are created as NaN placeholders.
    """
    out = df.copy()

    required = ("sodium_value", "chloride_value", "carbon_dioxide_value")
    if not set(required).issubset(out.columns):
        return out

    sodium, chloride, carbon_dioxide = (out[col].astype(float) for col in required)
    out["anion_gap_value"] = sodium - chloride - carbon_dioxide

    # Placeholders; min/max defined manually in stats.ANALYTE_INFO
    for placeholder in ("anion_gap_range_min", "anion_gap_range_max"):
        out[placeholder] = np.nan
    return out
|
scripts/graphMaking.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import plotly.graph_objects as go
|
| 2 |
+
import plotly.express as px
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
def make_figure(
    tidy_df: pd.DataFrame,
    stats_df: pd.DataFrame,
    analytes: list,
    astronaut_filter=None,
    show_error: str = None
):
    """
    Build interactive mission-day plots with stats overlays.

    Args:
        tidy_df: Long-format measurements. The columns read here are
            'analyte', 'astronautID', 'flight_day', 'timepoint', 'value',
            'min' and 'max', plus the optional 'unit' and 'sex'.
        stats_df: Statistics rows, selected by 'analyte' and 'test_type'
            ("within" or "group"). 'astronautID', 'mean_L', 'se_L', 'R1',
            'n_L', 'effect_size', 'p_value' and the optional 'group' are read.
        analytes: Analyte names to draw; analytes without rows are skipped.
        astronaut_filter: None for everyone, "Male"/"Female" to filter on the
            'sex' column (ignored when that column is absent), or a
            list/tuple/set of astronaut IDs.
        show_error: None, "within" (per-astronaut pre-launch mean +/- SE band)
            or "group" (group-level band).

    Returns:
        The assembled plotly Figure.
    """
    fig = go.Figure()

    # Highlight stretched space interval (0 to 30 days)
    fig.add_vrect(x0=0, x1=30, fillcolor="LightGray", opacity=0.3,
                  layer="below", line_width=0)
    for day in [10, 20]:
        fig.add_vline(x=day, line=dict(color="white", width=2, dash="dot"),
                      layer="below")

    sex_selected = isinstance(astronaut_filter, str) and astronaut_filter in ["Male", "Female"]
    ids_selected = isinstance(astronaut_filter, (list, tuple, set))

    # Apply participant filter (None or an unrecognised filter shows everyone)
    df = tidy_df.copy()
    if sex_selected:
        if "sex" in df.columns:
            df = df[df["sex"] == astronaut_filter]
    elif ids_selected:
        df = df[df["astronautID"].isin(astronaut_filter)]

    palette = px.colors.qualitative.Set2
    have_stats = not stats_df.empty

    # Loop analytes requested
    for analyte in analytes:
        subdf = df[df["analyte"] == analyte]
        if subdf.empty:
            print(f"[make_figure] Skipping {analyte} – no data")
            continue

        ## Y-axis scaling inputs
        ref_min = subdf["min"].dropna().min()
        ref_max = subdf["max"].dropna().max()
        data_min = subdf["value"].min()
        data_max = subdf["value"].max()

        if "unit" in subdf.columns and not subdf["unit"].dropna().empty:
            unit = subdf["unit"].dropna().iloc[0]
            y_label = f"{analyte.title()} ({unit})"
        else:
            y_label = analyte.title()

        ## Add healthy range lines from min / max
        if pd.notna(ref_min):
            fig.add_hline(
                y=ref_min,
                line=dict(color="green", width=2, dash="dot"),
                annotation_text="Min",
                annotation_position="bottom right"
            )
        if pd.notna(ref_max):
            fig.add_hline(
                y=ref_max,
                line=dict(color="green", width=2, dash="dot"),
                annotation_text="Max",
                annotation_position="top right"
            )

        ## Decide axis limits: must include BOTH healthy range and all data
        low_candidates = [v for v in [ref_min, data_min] if pd.notna(v)]
        high_candidates = [v for v in [ref_max, data_max] if pd.notna(v)]

        ## Apply axis update once. The range is not overridden later with the
        ## reference range alone, which would clip out-of-range measurements.
        if low_candidates and high_candidates:
            low = min(low_candidates)
            high = max(high_candidates)
            span = high - low if high > low else 1
            padding = 0.1 * span
            fig.update_yaxes(title=y_label, range=[low - padding, high + padding])
        else:
            fig.update_yaxes(title=y_label)

        ## One stable color per astronaut
        astronaut_colors = {astr: palette[i % len(palette)]
                            for i, astr in enumerate(subdf["astronautID"].unique())}

        ## Stats rows for the within-astronaut bands, selected once per analyte
        within_rows = None
        if show_error == "within" and have_stats:
            within_rows = stats_df[
                (stats_df["analyte"] == analyte)
                & (stats_df["test_type"] == "within")
            ]

        ## Plot each astronaut trace
        for astronaut, adf in subdf.groupby("astronautID"):
            if adf.empty:
                continue
            adf = adf.sort_values("flight_day")
            base_color = astronaut_colors[astronaut]

            # Main Scatter Plot
            fig.add_trace(go.Scatter(
                x=adf["flight_day"],
                y=adf["value"],
                mode="lines+markers",
                name=f"{astronaut} ({analyte})",
                hovertext=adf["timepoint"],
                hovertemplate="Day %{hovertext}<br>Value %{y}<extra></extra>",
                line=dict(color=base_color),
                marker=dict(color=base_color)
            ))

            ### Within-astronaut error band
            if within_rows is None:
                continue

            # Only this astronaut's rows, so each band and asterisk is drawn
            # exactly once instead of once per plotted trace.
            own_rows = within_rows[within_rows["astronautID"] == astronaut]
            for _, row in own_rows.iterrows():
                mean_L = row.get("mean_L", np.nan)
                se = row.get("se_L", np.nan)
                R1 = row.get("R1", np.nan)

                if pd.isna(mean_L) or pd.isna(se):
                    continue

                # "rgb(r,g,b)" -> "rgba(r,g,b,0.15)" for a translucent fill
                if base_color.startswith("rgb"):
                    fill_color = base_color.replace("rgb", "rgba").replace(")", ",0.15)")
                else:
                    fill_color = base_color

                #### Horizontal band: L +/- SE
                fig.add_hrect(
                    y0=mean_L - se, y1=mean_L + se,
                    fillcolor=fill_color,
                    opacity=0.2,
                    line_width=0,
                    layer="below"
                )

                #### Asterisk if R+1 outside band
                if pd.notna(R1) and (R1 < mean_L - se or R1 > mean_L + se):
                    fig.add_annotation(
                        x=31,
                        y=R1,
                        text="*",
                        showarrow=False,
                        font=dict(size=20, color="red"),
                        yshift=15
                    )

        ## Group-level error band
        if show_error == "group" and have_stats:
            stat_rows = stats_df[
                (stats_df["analyte"] == analyte)
                & (stats_df["test_type"] == "group")
            ]

            for _, row in stat_rows.iterrows():
                mean_L = row.get("mean_L", np.nan)
                n = row.get("n_L", 0)

                # Band half-width recovered as |R1 - mean_L| / |effect_size|;
                # falls back to a zero-width band when it cannot be computed.
                error = np.nan
                if pd.notna(row.get("effect_size")) and n > 1 and row["effect_size"] != 0:
                    error = abs(row.get("R1", np.nan) - mean_L) / abs(row["effect_size"])
                if pd.isna(error):
                    error = 0

                #### Filter bands only if stats_df has group info
                should_plot = True
                if "group" in row.index and astronaut_filter is not None:
                    group_id = row["group"]
                    if sex_selected:
                        should_plot = (group_id == astronaut_filter)
                    elif ids_selected:
                        # Only show if group_id matches one of the selected astronauts
                        should_plot = (group_id in astronaut_filter)

                if should_plot and pd.notna(mean_L):
                    fig.add_hrect(
                        y0=mean_L - error, y1=mean_L + error,
                        fillcolor="gray", opacity=0.2,
                        layer="below", line_width=0,
                        annotation_text="Group Error Band",
                        annotation_position="top left"
                    )

                    # Significance marker at R+1 for bands that are shown
                    if row.get("p_value") is not None and row["p_value"] < 0.05:
                        fig.add_annotation(
                            x=31,  # R+1 = 31
                            y=row.get("R1", mean_L),
                            text="*",
                            showarrow=False,
                            font=dict(size=20, color="red"),
                            yshift=15
                        )

    # Layout: Build Dynamic Title
    if astronaut_filter is None:
        group_label = "All Participants"
    elif sex_selected:
        group_label = f"{astronaut_filter} Participants"
    elif ids_selected:
        group_label = "Subset: " + ", ".join(str(a) for a in astronaut_filter)
    else:
        group_label = "Participants"

    # Build analyte label with units if available (unit of the first analyte)
    ana_label = ", ".join(analytes)
    unit_label = ""
    if analytes and "unit" in df.columns:
        units = df.loc[df["analyte"] == analytes[0], "unit"].dropna()
        if not units.empty:
            unit_label = f" ({units.iloc[0]})"

    fig.update_layout(
        title=f"{ana_label.title()}{unit_label} Trends ({group_label})",
        xaxis_title="Mission Day",
        legend_title="Participant / Analyte",
        hovermode="x unified",
        template="plotly_white",
        margin=dict(l=60, r=30, t=60, b=60)
    )

    # Custom ticks: relabel stretched flight days back to L-/R+ notation
    ticks = sorted(df["flight_day"].dropna().unique())
    ticktext = [f"R+{int(t - 30)}" if t >= 30 else f"L{int(t)}" for t in ticks]
    if ticks:
        fig.update_xaxes(tickmode="array", tickvals=ticks, ticktext=ticktext)

    return fig
|
scripts/stats.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from scipy import stats
|
| 4 |
+
from .featureEngineering import parse_timepoint
|
| 5 |
+
|
| 6 |
+
# Map analyte base names to human-readable labels and units.
# Keys are the base names used as column prefixes when locating
# "<analyte>_value" / "<analyte>_concentration" columns in the input data.
# Only "anion_gap" carries a built-in reference range ("min"/"max"); for every
# other analyte the range, if any, has to come from the data's own columns.
## To get sub and superscripts in Markdown I used ChatGPT: https://chatgpt.com/share/68d9c8f6-2674-8008-8ff7-0731bec9ad49
ANALYTE_INFO = {
    # Blood chemistry
    "albumin": {"label": "Albumin", "unit": "g/dL"},
    "alkaline_phosphatase": {"label": "Alkaline Phosphatase", "unit": "U/L"},
    "alt": {"label": "ALT", "unit": "U/L"},
    "ast": {"label": "AST", "unit": "U/L"},
    "total_bilirubin": {"label": "Bilirubin", "unit": "mg/dL"},
    "bun_to_creatinine_ratio": {"label": "BUN/Creatinine Ratio", "unit": ""},
    "calcium": {"label": "Ca²⁺", "unit": "mg/dL"},
    "carbon_dioxide": {"label": "CO₂", "unit": "mmol/L"},
    "chloride": {"label": "Cl⁻", "unit": "mmol/L"},
    "creatinine": {"label": "Creatinine", "unit": "mg/dL"},
    "egfr_african_american": {"label": "eGFR (AA)", "unit": "mL/min/1.73m²"},
    "egfr_non_african_american": {"label": "eGFR (non-AA)", "unit": "mL/min/1.73m²"},
    "globulin": {"label": "Globulin", "unit": "g/dL"},
    "glucose": {"label": "Glucose", "unit": "mg/dL"},
    "potassium": {"label": "K⁺", "unit": "mmol/L"},
    "total_protein": {"label": "Protein", "unit": "g/dL"},
    "sodium": {"label": "Na⁺", "unit": "mmol/L"},
    "urea_nitrogen_bun": {"label": "BUN", "unit": "mg/dL"},

    # Derived feature (no raw value column is looked up for it in tidy_from_wide)
    "anion_gap": {
        "label": "Anion Gap",
        "unit": "mmol/L",
        "min": 8,  # manual reference range
        "max": 24
    },

    # Cardiovascular panel
    "a2_macroglobulin": {"label": "α₂-Macroglobulin", "unit": "ng/mL"},
    "agp": {"label": "AGP (α1-acid glycoprotein)", "unit": "ng/mL"},
    "crp": {"label": "CRP (C-reactive protein)", "unit": "pg/mL"},
    "fetuin_a36": {"label": "Fetuin A3/6", "unit": "ng/mL"},
    "fibrinogen": {"label": "Fibrinogen", "unit": "ng/mL"},
    "haptoglobin": {"label": "Haptoglobin", "unit": "ng/mL"},
    "l_selectin": {"label": "L-Selectin", "unit": "pg/mL"},
    "pf4": {"label": "Platelet Factor 4", "unit": "ng/mL"},
    "sap": {"label": "SAP (Serum Amyloid P)", "unit": "pg/mL"},
}
|
| 49 |
+
|
| 50 |
+
# Helpers to find columns by prefix (robust to unit suffixes)
|
| 51 |
+
def _first_col_startswith(df: pd.DataFrame, prefixes) -> str | None:
|
| 52 |
+
"""
|
| 53 |
+
Return the first column whose lowercase name starts with any prefix in `prefixes`.
|
| 54 |
+
"""
|
| 55 |
+
if isinstance(prefixes, str):
|
| 56 |
+
prefixes = [prefixes]
|
| 57 |
+
prefixes = [p.lower() for p in prefixes]
|
| 58 |
+
for col in df.columns:
|
| 59 |
+
cl = col.lower()
|
| 60 |
+
for p in prefixes:
|
| 61 |
+
if cl.startswith(p):
|
| 62 |
+
return col
|
| 63 |
+
return None
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _value_min_max_cols(df: pd.DataFrame, analyte: str):
    """
    Locate the value and reference-range columns for one analyte base name.

    The value column is the first one starting with "<analyte>_value"; if there
    is none, "<analyte>_concentration" is tried instead. The range columns are
    the first ones starting with "<analyte>_range_min" / "<analyte>_min" and
    "<analyte>_range_max" / "<analyte>_max".

    Returns:
        Tuple (value_col, min_col, max_col); each entry is None when no
        matching column exists.
    """
    value_col = None
    # Try the suffixes in priority order and stop at the first hit.
    for suffix in ("value", "concentration"):
        value_col = _first_col_startswith(df, f"{analyte}_{suffix}")
        if value_col is not None:
            break

    lower_col = _first_col_startswith(
        df, [f"{analyte}_range_min", f"{analyte}_min"]
    )
    upper_col = _first_col_startswith(
        df, [f"{analyte}_range_max", f"{analyte}_max"]
    )

    return value_col, lower_col, upper_col
|
| 79 |
+
|
| 80 |
+
# Tidy Transformation
|
| 81 |
+
def tidy_from_wide(df: pd.DataFrame) -> pd.DataFrame:
    """
    Transform astronaut CSV with value/min/max triplets into tidy format.

    One output row is produced per (input row, analyte) for every analyte in
    ANALYTE_INFO whose value column is found in `df`. The "anion_gap" entry is
    skipped here: this function does not compute it.

    Args:
        df: Wide frame with astronautID and timepoint columns (matched
            case-insensitively) plus per-analyte value/min/max columns.

    Returns:
        DataFrame with columns [astronautID, timepoint, flight_day, analyte,
        value, min, max, label, unit, sex]. The columns are present even when
        no analyte column matched, so callers can group/filter safely.

    Raises:
        KeyError: If the astronautID or timepoint column is missing.
    """
    output_columns = [
        "astronautID", "timepoint", "flight_day", "analyte", "value",
        "min", "max", "label", "unit", "sex",
    ]
    # IDs hard-coded as male; every other ID is labelled "Female".
    male_ids = ("C001", "C004")

    # Case-insensitive lookup for the id/timepoint columns; str() keeps this
    # from crashing on non-string column labels.
    colmap = {str(c).lower(): c for c in df.columns}
    astronaut_col = colmap.get("astronautid")
    timepoint_col = colmap.get("timepoint")

    if astronaut_col is None or timepoint_col is None:
        raise KeyError("Expected astronautID and timepoint columns in input CSV")

    tidy_records = []
    for analyte, meta in ANALYTE_INFO.items():
        if analyte == "anion_gap":
            continue

        value_col, min_col, max_col = _value_min_max_cols(df, analyte)
        if value_col is None:
            # Analyte not present in this file.
            continue

        # Fallback reference range from ANALYTE_INFO (None when not defined).
        default_min = meta.get("min")
        default_max = meta.get("max")

        for _, row in df.iterrows():
            # Prefer the per-row range from the data; fall back to the default
            # when the column is absent or the cell is missing.
            row_min = row[min_col] if (min_col and pd.notna(row[min_col])) else default_min
            row_max = row[max_col] if (max_col and pd.notna(row[max_col])) else default_max
            tidy_records.append({
                "astronautID": row[astronaut_col],
                "timepoint": row[timepoint_col],
                "flight_day": parse_timepoint(row[timepoint_col]),
                "analyte": analyte,
                "value": row[value_col],
                "min": row_min,
                "max": row_max,
                "label": meta["label"],
                "unit": meta["unit"],
                "sex": "Male" if str(row[astronaut_col]) in male_ids else "Female",
            })

    # Passing columns= guarantees the schema even for an empty record list.
    return pd.DataFrame(tidy_records, columns=output_columns)
|
| 121 |
+
|
| 122 |
+
# Statistical Comparison: R+1 vs L-series
|
| 123 |
+
def _baseline_and_r1(adf: pd.DataFrame):
    """Return (L-series values, R+1 value) for one astronaut/analyte, or None if unusable."""
    timepoints = adf["timepoint"].astype(str)
    L_vals = adf.loc[timepoints.str.startswith("L"), "value"].dropna().astype(float)
    R1_vals = adf.loc[timepoints.isin(["R+1", "R1", "R+01"]), "value"].dropna().astype(float)
    # Need at least two baseline points (for a sample std) and exactly one R+1.
    if len(L_vals) >= 2 and len(R1_vals) == 1:
        return L_vals, float(R1_vals.iloc[0])
    return None


def analyze_r1_vs_L(tidy: pd.DataFrame) -> pd.DataFrame:
    """
    Compare R+1 vs L-series for each analyte.

    - Within-astronaut ("within" rows): one-sample t-test (H0: mean(L) == R+1)
      using the astronaut's L-series values. t_stat is (mean_L - R1) / se,
      while effect_size (Cohen's d) is (R1 - mean_L) / std_L, so the two have
      opposite signs.
    - Across-astronauts ("group" rows, astronautID == "ALL"): paired t-test of
      R+1 against per-astronaut mean(L). Here n_L is the number of astronauts,
      std_L / se_L are the std and SEM of the per-astronaut means, and
      effect_size is mean(diff) / std(diff) with diff = R1 - mean(L).

    Astronauts with fewer than two L values or not exactly one R+1 value are
    skipped; a group row needs at least two usable astronauts.

    Args:
        tidy: Frame with columns analyte, astronautID, timepoint, value.

    Returns:
        DataFrame with columns [analyte, astronautID, test_type, n_L, mean_L,
        R1, std_L, se_L, t_stat, p_value, effect_size]; empty (but with these
        columns) when nothing could be tested.
    """
    columns = [
        "analyte", "astronautID", "test_type", "n_L", "mean_L", "R1",
        "std_L", "se_L", "t_stat", "p_value", "effect_size",
    ]
    results = []
    if tidy.empty:
        return pd.DataFrame(results, columns=columns)

    for analyte, subdf in tidy.groupby("analyte"):
        # Collected while doing the within tests, then reused for the group test.
        astronaut_means, astronaut_R1 = [], []

        ## Within-astronaut tests
        for astronaut, adf in subdf.groupby("astronautID"):
            split = _baseline_and_r1(adf)
            if split is None:
                continue
            L_vals, R1 = split

            mean_L = float(L_vals.mean())
            std_L = float(L_vals.std(ddof=1))
            n_L = int(L_vals.shape[0])
            astronaut_means.append(mean_L)
            astronaut_R1.append(R1)

            if std_L > 0:
                se = std_L / np.sqrt(n_L)
                t_stat = (mean_L - R1) / se
                # Survival function avoids the precision loss of 1 - cdf.
                p_val = 2 * stats.t.sf(abs(t_stat), df=n_L - 1)
                cohen_d = (R1 - mean_L) / std_L
            else:
                # Constant baseline: the test statistic is undefined.
                se = t_stat = p_val = cohen_d = np.nan

            results.append({
                "analyte": analyte,
                "astronautID": astronaut,
                "test_type": "within",
                "n_L": n_L,
                "mean_L": round(mean_L, 2),
                "R1": round(R1, 2),
                "std_L": round(std_L, 2),
                "se_L": round(float(se), 2) if pd.notna(se) else np.nan,
                "t_stat": round(float(t_stat), 3) if pd.notna(t_stat) else np.nan,
                "p_value": round(float(p_val), 4) if pd.notna(p_val) else np.nan,
                "effect_size": round(float(cohen_d), 3) if pd.notna(cohen_d) else np.nan,
            })

        ## Across-astronauts (paired test)
        if len(astronaut_means) >= 2:
            diffs = np.array(astronaut_R1) - np.array(astronaut_means)
            t_stat, p_val = stats.ttest_rel(astronaut_R1, astronaut_means)

            # Group-level variability of the per-astronaut baseline means.
            std_means = float(np.std(astronaut_means, ddof=1))
            se_means = std_means / np.sqrt(len(astronaut_means))

            diff_std = diffs.std(ddof=1)
            cohen_d = diffs.mean() / diff_std if diff_std > 0 else np.nan

            results.append({
                "analyte": analyte,
                "astronautID": "ALL",
                "test_type": "group",
                "n_L": len(astronaut_means),
                "mean_L": round(float(np.mean(astronaut_means)), 2),
                "R1": round(float(np.mean(astronaut_R1)), 2),
                # Previously computed but dropped from the output row.
                "std_L": round(std_means, 2),
                "se_L": round(float(se_means), 2),
                "t_stat": round(float(t_stat), 3),
                "p_value": round(float(p_val), 4),
                "effect_size": round(float(cohen_d), 3) if pd.notna(cohen_d) else np.nan,
            })

    # columns= keeps the schema stable even when no record was produced.
    return pd.DataFrame(results, columns=columns)
|