Spaces:
Sleeping
Sleeping
File size: 2,309 Bytes
1cfa929 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import numpy as np
import pandas as pd
def parse_timepoint(timepoint: str) -> float:
    """
    Convert timepoint strings like 'L-3', 'L0', 'R+0', 'R+1' into numeric flight days
    on a stretched scale.

    The 3 days of flight are stretched into 30 days so the flight period is
    visible as a gap; the resulting chart axis is therefore artificial.

    Convention:
        L-0 -> 0    (launch day = Flight Day 0)
        L-3 -> -3   (3 days before launch)
        R+0 -> 30   (last day in space, stretched to day 30)
        R+1 -> 31   (first recovery day)
        R+N -> N+30 (general rule for return / post-flight days)

    The label is matched case-insensitively after stripping surrounding
    whitespace. Sign characters are ignored: the prefix alone decides the
    direction, so 'L+3' and 'L-3' both give -3. A bare 'L' or 'R' counts as
    day offset 0.

    Returns:
        An int flight day for recognised labels, or NaN when the label does
        not start with 'L' or 'R' or when its remainder is not an integer.
    """
    label = str(timepoint).strip().upper()
    prefix = label[:1]
    # Tuple membership on purpose: an empty prefix must not match.
    if prefix not in ("L", "R"):
        return np.nan

    digits = label.replace(prefix, "").replace("+", "").replace("-", "")
    try:
        number = int(digits or "0")
    except ValueError:
        # Labels such as 'Launch' or 'R+x' are treated like any other
        # unrecognised label instead of aborting the whole column conversion.
        return np.nan

    if prefix == "L":  # Pre-launch
        return -number
    return number + 30  # Return / post-flight
def add_flight_day(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a numeric 'flight_day' column derived from 'timepoint'.

    Each 'timepoint' value is converted with ``parse_timepoint``. The
    'Sample Name' column is removed from the result when present, since it is
    redundant. The input dataframe is not modified.

    Raises:
        ValueError: if ``df`` has no 'timepoint' column.
    """
    # Validate up front so no copy is made for unusable input.
    if "timepoint" not in df.columns:
        raise ValueError("DataFrame must contain a 'timepoint' column")

    result = df.copy()

    # Numeric scale used for plotting / ordering.
    result["flight_day"] = result["timepoint"].apply(parse_timepoint)

    # errors="ignore" makes the drop a no-op when 'Sample Name' is absent.
    return result.drop(columns=["Sample Name"], errors="ignore")
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with the derived Anion Gap feature added.

    Anion Gap = Sodium − Chloride − Carbon Dioxide

    The feature is only added when all of 'sodium_value', 'chloride_value'
    and 'carbon_dioxide_value' are present; otherwise the copy is returned
    unchanged. The input dataframe is not modified.
    """
    out = df.copy()
    required = ("sodium_value", "chloride_value", "carbon_dioxide_value")

    # Nothing to derive unless every input analyte is available.
    if any(col not in out.columns for col in required):
        return out

    sodium, chloride, carbon_dioxide = (out[col].astype(float) for col in required)
    out["anion_gap_value"] = sodium - chloride - carbon_dioxide

    # Placeholders; min/max defined manually in stats.ANALYTE_INFO
    out["anion_gap_range_min"] = np.nan
    out["anion_gap_range_max"] = np.nan
    return out