Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
def parse_timepoint(timepoint: str) -> float:
    """
    Convert a mission timepoint label into a numeric flight day on a stretched scale.

    Labels look like 'L-3', 'L0', 'R+0' or 'R+1'. The 3 days of flight are
    deliberately stretched to 30 days so that pre-launch and post-return
    samples are clearly separated; the resulting axis is therefore not a
    true calendar-day scale.

    Convention:
        L-0 -> 0      (launch day = Flight Day 0)
        L-3 -> -3     (3 days before launch)
        R+0 -> 30     (last day in space, stretched to day 30)
        R+1 -> 31     (first recovery day)
        R+N -> N + 30 (general rule for post-return days)

    Matching is case-insensitive and ignores surrounding whitespace. Any '+'
    or '-' in the label is discarded: the L/R prefix alone decides the sign.

    Args:
        timepoint: Label to convert. Non-string values are passed through
            ``str()`` first, so a missing value (NaN) yields NaN.

    Returns:
        An ``int`` flight day for a recognised label, or ``np.nan`` when the
        label does not start with 'L' or 'R', or carries no integer day count.
    """
    label = str(timepoint).strip().upper()

    if label.startswith("L"):  # Pre-launch: days count backwards from 0
        prefix, sign, offset = "L", -1, 0
    elif label.startswith("R"):  # Return / post-flight: days count up from 30
        prefix, sign, offset = "R", 1, 30
    else:
        return np.nan

    # A bare prefix ('L' / 'R') is treated as day 0 of its phase.
    digits = label.replace(prefix, "").replace("+", "").replace("-", "") or "0"
    try:
        number = int(digits)
    except ValueError:
        # Starts with L/R but is not a timepoint (e.g. 'Recovery', 'L-3.5').
        # Report it as missing, like any other unrecognised label, instead of
        # aborting the caller's whole column conversion.
        return np.nan
    return sign * number + offset
def add_flight_day(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a numeric 'flight_day' column added.

    'flight_day' is computed by applying ``parse_timepoint`` to every value
    of the 'timepoint' column. The 'Sample Name' column is dropped when
    present, since it is redundant. The input frame is not modified.

    Args:
        df: Frame that must contain a 'timepoint' column. No other column is
            required by this function.

    Returns:
        A new DataFrame with 'flight_day' added and 'Sample Name' removed.

    Raises:
        ValueError: If ``df`` has no 'timepoint' column.
    """
    # Validate before copying so an invalid frame fails fast.
    if "timepoint" not in df.columns:
        raise ValueError("DataFrame must contain a 'timepoint' column")

    result = df.copy()
    # Numeric scale derived from the textual timepoint labels.
    result["flight_day"] = result["timepoint"].apply(parse_timepoint)
    # errors="ignore" makes the drop a no-op when 'Sample Name' is absent.
    return result.drop(columns=["Sample Name"], errors="ignore")
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with the derived Anion Gap feature added.

    Anion Gap = Sodium - Chloride - Carbon Dioxide

    The feature is only added when all of 'sodium_value', 'chloride_value'
    and 'carbon_dioxide_value' are present; otherwise the copy is returned
    unchanged. The input frame is not modified.

    Args:
        df: Frame that may contain the three source columns. Their values
            are cast with ``astype(float)``, so they must be convertible.

    Returns:
        A new DataFrame. When the source columns exist it also carries
        'anion_gap_value' plus 'anion_gap_range_min' / 'anion_gap_range_max',
        both filled with NaN.
    """
    out = df.copy()

    required = ("sodium_value", "chloride_value", "carbon_dioxide_value")
    if any(col not in out.columns for col in required):
        return out

    sodium, chloride, carbon_dioxide = (out[col].astype(float) for col in required)
    out["anion_gap_value"] = sodium - chloride - carbon_dioxide

    # Placeholders; min/max defined manually in stats.ANALYTE_INFO
    out["anion_gap_range_min"] = np.nan
    out["anion_gap_range_max"] = np.nan
    return out