Spaces:

DTanzillo
/

Inspiration-Health-Data

Sleeping

File size: 2,309 Bytes

1cfa929

import numpy as np
import pandas as pd

def parse_timepoint(timepoint: str) -> float:
    """Convert a timepoint label into a numeric flight day on a stretched scale.

    The roughly 3 days of actual flight are stretched to 30 days so that the
    in-flight period is visible on the final chart; the resulting axis is
    therefore artificial rather than a true calendar scale.

    Convention:
        L-0 / L0 ->   0   (launch day = Flight Day 0)
        L-3      ->  -3   (3 days before launch)
        R+0      ->  30   (last day in space, stretched to day 30)
        R+1      ->  31   (first recovery day)
        R+N      ->  N+30 (general rule for post-return days)

    Args:
        timepoint: Label such as 'L-3', 'L0', 'R+0' or 'R+1'. It is coerced
            with ``str()``, stripped and upper-cased before parsing, so
            lower-case labels and surrounding whitespace are accepted.

    Returns:
        The flight day as an ``int``, or ``np.nan`` when the label does not
        start with 'L' or 'R' or when its numeric part is not an integer.
    """
    label = str(timepoint).strip().upper()

    phase = label[:1]
    if phase not in ("L", "R"):
        return np.nan

    # Strip the phase letter and any sign; only the magnitude is kept.
    # NOTE(review): the sign is ignored, so 'L+3' maps to -3 exactly like
    # 'L-3' -- confirm no in-flight 'L+N' labels are expected here.
    digits = label.replace(phase, "").replace("+", "").replace("-", "")

    try:
        # A bare 'L' or 'R' is treated as day offset 0.
        number = int(digits or "0")
    except ValueError:
        # Labels such as 'LAUNCH' or 'R+1.5' used to raise and abort a whole
        # Series.apply(); report them as missing like any other unknown label.
        return np.nan

    if phase == "L":  # Pre-launch
        return -number
    return number + 30  # Return / post-flight


def add_flight_day(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a numeric 'flight_day' column added.

    Each value of the 'timepoint' column is passed through
    ``parse_timepoint`` to build 'flight_day'. A 'Sample Name' column, if
    present, is removed from the result as redundant. The input frame is
    not modified.

    Args:
        df: Frame containing a 'timepoint' column.

    Returns:
        A new DataFrame with 'flight_day' added and 'Sample Name' dropped.

    Raises:
        ValueError: If ``df`` has no 'timepoint' column.
    """
    if "timepoint" not in df.columns:
        raise ValueError("DataFrame must contain a 'timepoint' column")

    # Work on a copy so the caller's frame is left untouched.
    with_days = df.copy()
    with_days["flight_day"] = with_days["timepoint"].apply(parse_timepoint)

    # errors="ignore" makes the drop a no-op when 'Sample Name' is absent.
    return with_days.drop(columns=["Sample Name"], errors="ignore")

def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the derived Anion Gap columns added.

    Anion Gap = Sodium - Chloride - Carbon Dioxide

    The derived columns are added only when 'sodium_value',
    'chloride_value' and 'carbon_dioxide_value' are all present; otherwise
    an unchanged copy is returned. The input frame is not modified.

    Args:
        df: Frame of analyte measurements.

    Returns:
        A new DataFrame, with 'anion_gap_value', 'anion_gap_range_min' and
        'anion_gap_range_max' appended when the three inputs are available.
    """
    result = df.copy()

    inputs = ("sodium_value", "chloride_value", "carbon_dioxide_value")
    if any(name not in result.columns for name in inputs):
        return result

    sodium = result["sodium_value"].astype(float)
    chloride = result["chloride_value"].astype(float)
    carbon_dioxide = result["carbon_dioxide_value"].astype(float)
    result["anion_gap_value"] = sodium - chloride - carbon_dioxide

    # Placeholders; min/max defined manually in stats.ANALYTE_INFO
    for bound in ("anion_gap_range_min", "anion_gap_range_max"):
        result[bound] = np.nan

    return result