Spaces:
Sleeping
Sleeping
| """ | |
| Backend helpers for flux distribution analysis across domains. | |
| Provides: | |
| - adata_to_long_df : tidy long-format DataFrame from AnnData | |
| - compute_domain_stats: Welch t-tests + FDR correction per (reaction, domain) | |
| - p_to_star : p-value -> significance star string | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| from scipy.stats import ttest_ind | |
| from scipy.sparse import issparse | |
| try: | |
| from statsmodels.stats.multitest import multipletests | |
| _HAS_STATSMODELS = True | |
| except ImportError: | |
| _HAS_STATSMODELS = False | |
| # --------------------------------------------------------------------------- | |
| # Core helpers | |
| # --------------------------------------------------------------------------- | |
def p_to_star(p: float) -> str:
    """
    Map a p-value onto its significance label.

    Parameters
    ----------
    p : float
        The p-value to annotate.

    Returns
    -------
    str
        "****" for p < 1e-4, "***" for p < 1e-3, "**" for p < 1e-2,
        "*" for p < 0.05, otherwise "ns". All bounds are strict, and a NaN
        input compares False against every cutoff, so it yields "ns".
    """
    # Ordered from the strictest cutoff to the loosest: the first match wins.
    star_levels = (
        (1e-4, "****"),
        (1e-3, "***"),
        (1e-2, "**"),
        (0.05, "*"),
    )
    for cutoff, label in star_levels:
        if p < cutoff:
            return label
    return "ns"
def adata_to_long_df(adata, reactions=None) -> pd.DataFrame:
    """
    Convert an AnnData object to a tidy long-format DataFrame.

    Parameters
    ----------
    adata : AnnData
        Must have obs['domain'] and (optionally) obs['condition'].
        When obs['condition'] is absent every row gets the condition "all".
    reactions : list[str] | None
        Subset of adata.var_names to include. None = all reactions.
        Names that are not in adata.var_names are silently dropped.

    Returns
    -------
    pd.DataFrame with columns: spot, domain, condition, reaction, flux
        One row per (spot, reaction) pair, ordered reaction by reaction
        (all spots of the first reaction, then all spots of the second, ...).
        `spot` holds the corresponding entry of obs_names.

    Raises
    ------
    KeyError
        If obs has no 'domain' column.
    """
    if reactions is None:
        reactions = adata.var_names.tolist()
    else:
        reactions = [r for r in reactions if r in adata.var_names]

    sub = adata[:, reactions]
    obs = sub.obs
    if "domain" not in obs:
        raise KeyError("adata.obs must contain a 'domain' column")

    # Densify so the matrix can be flattened; assumes X is 2-D (spots x reactions).
    X = sub.X.toarray() if issparse(sub.X) else np.asarray(sub.X)
    n_obs, n_rxn = X.shape

    spots = np.asarray(sub.obs_names)
    domain = obs["domain"].astype(str).to_numpy()
    condition_col = obs.get("condition")
    if condition_col is None:
        condition = np.full(n_obs, "all", dtype=object)
    else:
        condition = condition_col.astype(str).to_numpy()

    # Build the long table directly instead of adding metadata columns to a
    # wide frame and melting it: that keeps the spot identifier (melt discards
    # the index) and cannot clobber a reaction that happens to be named
    # "domain" or "condition". Column-major flattening reproduces melt's
    # row order (reaction-major, spots in obs order within each reaction).
    return pd.DataFrame(
        {
            "spot": np.tile(spots, n_rxn),
            "domain": np.tile(domain, n_rxn),
            "condition": np.tile(condition, n_rxn),
            "reaction": np.repeat(np.asarray(reactions, dtype=object), n_obs),
            "flux": X.ravel(order="F"),
        }
    )
def _bh_adjust(pvalues) -> np.ndarray:
    """Return Benjamini-Hochberg adjusted p-values for an array of finite p-values."""
    p = np.asarray(pvalues, dtype=float)
    m = p.size
    order = np.argsort(p)
    # Scale the i-th smallest p-value by m / i ...
    scaled = p[order] * m / np.arange(1, m + 1)
    # ... then enforce monotonicity, walking down from the largest p-value.
    monotone = np.minimum.accumulate(scaled[::-1])[::-1]
    adjusted = np.empty(m, dtype=float)
    adjusted[order] = np.minimum(monotone, 1.0)
    return adjusted


def compute_domain_stats(df_long: pd.DataFrame) -> pd.DataFrame:
    """
    Welch t-test for each (reaction, domain) pair between the two conditions.
    Applies FDR-BH correction across all tests.

    Parameters
    ----------
    df_long : pd.DataFrame
        Long-format table with columns: reaction, domain, condition, flux.

    Returns
    -------
    pd.DataFrame with columns: reaction, domain, pvalue, p_adj, signif
        One row per tested (reaction, domain) group. Groups are skipped when
        they do not contain exactly two distinct conditions or when either
        condition has fewer than two non-missing flux values. The frame is
        empty (same columns) when no group qualifies.

    Notes
    -----
    A test whose p-value is NaN (e.g. both groups constant and equal) keeps
    NaN in `pvalue` and `p_adj` and is excluded from the FDR correction, so it
    neither counts as a test nor contaminates the other adjusted p-values.
    When statsmodels is not installed the same Benjamini-Hochberg adjustment
    is computed with numpy instead of being skipped.
    """
    results = []
    for (rxn, dom), sub in df_long.groupby(["reaction", "domain"]):
        conds = sub["condition"].unique()
        if len(conds) != 2:
            continue
        g1 = sub.loc[sub["condition"] == conds[0], "flux"].dropna()
        g2 = sub.loc[sub["condition"] == conds[1], "flux"].dropna()
        if len(g1) < 2 or len(g2) < 2:
            continue
        _, p = ttest_ind(g1, g2, equal_var=False, nan_policy="omit")
        results.append({"reaction": rxn, "domain": dom, "pvalue": p})

    if not results:
        return pd.DataFrame(columns=["reaction", "domain", "pvalue", "p_adj", "signif"])

    ttest_df = pd.DataFrame(results)
    pvals = ttest_df["pvalue"].to_numpy(dtype=float)

    # Only finite p-values enter the correction: the cumulative-minimum step of
    # Benjamini-Hochberg propagates a single NaN to every other adjusted value.
    finite = np.isfinite(pvals)
    p_adj = np.full(pvals.shape, np.nan)
    if finite.any():
        if _HAS_STATSMODELS:
            p_adj[finite] = multipletests(pvals[finite], method="fdr_bh")[1]
        else:
            p_adj[finite] = _bh_adjust(pvals[finite])

    ttest_df["p_adj"] = p_adj
    ttest_df["signif"] = ttest_df["p_adj"].apply(p_to_star)
    return ttest_df