""" Backend helpers for flux distribution analysis across domains. Provides: - adata_to_long_df : tidy long-format DataFrame from AnnData - compute_domain_stats: Welch t-tests + FDR correction per (reaction, domain) - p_to_star : p-value -> significance star string """ import numpy as np import pandas as pd from scipy.stats import ttest_ind from scipy.sparse import issparse try: from statsmodels.stats.multitest import multipletests _HAS_STATSMODELS = True except ImportError: _HAS_STATSMODELS = False # --------------------------------------------------------------------------- # Core helpers # --------------------------------------------------------------------------- def p_to_star(p: float) -> str: """Convert a p-value to a significance annotation string.""" if p < 1e-4: return "****" elif p < 1e-3: return "***" elif p < 1e-2: return "**" elif p < 0.05: return "*" return "ns" def adata_to_long_df(adata, reactions=None) -> pd.DataFrame: """ Convert an AnnData object to a tidy long-format DataFrame. Parameters ---------- adata : AnnData Must have obs['domain'] and (optionally) obs['condition']. reactions : list[str] | None Subset of adata.var_names to include. None = all reactions. Returns ------- pd.DataFrame with columns: spot, domain, condition, reaction, flux """ if reactions is None: reactions = adata.var_names.tolist() else: reactions = [r for r in reactions if r in adata.var_names] sub = adata[:, reactions] X = sub.X.toarray() if issparse(sub.X) else np.array(sub.X) df = pd.DataFrame(X, columns=reactions, index=sub.obs_names) df["domain"] = sub.obs["domain"].astype(str).values df["condition"] = sub.obs.get("condition", pd.Series("all", index=sub.obs_names)).astype(str).values long = df.melt( id_vars=["domain", "condition"], var_name="reaction", value_name="flux" ) return long def compute_domain_stats(df_long: pd.DataFrame) -> pd.DataFrame: """ Welch t-test for each (reaction, domain) pair between the two conditions. Applies FDR-BH correction across all tests. Returns a DataFrame with columns: reaction, domain, pvalue, p_adj, signif """ results = [] for (rxn, dom), sub in df_long.groupby(["reaction", "domain"]): conds = sub["condition"].unique() if len(conds) != 2: continue g1 = sub[sub["condition"] == conds[0]]["flux"].dropna() g2 = sub[sub["condition"] == conds[1]]["flux"].dropna() if len(g1) < 2 or len(g2) < 2: continue stat, p = ttest_ind(g1, g2, equal_var=False, nan_policy="omit") results.append({"reaction": rxn, "domain": dom, "pvalue": p}) if not results: return pd.DataFrame(columns=["reaction", "domain", "pvalue", "p_adj", "signif"]) ttest_df = pd.DataFrame(results) if _HAS_STATSMODELS: ttest_df["p_adj"] = multipletests(ttest_df["pvalue"], method="fdr_bh")[1] else: ttest_df["p_adj"] = ttest_df["pvalue"] # fallback: no correction ttest_df["signif"] = ttest_df["p_adj"].apply(p_to_star) return ttest_df