# spMetaTME-Atlas / src/backend/flux_distribution.py
# Surajv - initial commit (31d5c57)
"""
Backend helpers for flux distribution analysis across domains.
Provides:
- adata_to_long_df : tidy long-format DataFrame from AnnData
- compute_domain_stats: Welch t-tests + FDR correction per (reaction, domain)
- p_to_star : p-value -> significance star string
"""
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from scipy.sparse import issparse
try:
from statsmodels.stats.multitest import multipletests
_HAS_STATSMODELS = True
except ImportError:
_HAS_STATSMODELS = False
# ---------------------------------------------------------------------------
# Core helpers
# ---------------------------------------------------------------------------
def p_to_star(p: float) -> str:
    """
    Translate a p-value into the usual star notation.

    Cut-offs are strict upper bounds, checked from most to least stringent:
    below 1e-4 -> "****", below 1e-3 -> "***", below 1e-2 -> "**",
    below 0.05 -> "*". Anything else (including NaN, for which every
    comparison is False) is reported as "ns".
    """
    star_levels = (
        (1e-4, "****"),
        (1e-3, "***"),
        (1e-2, "**"),
        (0.05, "*"),
    )
    for upper_bound, stars in star_levels:
        if p < upper_bound:
            return stars
    return "ns"
def adata_to_long_df(adata, reactions=None) -> pd.DataFrame:
    """
    Convert an AnnData object to a tidy long-format DataFrame.

    Parameters
    ----------
    adata : AnnData
        Must have obs['domain'] and (optionally) obs['condition'].
        When obs['condition'] is absent every spot is labelled "all".
    reactions : list[str] | None
        Subset of adata.var_names to include. None = all reactions.
        Names not present in adata.var_names are ignored and repeated
        names are kept only once (first occurrence wins).

    Returns
    -------
    pd.DataFrame with columns: spot, domain, condition, reaction, flux
        One row per (spot, reaction) pair; rows are ordered reaction by
        reaction, with spots in adata.obs_names order inside each reaction.

    Raises
    ------
    KeyError
        If adata.obs has no 'domain' column.
    """
    if "domain" not in adata.obs:
        raise KeyError("adata.obs must contain a 'domain' column")

    if reactions is None:
        reactions = adata.var_names.tolist()
    else:
        # dict.fromkeys de-duplicates while preserving the requested order;
        # duplicate names would otherwise yield duplicate column labels.
        reactions = [r for r in dict.fromkeys(reactions) if r in adata.var_names]

    sub = adata[:, reactions]
    X = sub.X.toarray() if issparse(sub.X) else np.asarray(sub.X)

    df = pd.DataFrame(X, columns=reactions, index=sub.obs_names)
    # melt() discards the index, so carry the spot identifier as a column.
    df["spot"] = np.asarray(sub.obs_names)
    df["domain"] = sub.obs["domain"].astype(str).values
    if "condition" in sub.obs:
        df["condition"] = sub.obs["condition"].astype(str).values
    else:
        df["condition"] = "all"

    return df.melt(
        id_vars=["spot", "domain", "condition"],
        var_name="reaction",
        value_name="flux",
    )
def _bh_adjust(pvalues) -> np.ndarray:
    """Return Benjamini-Hochberg adjusted p-values for finite p-values."""
    p = np.asarray(pvalues, dtype=float)
    m = p.size
    if m == 0:
        return p.copy()
    order = np.argsort(p)
    scaled = p[order] * m / np.arange(1, m + 1)
    # Walk from the largest p-value down so adjusted values stay monotone.
    monotone = np.minimum.accumulate(scaled[::-1])[::-1]
    adjusted = np.empty(m, dtype=float)
    adjusted[order] = np.minimum(monotone, 1.0)
    return adjusted


def compute_domain_stats(df_long: pd.DataFrame) -> pd.DataFrame:
    """
    Welch t-test for each (reaction, domain) pair between the two conditions.

    Groups that do not contain exactly two distinct conditions, or where
    either condition has fewer than two non-missing flux values, are skipped.
    FDR-BH correction is applied across all tests that produced a finite
    p-value. Tests whose p-value is undefined (NaN, e.g. zero variance in
    both conditions) keep NaN in ``p_adj`` and are labelled "ns"; they are
    excluded from the correction because a NaN fed into the BH procedure
    can propagate to every adjusted value.

    Parameters
    ----------
    df_long : pd.DataFrame
        Long-format table with columns reaction, domain, condition, flux.

    Returns
    -------
    pd.DataFrame with columns:
        reaction, domain, pvalue, p_adj, signif
    """
    columns = ["reaction", "domain", "pvalue", "p_adj", "signif"]

    results = []
    for (rxn, dom), sub in df_long.groupby(["reaction", "domain"]):
        conds = sub["condition"].unique()
        if len(conds) != 2:
            continue
        g1 = sub.loc[sub["condition"] == conds[0], "flux"].dropna()
        g2 = sub.loc[sub["condition"] == conds[1], "flux"].dropna()
        if len(g1) < 2 or len(g2) < 2:
            continue
        # Missing values are already dropped, so no nan_policy is needed.
        _, p = ttest_ind(g1.to_numpy(), g2.to_numpy(), equal_var=False)
        results.append({"reaction": rxn, "domain": dom, "pvalue": float(p)})

    if not results:
        return pd.DataFrame(columns=columns)

    ttest_df = pd.DataFrame(results)
    pvals = ttest_df["pvalue"].to_numpy(dtype=float)

    # Correct only the finite p-values; undefined tests stay NaN.
    finite = np.isfinite(pvals)
    p_adj = np.full(pvals.shape, np.nan)
    if finite.any():
        if _HAS_STATSMODELS:
            p_adj[finite] = multipletests(pvals[finite], method="fdr_bh")[1]
        else:
            # statsmodels unavailable: use the equivalent NumPy implementation
            # instead of silently reporting uncorrected p-values.
            p_adj[finite] = _bh_adjust(pvals[finite])

    ttest_df["p_adj"] = p_adj
    ttest_df["signif"] = ttest_df["p_adj"].apply(p_to_star)
    return ttest_df[columns]