# Source: hackathon/server/tasks/bio_palette.py
# Uploaded by Ev3Dev using huggingface_hub (commit 5c3cfae, verified).
"""Curated biological building blocks for procedural scenario generation.
Provides tissue-specific cell types, disease profiles, pathway libraries,
regulatory network templates, and perturbation effect profiles. The
procedural generator composes these into complete ``Scenario`` objects
with fully populated ``LatentBiologicalState``.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
# ── Cell type templates ─────────────────────────────────────────────────────
@dataclass
class CellTypeTemplate:
    """Template describing one cell population that a tissue may contain.

    Attributes:
        name: Identifier of the cell type (e.g. ``"cardiomyocyte"``).
        marker_genes: Gene symbols listed as markers of this cell type.
        proportion_range: Pair of floats, written as ``(low, high)`` in the
            tissue tables. Presumably the bounds the generator samples the
            population fraction from -- TODO confirm against the generator.
        states: Names of the states this cell type can be in. Defaults to a
            fresh ``["quiescent"]`` list for every instance.
        disease_responsive: Flag marking the cell type as disease
            responsive. Defaults to ``False``.
        response_range: Pair of floats, written as ``(low, high)``.
            Presumably only meaningful when ``disease_responsive`` is true;
            how it is applied is not visible here -- TODO confirm.
    """

    name: str
    marker_genes: List[str]
    proportion_range: Tuple[float, float] = (0.05, 0.30)
    # default_factory so instances never share one mutable list.
    states: List[str] = field(default_factory=lambda: ["quiescent"])
    disease_responsive: bool = False
    response_range: Tuple[float, float] = (0.5, 1.5)
# Cell-type composition per tissue, keyed by tissue name.
# Positional arguments of each template are, in order: name, marker_genes,
# proportion_range, states.  The optional disease_responsive / response_range
# fields are spelled out by keyword wherever they differ from the defaults.
TISSUE_CELL_TYPES: Dict[str, List[CellTypeTemplate]] = {
    "heart": [
        CellTypeTemplate("cardiomyocyte", ["TNNT2", "MYH7", "ACTC1"], (0.25, 0.40), ["contractile", "stressed"]),
        CellTypeTemplate("cardiac_fibroblast", ["COL1A1", "DCN", "LUM"], (0.15, 0.30), ["quiescent", "activated"], disease_responsive=True, response_range=(1.1, 1.8)),
        CellTypeTemplate("endothelial", ["PECAM1", "VWF", "CDH5"], (0.10, 0.20), ["quiescent"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "CSF1R"], (0.05, 0.15), ["quiescent", "activated", "inflammatory"], disease_responsive=True, response_range=(1.2, 2.0)),
        CellTypeTemplate("smooth_muscle", ["ACTA2", "MYH11", "TAGLN"], (0.08, 0.18), ["quiescent"]),
        CellTypeTemplate("pericyte", ["PDGFRB", "RGS5", "NOTCH3"], (0.03, 0.10), ["quiescent"]),
    ],
    "lung": [
        CellTypeTemplate("AT2", ["SFTPC", "SFTPB", "ABCA3"], (0.15, 0.25), ["normal", "stressed"]),
        CellTypeTemplate("AT1", ["AGER", "PDPN", "CAV1"], (0.10, 0.18), ["normal"]),
        CellTypeTemplate("alveolar_macrophage", ["MARCO", "FABP4", "MCEMP1"], (0.10, 0.20), ["resident", "activated"]),
        CellTypeTemplate("fibroblast", ["COL1A1", "COL3A1", "POSTN"], (0.12, 0.25), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 2.0)),
        CellTypeTemplate("endothelial", ["PECAM1", "CLDN5", "VWF"], (0.08, 0.15), ["quiescent"]),
        CellTypeTemplate("T_cell", ["CD3D", "CD3E", "IL7R"], (0.08, 0.18), ["quiescent", "activated"]),
        CellTypeTemplate("ciliated", ["FOXJ1", "DNAH5", "TPPP3"], (0.05, 0.12), ["normal"]),
    ],
    "brain": [
        CellTypeTemplate("excitatory_neuron", ["SLC17A7", "CAMK2A", "NRGN"], (0.25, 0.40), ["normal", "stressed"]),
        CellTypeTemplate("inhibitory_neuron", ["GAD1", "GAD2", "SLC32A1"], (0.12, 0.22), ["normal"]),
        CellTypeTemplate("astrocyte", ["GFAP", "AQP4", "SLC1A3"], (0.10, 0.20), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 1.8)),
        CellTypeTemplate("microglia", ["CX3CR1", "P2RY12", "TMEM119"], (0.05, 0.12), ["homeostatic", "activated", "inflammatory"], disease_responsive=True, response_range=(1.3, 2.5)),
        CellTypeTemplate("oligodendrocyte", ["MBP", "PLP1", "MOG"], (0.10, 0.18), ["myelinating"]),
        CellTypeTemplate("OPC", ["PDGFRA", "CSPG4", "OLIG2"], (0.03, 0.08), ["progenitor"]),
        CellTypeTemplate("endothelial", ["CLDN5", "FLT1", "PECAM1"], (0.03, 0.08), ["quiescent"]),
    ],
    "liver": [
        CellTypeTemplate("hepatocyte", ["ALB", "APOB", "CYP3A4"], (0.55, 0.70), ["normal", "stressed"]),
        CellTypeTemplate("cholangiocyte", ["KRT19", "KRT7", "EPCAM"], (0.05, 0.10), ["normal"]),
        CellTypeTemplate("kupffer_cell", ["CD68", "CLEC4F", "MARCO"], (0.08, 0.15), ["quiescent", "activated", "inflammatory"], disease_responsive=True, response_range=(1.2, 2.0)),
        CellTypeTemplate("stellate_cell", ["ACTA2", "LRAT", "PDGFRB"], (0.05, 0.12), ["quiescent", "activated"], disease_responsive=True, response_range=(1.3, 2.0)),
        CellTypeTemplate("endothelial", ["PECAM1", "LYVE1", "STAB2"], (0.05, 0.10), ["quiescent"]),
        CellTypeTemplate("NK_cell", ["NKG7", "GNLY", "KLRD1"], (0.03, 0.08), ["quiescent", "activated"]),
    ],
    "bone_marrow": [
        CellTypeTemplate("HSC", ["CD34", "KIT", "THY1"], (0.03, 0.08), ["stem"]),
        CellTypeTemplate("CMP", ["CD34", "FLT3"], (0.08, 0.15), ["progenitor"]),
        CellTypeTemplate("GMP", ["CSF3R", "CEBPA"], (0.08, 0.15), ["progenitor"]),
        CellTypeTemplate("MEP", ["GATA1", "KLF1"], (0.06, 0.12), ["progenitor"]),
        CellTypeTemplate("erythrocyte", ["HBA1", "HBB", "GYPA"], (0.15, 0.25), ["mature"]),
        CellTypeTemplate("neutrophil", ["ELANE", "MPO", "CTSG"], (0.12, 0.22), ["mature"]),
        CellTypeTemplate("monocyte", ["CD14", "CSF1R", "FCGR3A"], (0.10, 0.18), ["mature"]),
        CellTypeTemplate("megakaryocyte", ["ITGA2B", "GP1BA", "PF4"], (0.05, 0.12), ["mature"]),
    ],
    "kidney": [
        CellTypeTemplate("proximal_tubule", ["SLC34A1", "LRP2", "CUBN"], (0.30, 0.45), ["normal", "stressed"]),
        CellTypeTemplate("distal_tubule", ["SLC12A3", "CALB1"], (0.10, 0.18), ["normal"]),
        CellTypeTemplate("collecting_duct", ["AQP2", "FXYD4"], (0.08, 0.15), ["normal"]),
        CellTypeTemplate("podocyte", ["NPHS1", "NPHS2", "WT1"], (0.05, 0.10), ["normal", "stressed"]),
        CellTypeTemplate("endothelial", ["PECAM1", "EMCN", "FLT1"], (0.05, 0.12), ["quiescent"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "CSF1R"], (0.05, 0.10), ["quiescent", "inflammatory"], disease_responsive=True, response_range=(1.3, 2.0)),
        CellTypeTemplate("fibroblast", ["COL1A1", "PDGFRA", "DCN"], (0.05, 0.12), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 1.8)),
    ],
    "colon": [
        CellTypeTemplate("colonocyte", ["CA2", "AQP8", "SLC26A3"], (0.25, 0.40), ["normal", "stressed"]),
        CellTypeTemplate("goblet_cell", ["MUC2", "TFF3", "FCGBP"], (0.10, 0.18), ["secretory"]),
        CellTypeTemplate("stem_cell", ["LGR5", "ASCL2", "OLFM4"], (0.05, 0.10), ["stem"]),
        CellTypeTemplate("T_cell", ["CD3D", "CD3E", "IL7R"], (0.10, 0.18), ["quiescent", "activated"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "CSF1R"], (0.05, 0.12), ["quiescent", "inflammatory"], disease_responsive=True, response_range=(1.3, 2.0)),
        CellTypeTemplate("fibroblast", ["COL1A1", "COL3A1", "VIM"], (0.08, 0.15), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 1.8)),
        CellTypeTemplate("endothelial", ["PECAM1", "VWF", "CDH5"], (0.05, 0.10), ["quiescent"]),
    ],
    "pancreas": [
        CellTypeTemplate("beta_cell", ["INS", "MAFA", "NKX6-1"], (0.25, 0.40), ["normal", "stressed"], disease_responsive=True, response_range=(0.4, 0.8)),
        CellTypeTemplate("alpha_cell", ["GCG", "ARX", "IRX2"], (0.15, 0.25), ["normal"]),
        CellTypeTemplate("delta_cell", ["SST", "HHEX"], (0.05, 0.10), ["normal"]),
        CellTypeTemplate("ductal", ["KRT19", "SOX9", "CFTR"], (0.10, 0.18), ["normal"]),
        CellTypeTemplate("acinar", ["PRSS1", "CPA1", "CELA3A"], (0.10, 0.20), ["normal"]),
        CellTypeTemplate("stellate", ["ACTA2", "PDGFRA", "COL1A1"], (0.05, 0.10), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 1.8)),
        CellTypeTemplate("macrophage", ["CD68", "CD163"], (0.03, 0.08), ["quiescent", "inflammatory"]),
    ],
    "skin": [
        CellTypeTemplate("keratinocyte", ["KRT14", "KRT5", "KRT1"], (0.40, 0.55), ["basal", "differentiated"]),
        CellTypeTemplate("melanocyte", ["MLANA", "PMEL", "TYR"], (0.05, 0.10), ["normal", "activated"]),
        CellTypeTemplate("fibroblast", ["COL1A1", "COL3A1", "DCN"], (0.10, 0.20), ["quiescent", "activated"]),
        CellTypeTemplate("T_cell", ["CD3D", "CD3E", "IL7R"], (0.08, 0.15), ["quiescent", "activated"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "CSF1R"], (0.05, 0.10), ["quiescent", "inflammatory"]),
        CellTypeTemplate("endothelial", ["PECAM1", "VWF"], (0.05, 0.10), ["quiescent"]),
    ],
    "breast": [
        CellTypeTemplate("luminal_epithelial", ["KRT8", "KRT18", "EPCAM"], (0.25, 0.40), ["normal", "stressed"]),
        CellTypeTemplate("basal_epithelial", ["KRT14", "KRT5", "TP63"], (0.10, 0.20), ["normal"]),
        CellTypeTemplate("fibroblast", ["COL1A1", "COL3A1", "FAP"], (0.10, 0.20), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 1.8)),
        CellTypeTemplate("T_cell", ["CD3D", "CD3E", "CD8A"], (0.08, 0.15), ["quiescent", "activated", "exhausted"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "CSF1R"], (0.05, 0.12), ["quiescent", "inflammatory"], disease_responsive=True, response_range=(1.3, 2.0)),
        CellTypeTemplate("endothelial", ["PECAM1", "VWF", "CDH5"], (0.05, 0.10), ["quiescent"]),
    ],
    "synovium": [
        CellTypeTemplate("fibroblast", ["COL1A1", "FAP", "THY1"], (0.20, 0.30), ["quiescent", "activated"], disease_responsive=True, response_range=(1.2, 1.8)),
        CellTypeTemplate("CD4_T_cell", ["CD3D", "CD4", "IL7R"], (0.12, 0.22), ["quiescent", "activated"]),
        CellTypeTemplate("CD8_T_cell", ["CD3D", "CD8A", "GZMB"], (0.08, 0.15), ["quiescent", "activated"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "MARCO"], (0.10, 0.18), ["quiescent", "inflammatory"], disease_responsive=True, response_range=(1.3, 2.0)),
        CellTypeTemplate("B_cell", ["CD19", "MS4A1", "CD79A"], (0.05, 0.12), ["quiescent"]),
        CellTypeTemplate("endothelial", ["PECAM1", "VWF"], (0.05, 0.10), ["quiescent"]),
        CellTypeTemplate("mast_cell", ["KIT", "TPSAB1", "CPA3"], (0.03, 0.08), ["quiescent"]),
    ],
    "aorta": [
        CellTypeTemplate("smooth_muscle", ["ACTA2", "MYH11", "TAGLN"], (0.30, 0.45), ["contractile", "synthetic"], disease_responsive=True, response_range=(0.6, 0.9)),
        CellTypeTemplate("endothelial", ["PECAM1", "VWF", "CDH5"], (0.15, 0.25), ["quiescent", "activated"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "TREM2"], (0.08, 0.15), ["quiescent", "inflammatory"], disease_responsive=True, response_range=(1.5, 2.5)),
        CellTypeTemplate("fibroblast", ["COL1A1", "LUM", "DCN"], (0.08, 0.15), ["quiescent", "activated"]),
        CellTypeTemplate("T_cell", ["CD3D", "CD3E", "IL7R"], (0.05, 0.12), ["quiescent", "activated"]),
        CellTypeTemplate("dendritic_cell", ["FCER1A", "CD1C", "CLEC10A"], (0.03, 0.08), ["quiescent"]),
    ],
    "blood": [
        CellTypeTemplate("CD4_T_cell", ["CD3D", "CD4", "IL7R"], (0.15, 0.25), ["quiescent", "activated"]),
        CellTypeTemplate("CD8_T_cell", ["CD3D", "CD8A", "GZMB"], (0.10, 0.18), ["quiescent", "activated"]),
        CellTypeTemplate("B_cell", ["CD19", "MS4A1", "CD79A"], (0.08, 0.15), ["quiescent"]),
        CellTypeTemplate("NK_cell", ["NKG7", "GNLY", "KLRD1"], (0.05, 0.12), ["quiescent", "activated"]),
        CellTypeTemplate("monocyte", ["CD14", "CSF1R", "FCGR3A"], (0.15, 0.25), ["classical", "non_classical"]),
        CellTypeTemplate("neutrophil", ["ELANE", "MPO", "CTSG"], (0.10, 0.20), ["mature"]),
        CellTypeTemplate("platelet", ["ITGA2B", "GP1BA", "PF4"], (0.03, 0.08), ["normal"]),
    ],
    "spleen": [
        CellTypeTemplate("B_cell", ["CD19", "MS4A1", "CD79A"], (0.20, 0.35), ["quiescent", "activated"]),
        CellTypeTemplate("T_cell", ["CD3D", "CD3E", "IL7R"], (0.15, 0.25), ["quiescent", "activated"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163", "CSF1R"], (0.10, 0.18), ["quiescent", "inflammatory"]),
        CellTypeTemplate("dendritic_cell", ["FCER1A", "CD1C", "CLEC10A"], (0.05, 0.10), ["quiescent"]),
        CellTypeTemplate("NK_cell", ["NKG7", "GNLY", "KLRD1"], (0.05, 0.12), ["quiescent"]),
        CellTypeTemplate("endothelial", ["PECAM1", "STAB2"], (0.05, 0.10), ["quiescent"]),
    ],
    "thymus": [
        CellTypeTemplate("double_negative_T", ["CD3D", "PTCRA"], (0.10, 0.18), ["progenitor"]),
        CellTypeTemplate("double_positive_T", ["CD3D", "CD4", "CD8A"], (0.30, 0.45), ["progenitor"]),
        CellTypeTemplate("CD4_SP", ["CD3D", "CD4", "IL7R"], (0.10, 0.18), ["mature"]),
        CellTypeTemplate("CD8_SP", ["CD3D", "CD8A", "CD8B"], (0.08, 0.15), ["mature"]),
        CellTypeTemplate("thymic_epithelial", ["FOXN1", "KRT5", "KRT8"], (0.05, 0.12), ["cortical", "medullary"]),
        CellTypeTemplate("dendritic_cell", ["FCER1A", "CD1C"], (0.03, 0.08), ["quiescent"]),
        CellTypeTemplate("macrophage", ["CD68", "CD163"], (0.03, 0.08), ["quiescent"]),
    ],
}
# ── Disease profiles ────────────────────────────────────────────────────────
@dataclass
class DiseaseProfile:
    """Curated description of one disease used to build a scenario.

    Attributes:
        name: Identifier of the profile; in ``DISEASE_PROFILES`` it equals
            the dictionary key.
        display_name: Human-readable disease name.
        tissue: Name of the tissue the disease is modelled in; the curated
            profiles use keys of ``TISSUE_CELL_TYPES``.
        condition_name: Label of the disease condition. Usually the same as
            ``name``, but it may be an abbreviation (e.g. ``"HCC"``).
        de_genes: Gene symbol -> ``(low, high)`` pair of floats. Presumably
            the range an effect size (log fold change) is drawn from --
            TODO confirm against the generator.
        pathways: Pathway name -> ``(low, high)`` pair of floats; all
            curated values lie between 0 and 1. Presumably an activity
            range -- TODO confirm.
        markers: Gene symbols highlighted as markers of the disease.
        mechanism_templates: Free-text descriptions of candidate mechanisms.
        responding_cell_types: Names of cell types that respond to the
            disease. Defaults to a fresh empty list.
        hidden_failure_templates: Disease-specific free-text failure
            conditions. Defaults to a fresh empty list.
    """

    name: str
    display_name: str
    tissue: str
    condition_name: str
    de_genes: Dict[str, Tuple[float, float]]
    pathways: Dict[str, Tuple[float, float]]
    markers: List[str]
    mechanism_templates: List[str]
    # default_factory keeps the optional lists independent per instance.
    responding_cell_types: List[str] = field(default_factory=list)
    hidden_failure_templates: List[str] = field(default_factory=list)
# Curated disease profiles, keyed by profile name (each key equals the
# profile's ``name`` field).  Every ``de_genes`` / ``pathways`` value is a
# (low, high) pair of floats.
DISEASE_PROFILES: Dict[str, DiseaseProfile] = {
    # ── Heart ──────────────────────────────────────────────────────────────
    "dilated_cardiomyopathy": DiseaseProfile(
        name="dilated_cardiomyopathy",
        display_name="dilated cardiomyopathy",
        tissue="heart",
        condition_name="dilated_cardiomyopathy",
        de_genes={
            "NPPA": (1.8, 3.5), "NPPB": (2.0, 4.0), "MYH7": (1.0, 2.5),
            "COL1A1": (1.0, 2.2), "COL3A1": (0.8, 1.8), "POSTN": (1.5, 3.0),
            "CCL2": (0.8, 1.8), "IL6": (0.5, 1.5), "TGFB1": (0.7, 1.6),
            "ANKRD1": (1.5, 3.0), "XIRP2": (-2.0, -0.8), "MYL2": (-1.5, -0.5),
        },
        pathways={
            "cardiac_muscle_contraction": (0.3, 0.6),
            "extracellular_matrix_organisation": (0.7, 0.95),
            "inflammatory_response": (0.5, 0.8),
            "TGF_beta_signalling": (0.6, 0.85),
            "apoptosis": (0.4, 0.65),
        },
        markers=["NPPA", "NPPB", "POSTN", "COL1A1"],
        mechanism_templates=[
            "TGF-beta-driven fibrosis",
            "inflammatory macrophage infiltration",
        ],
        responding_cell_types=["cardiac_fibroblast", "macrophage"],
    ),
    # ── Lung ───────────────────────────────────────────────────────────────
    "IPF": DiseaseProfile(
        name="IPF",
        display_name="idiopathic pulmonary fibrosis",
        tissue="lung",
        condition_name="IPF",
        de_genes={
            "SPP1": (2.0, 4.0), "MERTK": (0.8, 2.0), "MMP9": (1.0, 2.5),
            "TREM2": (0.8, 2.0), "COL1A1": (1.5, 3.0), "COL3A1": (1.2, 2.5),
            "POSTN": (1.5, 3.5), "SFTPC": (-2.0, -0.8), "AGER": (-2.5, -1.0),
        },
        pathways={
            "extracellular_matrix_organisation": (0.75, 0.95),
            "integrin_signalling": (0.6, 0.85),
            "macrophage_activation": (0.65, 0.9),
            "Wnt_signalling": (0.4, 0.7),
        },
        markers=["SPP1", "MERTK", "POSTN", "MMP9"],
        mechanism_templates=[
            "SPP1+ macrophage-driven fibroblast activation",
            "integrin-mediated SPP1 signalling in fibrosis",
        ],
        responding_cell_types=["fibroblast", "alveolar_macrophage"],
    ),
    # ── Brain ──────────────────────────────────────────────────────────────
    "Alzheimer": DiseaseProfile(
        name="Alzheimer",
        display_name="Alzheimer's disease",
        tissue="brain",
        condition_name="Alzheimer",
        de_genes={
            "TREM2": (1.0, 2.5), "APOE": (1.2, 2.8), "CLU": (0.8, 2.0),
            "C1QA": (1.0, 2.2), "C1QB": (0.9, 2.0), "GFAP": (1.5, 3.0),
            "AQP4": (0.6, 1.5), "SLC17A7": (-1.5, -0.5), "NRGN": (-2.0, -0.8),
            "SNAP25": (-1.2, -0.4),
        },
        pathways={
            "complement_cascade": (0.7, 0.9),
            "neuroinflammation": (0.65, 0.85),
            "amyloid_processing": (0.6, 0.8),
            "synaptic_signalling": (0.3, 0.5),
            "lipid_metabolism": (0.5, 0.7),
        },
        markers=["TREM2", "APOE", "GFAP", "C1QA"],
        mechanism_templates=[
            "TREM2-mediated microglial activation in amyloid clearance",
            "complement-driven synaptic pruning",
            "reactive astrogliosis amplifying neuroinflammation",
        ],
        responding_cell_types=["microglia", "astrocyte"],
    ),
    # ── Colon ──────────────────────────────────────────────────────────────
    "colorectal_cancer": DiseaseProfile(
        name="colorectal_cancer",
        display_name="colorectal cancer",
        tissue="colon",
        condition_name="colorectal_cancer",
        de_genes={
            "MYC": (1.5, 3.0), "KRAS": (0.8, 1.8), "TP53": (-1.5, -0.5),
            "APC": (-1.8, -0.8), "CDH1": (-1.2, -0.4), "VIM": (1.0, 2.5),
            "MKI67": (1.5, 3.0), "CD44": (1.0, 2.2), "LGR5": (0.8, 2.0),
        },
        pathways={
            "Wnt_signalling": (0.75, 0.95),
            "cell_cycle": (0.7, 0.9),
            "EMT": (0.6, 0.85),
            "p53_signalling": (0.3, 0.5),
            "MAPK_signalling": (0.55, 0.75),
        },
        markers=["MYC", "CD44", "VIM", "MKI67"],
        mechanism_templates=[
            "Wnt/beta-catenin-driven tumour stem cell expansion",
            "epithelial-mesenchymal transition promoting invasion",
        ],
        responding_cell_types=["stem_cell", "macrophage", "fibroblast"],
    ),
    # ── Pancreas ───────────────────────────────────────────────────────────
    "type2_diabetes": DiseaseProfile(
        name="type2_diabetes",
        display_name="type 2 diabetes",
        tissue="pancreas",
        condition_name="type2_diabetes",
        de_genes={
            "INS": (-2.0, -0.8), "MAFA": (-1.5, -0.5), "PDX1": (-1.2, -0.4),
            "UCN3": (-1.8, -0.6), "GCG": (0.8, 2.0), "ARX": (0.5, 1.5),
            "IAPP": (0.6, 1.8), "TXNIP": (1.0, 2.5), "DDIT3": (0.8, 2.0),
        },
        pathways={
            "insulin_signalling": (0.3, 0.5),
            "ER_stress_response": (0.7, 0.9),
            "oxidative_stress": (0.6, 0.8),
            "glucagon_signalling": (0.6, 0.8),
            "apoptosis": (0.5, 0.7),
        },
        markers=["INS", "TXNIP", "IAPP", "DDIT3"],
        mechanism_templates=[
            "ER stress-induced beta cell apoptosis",
            "glucotoxicity-driven beta cell dedifferentiation",
        ],
        responding_cell_types=["beta_cell", "stellate"],
    ),
    # ── Bone marrow ────────────────────────────────────────────────────────
    "AML": DiseaseProfile(
        name="AML",
        display_name="acute myeloid leukemia",
        tissue="bone_marrow",
        condition_name="AML",
        de_genes={
            "FLT3": (1.5, 3.0), "NPM1": (0.8, 2.0), "IDH2": (0.6, 1.5),
            "RUNX1": (-1.5, -0.5), "CEBPA": (-1.2, -0.4), "KIT": (1.0, 2.5),
            "WT1": (1.2, 2.8), "MYC": (0.8, 2.0),
        },
        pathways={
            "hematopoietic_cell_lineage": (0.3, 0.5),
            "MAPK_signalling": (0.7, 0.9),
            "PI3K_AKT_signalling": (0.65, 0.85),
            "cell_cycle": (0.7, 0.9),
            "apoptosis": (0.3, 0.5),
        },
        markers=["FLT3", "NPM1", "WT1", "KIT"],
        mechanism_templates=[
            "FLT3-ITD-driven proliferative advantage",
            "myeloid differentiation block via RUNX1 loss",
        ],
        responding_cell_types=["HSC", "GMP"],
    ),
    # ── Synovium ───────────────────────────────────────────────────────────
    "rheumatoid_arthritis": DiseaseProfile(
        name="rheumatoid_arthritis",
        display_name="rheumatoid arthritis",
        tissue="synovium",
        condition_name="rheumatoid_arthritis",
        de_genes={
            "IFNG": (1.0, 2.5), "TBX21": (0.8, 1.8), "IL17A": (1.0, 2.2),
            "RORC": (0.6, 1.5), "TNF": (1.2, 2.5), "IL6": (1.0, 2.2),
            "MMP3": (1.5, 3.0), "MMP1": (1.2, 2.5), "CXCL13": (1.0, 2.5),
        },
        pathways={
            "JAK_STAT_signalling": (0.7, 0.9),
            "TNF_signalling": (0.7, 0.9),
            "Th17_differentiation": (0.6, 0.8),
            "NF_kB_signalling": (0.65, 0.85),
            "matrix_metalloproteinase_activity": (0.7, 0.9),
        },
        markers=["TNF", "IL6", "MMP3", "CXCL13"],
        mechanism_templates=[
            "TNF/NF-kB-driven synovial inflammation",
            "Th17-mediated cartilage destruction via MMPs",
        ],
        responding_cell_types=["fibroblast", "macrophage", "CD4_T_cell"],
    ),
    # ── Liver ──────────────────────────────────────────────────────────────
    # The only profile whose condition_name ("HCC") differs from its name,
    # and the only one that supplies hidden_failure_templates.
    "hepatocellular_carcinoma": DiseaseProfile(
        name="hepatocellular_carcinoma",
        display_name="hepatocellular carcinoma",
        tissue="liver",
        condition_name="HCC",
        de_genes={
            "GPC3": (2.0, 4.0), "AFP": (1.5, 3.5), "EPCAM": (1.0, 2.5),
            "MYC": (1.0, 2.5), "VEGFA": (1.2, 2.8), "MKI67": (1.5, 3.0),
            "ALB": (-2.0, -0.8), "CYP3A4": (-1.8, -0.6), "APOB": (-1.5, -0.5),
        },
        pathways={
            "Wnt_signalling": (0.7, 0.9),
            "cell_cycle": (0.75, 0.95),
            "angiogenesis": (0.6, 0.8),
            "PI3K_AKT_signalling": (0.65, 0.85),
            "p53_signalling": (0.3, 0.5),
        },
        markers=["GPC3", "AFP", "VEGFA", "MKI67"],
        mechanism_templates=[
            "Wnt/beta-catenin-driven hepatocyte dedifferentiation",
            "VEGF-mediated tumour angiogenesis",
        ],
        responding_cell_types=["kupffer_cell", "stellate_cell"],
        hidden_failure_templates=[
            "Tumour heterogeneity may confound DE in mixed biopsies",
        ],
    ),
    # ── Aorta ──────────────────────────────────────────────────────────────
    "atherosclerosis": DiseaseProfile(
        name="atherosclerosis",
        display_name="atherosclerosis",
        tissue="aorta",
        condition_name="atherosclerosis",
        de_genes={
            "TREM2": (1.5, 3.0), "CD9": (1.0, 2.2), "LGALS3": (1.2, 2.5),
            "APOE": (0.8, 2.0), "MMP9": (1.0, 2.5), "IL1B": (0.8, 2.0),
            "ACTA2": (-1.5, -0.5), "MYH11": (-2.0, -0.8), "CNN1": (-1.5, -0.5),
        },
        pathways={
            "lipid_metabolism": (0.7, 0.9),
            "inflammatory_response": (0.65, 0.85),
            "foam_cell_formation": (0.7, 0.9),
            "smooth_muscle_contraction": (0.3, 0.5),
            "complement_cascade": (0.5, 0.7),
        },
        markers=["TREM2", "LGALS3", "MMP9", "CD9"],
        mechanism_templates=[
            "TREM2+ macrophage-driven foam cell formation",
            "smooth muscle cell phenotypic switching in plaque",
        ],
        responding_cell_types=["macrophage", "smooth_muscle"],
    ),
    # ── Breast ─────────────────────────────────────────────────────────────
    # ESR1 is the only de_genes range in this table that spans zero.
    "breast_cancer": DiseaseProfile(
        name="breast_cancer",
        display_name="breast cancer",
        tissue="breast",
        condition_name="breast_cancer",
        de_genes={
            "ERBB2": (1.5, 3.5), "ESR1": (-1.5, 1.5), "MKI67": (1.5, 3.0),
            "MYC": (1.0, 2.5), "CDH1": (-1.5, -0.3), "VIM": (0.8, 2.2),
            "CD274": (0.8, 2.0), "FOXP3": (0.6, 1.5), "GZMB": (0.8, 2.0),
        },
        pathways={
            "cell_cycle": (0.7, 0.9),
            "PI3K_AKT_signalling": (0.65, 0.85),
            "EMT": (0.55, 0.8),
            "immune_checkpoint": (0.5, 0.75),
            "estrogen_signalling": (0.3, 0.7),
        },
        markers=["ERBB2", "MKI67", "CD274", "VIM"],
        mechanism_templates=[
            "ERBB2-driven proliferative signalling",
            "immune evasion via PD-L1 upregulation",
        ],
        responding_cell_types=["macrophage", "T_cell", "fibroblast"],
    ),
    # ── Brain (second profile for this tissue) ─────────────────────────────
    "multiple_sclerosis": DiseaseProfile(
        name="multiple_sclerosis",
        display_name="multiple sclerosis",
        tissue="brain",
        condition_name="multiple_sclerosis",
        de_genes={
            "CD68": (1.0, 2.5), "CXCL10": (1.2, 2.8), "STAT1": (0.8, 2.0),
            "IRF1": (0.8, 1.8), "MBP": (-2.0, -0.8), "PLP1": (-1.8, -0.6),
            "MOG": (-1.5, -0.5), "GFAP": (1.0, 2.5), "C3": (0.8, 2.0),
        },
        pathways={
            "interferon_signalling": (0.7, 0.9),
            "neuroinflammation": (0.7, 0.9),
            "complement_cascade": (0.6, 0.8),
            "myelination": (0.2, 0.4),
            "T_cell_activation": (0.6, 0.8),
        },
        markers=["CXCL10", "STAT1", "GFAP", "C3"],
        mechanism_templates=[
            "interferon-driven microglial activation in demyelination",
            "complement-mediated oligodendrocyte damage",
        ],
        responding_cell_types=["microglia", "astrocyte"],
    ),
    # ── Kidney ─────────────────────────────────────────────────────────────
    "diabetic_nephropathy": DiseaseProfile(
        name="diabetic_nephropathy",
        display_name="diabetic nephropathy",
        tissue="kidney",
        condition_name="diabetic_nephropathy",
        de_genes={
            "HAVCR1": (1.5, 3.0), "LCN2": (1.2, 2.8), "COL4A1": (1.0, 2.5),
            "VEGFA": (0.8, 2.0), "NPHS1": (-1.8, -0.6), "NPHS2": (-1.5, -0.5),
            "WT1": (-1.2, -0.4), "TGFB1": (1.0, 2.2), "FN1": (1.2, 2.5),
        },
        pathways={
            "TGF_beta_signalling": (0.7, 0.9),
            "extracellular_matrix_organisation": (0.7, 0.9),
            "oxidative_stress": (0.6, 0.8),
            "VEGF_signalling": (0.5, 0.7),
            "apoptosis": (0.5, 0.7),
        },
        markers=["HAVCR1", "LCN2", "TGFB1", "FN1"],
        mechanism_templates=[
            "TGF-beta-driven glomerular fibrosis",
            "podocyte loss via oxidative stress",
        ],
        responding_cell_types=["fibroblast", "macrophage"],
    ),
    # ── Skin ───────────────────────────────────────────────────────────────
    "melanoma": DiseaseProfile(
        name="melanoma",
        display_name="melanoma",
        tissue="skin",
        condition_name="melanoma",
        de_genes={
            "MLANA": (1.5, 3.0), "PMEL": (1.2, 2.5), "SOX10": (1.0, 2.2),
            "MKI67": (1.5, 3.0), "CD274": (0.8, 2.0), "PDCD1": (0.8, 2.0),
            "GZMB": (0.8, 2.0), "HAVCR2": (0.6, 1.5), "LAG3": (0.6, 1.5),
        },
        pathways={
            "MAPK_signalling": (0.7, 0.9),
            "immune_checkpoint": (0.6, 0.85),
            "cell_cycle": (0.7, 0.9),
            "melanogenesis": (0.5, 0.7),
            "T_cell_exhaustion": (0.55, 0.8),
        },
        markers=["MLANA", "CD274", "GZMB", "MKI67"],
        mechanism_templates=[
            "MAPK-driven melanocyte proliferation",
            "T cell exhaustion via immune checkpoint upregulation",
        ],
        responding_cell_types=["T_cell", "macrophage"],
    ),
}
# ── Pathway library ─────────────────────────────────────────────────────────
# Pathway name -> member gene symbols.
#
# Every pathway name referenced by a disease profile or a perturbation
# template in this module should have an entry here, so that a lookup by
# name always yields a gene set.
PATHWAY_LIBRARY: Dict[str, List[str]] = {
    "TGF_beta_signalling": ["TGFB1", "TGFB2", "SMAD2", "SMAD3", "SMAD4", "ACVR1"],
    "Wnt_signalling": ["WNT3A", "CTNNB1", "APC", "AXIN2", "LGR5", "TCF7L2"],
    "MAPK_signalling": ["KRAS", "BRAF", "MAP2K1", "MAPK1", "MAPK3", "FOS", "JUN"],
    "JAK_STAT_signalling": ["JAK1", "JAK2", "STAT1", "STAT3", "STAT5A", "SOCS1", "SOCS3"],
    "PI3K_AKT_signalling": ["PIK3CA", "AKT1", "MTOR", "PTEN", "TSC2"],
    "NF_kB_signalling": ["NFKB1", "RELA", "IKBKB", "TNF", "IL1B"],
    "cell_cycle": ["CDK4", "CDK6", "CCND1", "CCNE1", "RB1", "E2F1", "MKI67"],
    "apoptosis": ["BCL2", "BAX", "BAK1", "CASP3", "CASP9", "TP53", "BID"],
    "inflammatory_response": ["TNF", "IL6", "IL1B", "CCL2", "CXCL8", "NFKB1"],
    "extracellular_matrix_organisation": ["COL1A1", "COL3A1", "FN1", "POSTN", "MMP2", "MMP9", "TIMP1"],
    "complement_cascade": ["C1QA", "C1QB", "C3", "C4A", "C5", "CFB"],
    "neuroinflammation": ["TREM2", "CX3CR1", "P2RY12", "IL1B", "TNF", "C1QA"],
    "synaptic_signalling": ["SLC17A7", "GRIA1", "GRIN1", "DLG4", "SNAP25", "SYP"],
    "hematopoietic_cell_lineage": ["CD34", "KIT", "FLT3", "GATA1", "CEBPA", "SPI1"],
    "insulin_signalling": ["INS", "INSR", "IRS1", "PIK3CA", "AKT1", "SLC2A4"],
    "ER_stress_response": ["DDIT3", "ATF4", "XBP1", "HSPA5", "EIF2AK3"],
    "oxidative_stress": ["SOD1", "SOD2", "CAT", "GPX1", "NFE2L2", "HMOX1"],
    "angiogenesis": ["VEGFA", "VEGFB", "KDR", "FLT1", "ANGPT1", "ANGPT2"],
    "EMT": ["CDH1", "CDH2", "VIM", "SNAI1", "SNAI2", "TWIST1", "ZEB1"],
    "immune_checkpoint": ["CD274", "PDCD1", "CTLA4", "HAVCR2", "LAG3", "TIGIT"],
    "T_cell_activation": ["CD3D", "CD28", "LCK", "ZAP70", "IL2", "IFNG"],
    "T_cell_exhaustion": ["PDCD1", "HAVCR2", "LAG3", "TIGIT", "TOX", "ENTPD1"],
    "TNF_signalling": ["TNF", "TNFRSF1A", "TRADD", "RIPK1", "NFKB1", "CASP8"],
    "Th17_differentiation": ["IL17A", "IL17F", "RORC", "IL23R", "CCR6", "STAT3"],
    "interferon_signalling": ["IFNG", "IFNB1", "STAT1", "IRF1", "IRF7", "MX1", "OAS1"],
    "lipid_metabolism": ["APOE", "APOB", "LDLR", "HMGCR", "ABCA1", "PPARG"],
    "myelination": ["MBP", "PLP1", "MOG", "MAG", "OLIG2", "SOX10"],
    "foam_cell_formation": ["CD36", "MSR1", "ABCA1", "APOE", "LGALS3", "TREM2"],
    "smooth_muscle_contraction": ["ACTA2", "MYH11", "TAGLN", "CNN1", "MYLK"],
    "glucagon_signalling": ["GCG", "GCGR", "CREB1", "PCK1", "G6PC"],
    "matrix_metalloproteinase_activity": ["MMP1", "MMP2", "MMP3", "MMP9", "TIMP1", "TIMP2"],
    "estrogen_signalling": ["ESR1", "ESR2", "PGR", "GREB1", "TFF1"],
    "melanogenesis": ["MITF", "TYR", "TYRP1", "DCT", "MLANA", "PMEL"],
    "VEGF_signalling": ["VEGFA", "VEGFB", "KDR", "FLT1", "NRP1"],
    # Pathways that disease profiles / perturbation templates refer to by
    # name but that previously had no gene set.  Appended at the end so the
    # order of the pre-existing keys is untouched.
    # NOTE(review): gene sets below are a curated starting point -- please
    # have a domain expert confirm the membership.
    "cardiac_muscle_contraction": ["TNNT2", "TNNI3", "MYH6", "MYH7", "ACTC1", "MYL2", "ATP2A2"],
    "integrin_signalling": ["ITGAV", "ITGB1", "ITGB3", "PTK2", "SRC", "ILK"],
    "macrophage_activation": ["CD68", "CD163", "MRC1", "TREM2", "SPP1", "MERTK"],
    "amyloid_processing": ["APP", "BACE1", "PSEN1", "PSEN2", "NCSTN", "ADAM10"],
    "p53_signalling": ["TP53", "MDM2", "CDKN1A", "BAX", "BBC3", "GADD45A"],
}
# ── Regulatory network templates ────────────────────────────────────────────
# Regulatory network templates: programme name -> {regulator: target genes}.
# Each programme lists two regulator genes together with the gene symbols
# they are wired to.
REGULATORY_TEMPLATES: Dict[str, Dict[str, List[str]]] = {
    "erythroid": dict(
        GATA1=["KLF1", "HBB", "HBA1", "GYPA", "ALAS2"],
        KLF1=["HBB", "HBA1", "SLC4A1"],
    ),
    "myeloid": dict(
        CEBPA=["CSF3R", "ELANE", "MPO", "CTSG"],
        SPI1=["CSF1R", "CD14", "FCGR3A", "CD68"],
    ),
    "lymphoid": dict(
        TCF7=["CD3D", "CD3E", "IL7R", "LEF1"],
        PAX5=["CD19", "MS4A1", "CD79A"],
    ),
    "fibrotic": dict(
        SMAD3=["COL1A1", "COL3A1", "FN1", "POSTN"],
        TGFB1=["ACTA2", "COL1A1", "CTGF"],
    ),
    "inflammatory": dict(
        NFKB1=["TNF", "IL6", "IL1B", "CCL2", "CXCL8"],
        STAT1=["IRF1", "CXCL10", "MX1", "OAS1"],
    ),
    "stem_cell": dict(
        RUNX1=["CD34", "KIT", "FLT3"],
        MYC=["CDK4", "CCND1", "E2F1"],
    ),
    "neuronal": dict(
        NEUROD1=["SLC17A7", "NRGN", "SNAP25"],
        DLX1=["GAD1", "GAD2", "SLC32A1"],
    ),
}
# ── Perturbation templates ──────────────────────────────────────────────────
@dataclass
class PerturbationTemplate:
    """Curated description of one perturbation (drug or genetic edit).

    Attributes:
        name: Identifier of the perturbation; in ``PERTURBATION_TEMPLATES``
            it equals the dictionary key.
        target_pathway: Name of the pathway the perturbation acts on.
        gene_effects: Gene symbol -> signed float. Negative values are used
            for genes the perturbation suppresses, positive for induced
            genes; presumably a log fold change -- TODO confirm against the
            generator.
        description: Human-readable description of the perturbation.
    """

    name: str
    target_pathway: str
    gene_effects: Dict[str, float]
    description: str
# Curated perturbations, keyed by perturbation name (each key equals the
# template's ``name`` field).  ``gene_effects`` maps a gene symbol to a signed
# effect value.
PERTURBATION_TEMPLATES: Dict[str, PerturbationTemplate] = {
    "JAK_inhibitor": PerturbationTemplate(
        name="JAK_inhibitor",
        target_pathway="JAK_STAT_signalling",
        gene_effects={
            "STAT1": -0.8,
            "STAT3": -0.7,
            "IFNG": -1.5,
            "IL17A": -1.3,
            "SOCS1": 1.2,
        },
        description="JAK inhibitor treatment",
    ),
    "anti_TNF": PerturbationTemplate(
        name="anti_TNF",
        target_pathway="TNF_signalling",
        gene_effects={
            "TNF": -1.5,
            "IL6": -1.0,
            "IL1B": -0.8,
            "MMP3": -1.2,
            "SOCS3": 0.8,
        },
        description="anti-TNF biologic therapy",
    ),
    "PD1_blockade": PerturbationTemplate(
        name="PD1_blockade",
        target_pathway="immune_checkpoint",
        gene_effects={
            "PDCD1": -1.0,
            "GZMB": 1.5,
            "IFNG": 1.2,
            "PRF1": 1.0,
            "TNF": 0.8,
        },
        description="anti-PD-1 immune checkpoint blockade",
    ),
    "BRAF_inhibitor": PerturbationTemplate(
        name="BRAF_inhibitor",
        target_pathway="MAPK_signalling",
        gene_effects={
            "BRAF": -0.5,
            "MAPK1": -1.0,
            "MKI67": -1.5,
            "CCND1": -1.2,
            "FOS": -0.8,
        },
        description="BRAF inhibitor treatment",
    ),
    "TGFb_inhibitor": PerturbationTemplate(
        name="TGFb_inhibitor",
        target_pathway="TGF_beta_signalling",
        gene_effects={
            "TGFB1": -0.8,
            "COL1A1": -1.2,
            "COL3A1": -1.0,
            "POSTN": -1.5,
            "ACTA2": -0.8,
        },
        description="TGF-beta pathway inhibitor",
    ),
    "mTOR_inhibitor": PerturbationTemplate(
        name="mTOR_inhibitor",
        target_pathway="PI3K_AKT_signalling",
        gene_effects={
            "MTOR": -0.8,
            "AKT1": -0.6,
            "MKI67": -1.2,
            "CCND1": -1.0,
            "HIF1A": -0.7,
        },
        description="mTOR inhibitor treatment",
    ),
    # The only genetic (non-drug) perturbation in the table.
    "CRISPR_TP53_KO": PerturbationTemplate(
        name="CRISPR_TP53_KO",
        target_pathway="p53_signalling",
        gene_effects={
            "TP53": -2.0,
            "BAX": -1.0,
            "CDKN1A": -1.5,
            "MDM2": -0.8,
            "MKI67": 1.0,
        },
        description="CRISPR knockout of TP53",
    ),
}
# ── Trajectory templates ────────────────────────────────────────────────────
@dataclass
class TrajectoryTemplate:
    """Template for a developmental trajectory through cell populations.

    Attributes:
        root_population: Name of the population the trajectory starts from.
        branches: One list of population names per branch, ordered from the
            root to the terminal population; in the curated templates every
            branch starts with ``root_population``.
        n_lineages: Number of lineages in the trajectory.
        tissue: Name of the tissue the trajectory belongs to.
    """

    root_population: str
    branches: List[List[str]]
    n_lineages: int
    tissue: str
# Curated developmental trajectories, one per tissue that has one.
# Each branch is a root-to-leaf path of population names that exist in the
# matching ``TISSUE_CELL_TYPES`` entry; ``n_lineages`` is the number of such
# paths.
TRAJECTORY_TEMPLATES: List[TrajectoryTemplate] = [
    # Haematopoiesis: two granulocyte/monocyte paths plus the erythroid and
    # megakaryocyte paths.
    TrajectoryTemplate(
        root_population="HSC",
        branches=[
            ["HSC", "CMP", "GMP", "neutrophil"],
            ["HSC", "CMP", "GMP", "monocyte"],
            ["HSC", "MEP", "erythrocyte"],
            ["HSC", "MEP", "megakaryocyte"],
        ],
        # Was 3, which disagreed with the four branches listed above; every
        # other template declares exactly one lineage per branch.
        n_lineages=4,
        tissue="bone_marrow",
    ),
    # Thymic T-cell maturation.
    TrajectoryTemplate(
        root_population="double_negative_T",
        branches=[
            ["double_negative_T", "double_positive_T", "CD4_SP"],
            ["double_negative_T", "double_positive_T", "CD8_SP"],
        ],
        n_lineages=2,
        tissue="thymus",
    ),
    # Colonic stem-cell differentiation.
    TrajectoryTemplate(
        root_population="stem_cell",
        branches=[
            ["stem_cell", "colonocyte"],
            ["stem_cell", "goblet_cell"],
        ],
        n_lineages=2,
        tissue="colon",
    ),
    # Oligodendrocyte maturation.
    TrajectoryTemplate(
        root_population="OPC",
        branches=[
            ["OPC", "oligodendrocyte"],
        ],
        n_lineages=1,
        tissue="brain",
    ),
]
# ── Hidden failure condition templates ──────────────────────────────────────
# Generic, disease-independent free-text descriptions of technical problems
# that can affect an experiment.
HIDDEN_FAILURE_TEMPLATES: List[str] = [
    # Contamination and batch structure
    "High ambient RNA may confound DE in low-abundance transcripts",
    "Strong batch effects between conditions may inflate false positives",
    # Sample quality
    "Low cell viability in disease samples reduces statistical power",
    "Doublet contamination in dense populations obscures rare cell types",
    "Sample degradation during processing introduces 3' bias artefacts",
    # Processing and design artefacts
    "Dissociation-induced gene expression changes confound stress signatures",
    "Unbalanced sample sizes between conditions reduce DE sensitivity",
]