"""Pre-defined biological scenarios for task generation.

Each ``Scenario`` bundles a task specification together with the matching
hidden ground-truth biology so the simulator can instantiate consistent
episodes. The library is intentionally diverse: it covers differential
expression, trajectory inference, perturbation response, and biomarker
validation across tissues and modalities.
"""
|
| |
|
| | from __future__ import annotations
|
| |
|
| | from dataclasses import dataclass, field
|
| | from typing import Any, Dict, List, Optional
|
| |
|
| | from models import ExpectedFinding, PaperReference, TaskSpec
|
| |
|
| | from server.simulator.latent_state import (
|
| | CellPopulation,
|
| | GeneProgram,
|
| | LatentBiologicalState,
|
| | TechnicalState,
|
| | )
|
| |
|
| |
|
@dataclass
class Scenario:
    """Pairing of a task specification with its hidden ground truth.

    A scenario carries everything needed to set up one reproducible
    episode: the task description, the latent biology behind it, and the
    technical state that accompanies it.  Only ``name``, ``task`` and
    ``biology`` are required; every other attribute has a default.
    """

    # Identifier of the scenario (e.g. "cardiac_disease_de").
    name: str
    # Task specification paired with this scenario.
    task: TaskSpec
    # Hidden ground-truth biology matching the task.
    biology: LatentBiologicalState
    # Technical state; a default-constructed ``TechnicalState`` when omitted.
    technical: TechnicalState = field(default_factory=TechnicalState)
    # Free-text descriptions of hidden failure conditions; empty by default.
    hidden_failure_conditions: List[str] = field(default_factory=list)
    # Difficulty label; entries in this file use "easy", "medium" or "hard".
    difficulty: str = field(default="medium")
    # Free-form labels such as "de", "trajectory" or "scRNA-seq".
    tags: List[str] = field(default_factory=list)
|
| |
|
| |
|
| |
|
| |
|
# ---------------------------------------------------------------------------
# Individual scenarios.  Each one is built as a private constant and then
# collected, in order, into ``SCENARIO_LIBRARY`` at the bottom of this block.
# ---------------------------------------------------------------------------

# Easy: differential expression in human heart tissue, healthy versus
# dilated cardiomyopathy.
_CARDIAC_DISEASE_DE = Scenario(
    name="cardiac_disease_de",
    task=TaskSpec(
        problem_statement=(
            "Identify differentially expressed genes between diseased "
            "and healthy cardiomyocytes using single-cell RNA sequencing."
        ),
        modality="scRNA-seq",
        organism="human",
        tissue="heart",
        conditions=["healthy", "dilated_cardiomyopathy"],
        budget_limit=80_000.0,
        time_limit_days=120.0,
        success_criteria=[
            "Identify DE genes between conditions",
            "Validate at least one candidate marker",
        ],
    ),
    biology=LatentBiologicalState(
        cell_populations=[
            CellPopulation(
                name="cardiomyocyte",
                proportion=0.35,
                marker_genes=["TNNT2", "MYH7", "ACTC1"],
                state="contractile",
                condition_response={"dilated_cardiomyopathy": 0.8},
            ),
            CellPopulation(
                name="fibroblast",
                proportion=0.25,
                marker_genes=["COL1A1", "DCN", "LUM"],
                state="quiescent",
                condition_response={"dilated_cardiomyopathy": 1.3},
            ),
            CellPopulation(
                name="endothelial",
                proportion=0.15,
                marker_genes=["PECAM1", "VWF", "CDH5"],
                state="quiescent",
            ),
            CellPopulation(
                name="macrophage",
                proportion=0.10,
                marker_genes=["CD68", "CD163", "CSF1R"],
                state="activated",
                condition_response={"dilated_cardiomyopathy": 1.5},
            ),
            CellPopulation(
                name="smooth_muscle",
                proportion=0.15,
                marker_genes=["ACTA2", "MYH11", "TAGLN"],
                state="quiescent",
            ),
        ],
        true_de_genes={
            "disease_vs_healthy": {
                "NPPA": 2.5,
                "NPPB": 3.1,
                "MYH7": 1.8,
                "COL1A1": 1.6,
                "COL3A1": 1.4,
                "POSTN": 2.0,
                "CCL2": 1.2,
                "IL6": 0.9,
                "TGFB1": 1.1,
                "ANKRD1": 2.2,
                "XIRP2": -1.3,
                "MYL2": -0.8,
            },
        },
        true_pathways={
            "cardiac_muscle_contraction": 0.4,
            "extracellular_matrix_organisation": 0.85,
            "inflammatory_response": 0.7,
            "TGF_beta_signalling": 0.75,
            "apoptosis": 0.55,
        },
        true_markers=["NPPA", "NPPB", "POSTN", "COL1A1"],
        causal_mechanisms=[
            "TGF-beta-driven fibrosis",
            "inflammatory macrophage infiltration",
        ],
        n_true_cells=12_000,
    ),
    technical=TechnicalState(
        batch_effects={"batch_1": 0.15, "batch_2": 0.10},
        doublet_rate=0.05,
        dropout_rate=0.08,
    ),
    difficulty="easy",
    tags=["de", "scRNA-seq", "cardiac"],
)

# Medium: trajectory inference over hematopoietic differentiation in human
# bone marrow; ships with a paper reference and expected findings.
_HEMATOPOIESIS_TRAJECTORY = Scenario(
    name="hematopoiesis_trajectory",
    task=TaskSpec(
        problem_statement=(
            "Infer the developmental trajectory of hematopoietic "
            "stem cells differentiating into mature blood lineages."
        ),
        modality="scRNA-seq",
        organism="human",
        tissue="bone_marrow",
        conditions=["steady_state"],
        budget_limit=100_000.0,
        time_limit_days=150.0,
        success_criteria=[
            "Reconstruct branching lineage structure",
            "Identify key transcription factors driving fate decisions",
        ],
        paper_references=[
            PaperReference(
                title=(
                    "Single-cell RNA-sequencing uncovers transcriptional "
                    "states and fate decisions in haematopoiesis"
                ),
                citation="Nature Communications (2018)",
                doi="10.1038/s41467-017-02305-6",
                url=(
                    "https://www.nature.com/articles/"
                    "s41467-017-02305-6"
                ),
            ),
        ],
        expected_findings=[
            ExpectedFinding(
                finding=(
                    "Trajectory analysis should recover branching blood "
                    "lineages rooted in HSCs."
                ),
                category="trajectory",
                keywords=["HSC", "branching", "lineage", "trajectory"],
            ),
            ExpectedFinding(
                finding=(
                    "GATA1 should appear as a driver of erythroid fate "
                    "commitment."
                ),
                category="regulatory_network",
                keywords=["GATA1", "erythroid", "commitment"],
            ),
            ExpectedFinding(
                finding=(
                    "CEBPA and SPI1 should support myeloid branch "
                    "decisions."
                ),
                category="regulatory_network",
                keywords=["CEBPA", "SPI1", "myeloid", "branch"],
            ),
        ],
    ),
    biology=LatentBiologicalState(
        cell_populations=[
            CellPopulation(
                name="HSC",
                proportion=0.05,
                marker_genes=["CD34", "KIT", "THY1"],
                state="stem",
            ),
            CellPopulation(
                name="CMP",
                proportion=0.10,
                marker_genes=["CD34", "FLT3"],
                state="progenitor",
            ),
            CellPopulation(
                name="GMP",
                proportion=0.12,
                marker_genes=["CSF3R", "CEBPA"],
                state="progenitor",
            ),
            CellPopulation(
                name="MEP",
                proportion=0.10,
                marker_genes=["GATA1", "KLF1"],
                state="progenitor",
            ),
            CellPopulation(
                name="erythrocyte",
                proportion=0.20,
                marker_genes=["HBA1", "HBB", "GYPA"],
                state="mature",
            ),
            CellPopulation(
                name="neutrophil",
                proportion=0.18,
                marker_genes=["ELANE", "MPO", "CTSG"],
                state="mature",
            ),
            CellPopulation(
                name="monocyte",
                proportion=0.15,
                marker_genes=["CD14", "CSF1R", "FCGR3A"],
                state="mature",
            ),
            CellPopulation(
                name="megakaryocyte",
                proportion=0.10,
                marker_genes=["ITGA2B", "GP1BA"],
                state="mature",
            ),
        ],
        # No condition contrast in this scenario, so no DE ground truth.
        true_de_genes={},
        true_pathways={
            "hematopoietic_cell_lineage": 0.9,
            "MAPK_signalling": 0.6,
            "JAK_STAT_signalling": 0.7,
        },
        true_trajectory={
            "root": "HSC",
            # NOTE(review): n_lineages is 3 while four branch paths are
            # listed below -- confirm which count consumers expect.
            "n_lineages": 3,
            "branching": True,
            "branches": [
                ["HSC", "CMP", "GMP", "neutrophil"],
                ["HSC", "CMP", "GMP", "monocyte"],
                ["HSC", "MEP", "erythrocyte"],
                ["HSC", "MEP", "megakaryocyte"],
            ],
        },
        true_regulatory_network={
            "GATA1": ["KLF1", "HBB", "HBA1", "GYPA"],
            "CEBPA": ["CSF3R", "ELANE", "MPO"],
            "SPI1": ["CSF1R", "CD14", "FCGR3A"],
            "RUNX1": ["CD34", "KIT"],
        },
        true_markers=["GATA1", "CEBPA", "SPI1"],
        causal_mechanisms=[
            "GATA1-driven erythroid commitment",
            "PU.1/CEBPA antagonism at myeloid branch point",
        ],
        n_true_cells=15_000,
    ),
    technical=TechnicalState(dropout_rate=0.12, doublet_rate=0.06),
    difficulty="medium",
    tags=["trajectory", "scRNA-seq", "hematopoiesis"],
)

# Hard: JAK-inhibitor perturbation of T-cell states in rheumatoid arthritis
# synovial fluid; the only entry here with a hidden failure condition.
_PERTURBATION_IMMUNE = Scenario(
    name="perturbation_immune",
    task=TaskSpec(
        problem_statement=(
            "Determine the effect of JAK inhibitor treatment on "
            "T-cell activation states in rheumatoid arthritis."
        ),
        modality="scRNA-seq",
        organism="human",
        tissue="synovial_fluid",
        conditions=["untreated_RA", "JAK_inhibitor_treated"],
        budget_limit=120_000.0,
        time_limit_days=180.0,
        prior_observations=[
            "Elevated JAK-STAT signalling observed in prior bulk RNA-seq",
        ],
        success_criteria=[
            "Quantify shift in T-cell activation states",
            "Identify pathways modulated by JAK inhibitor",
            "Propose validation strategy",
        ],
    ),
    biology=LatentBiologicalState(
        cell_populations=[
            CellPopulation(
                name="CD4_Th1",
                proportion=0.20,
                marker_genes=["IFNG", "TBX21", "IL2"],
                state="activated",
                condition_response={"JAK_inhibitor_treated": 0.5},
            ),
            CellPopulation(
                name="CD4_Th17",
                proportion=0.15,
                marker_genes=["IL17A", "RORC", "CCR6"],
                state="activated",
                condition_response={"JAK_inhibitor_treated": 0.6},
            ),
            CellPopulation(
                name="CD4_Treg",
                proportion=0.08,
                marker_genes=["FOXP3", "IL2RA", "CTLA4"],
                state="regulatory",
                condition_response={"JAK_inhibitor_treated": 1.2},
            ),
            CellPopulation(
                name="CD8_cytotoxic",
                proportion=0.18,
                marker_genes=["GZMB", "PRF1", "CD8A"],
                state="activated",
                condition_response={"JAK_inhibitor_treated": 0.7},
            ),
            CellPopulation(
                name="macrophage",
                proportion=0.15,
                marker_genes=["CD68", "CD163", "MARCO"],
                state="inflammatory",
            ),
            CellPopulation(
                name="fibroblast",
                proportion=0.14,
                marker_genes=["COL1A1", "FAP", "THY1"],
                state="activated",
            ),
            CellPopulation(
                name="B_cell",
                proportion=0.10,
                marker_genes=["CD19", "MS4A1", "CD79A"],
                state="quiescent",
            ),
        ],
        true_de_genes={
            "treated_vs_untreated": {
                "IFNG": -1.8,
                "TBX21": -1.2,
                "IL17A": -1.5,
                "RORC": -0.9,
                "JAK1": -0.3,
                "STAT1": -1.0,
                "STAT3": -0.8,
                "SOCS1": 1.5,
                "SOCS3": 1.3,
                "FOXP3": 0.6,
                "IL10": 0.7,
            },
        },
        true_pathways={
            "JAK_STAT_signalling": 0.3,
            "Th1_differentiation": 0.35,
            "Th17_differentiation": 0.4,
            "cytokine_signalling": 0.45,
            "regulatory_T_cell_function": 0.7,
        },
        perturbation_effects={
            "JAK_inhibitor": {
                "STAT1": -0.8,
                "STAT3": -0.7,
                "IFNG": -1.5,
                "IL17A": -1.3,
                "SOCS1": 1.2,
            },
        },
        true_markers=["STAT1", "SOCS1", "IFNG"],
        causal_mechanisms=[
            "JAK-STAT pathway inhibition reduces Th1/Th17 activation",
            "Compensatory Treg expansion under JAK inhibition",
        ],
        n_true_cells=18_000,
    ),
    technical=TechnicalState(
        batch_effects={"batch_ctrl": 0.12, "batch_treated": 0.18},
        ambient_rna_fraction=0.07,
        dropout_rate=0.10,
    ),
    hidden_failure_conditions=[
        "High ambient RNA may confound DE in low-abundance transcripts",
    ],
    difficulty="hard",
    tags=["perturbation", "scRNA-seq", "immune"],
)

# Medium: follow-up validation design for the candidate biomarker SPP1 in
# idiopathic pulmonary fibrosis (human lung).
_BIOMARKER_VALIDATION_LUNG = Scenario(
    name="biomarker_validation_lung",
    task=TaskSpec(
        problem_statement=(
            "Design a follow-up validation experiment for candidate "
            "biomarker SPP1 in idiopathic pulmonary fibrosis (IPF)."
        ),
        modality="scRNA-seq",
        organism="human",
        tissue="lung",
        conditions=["healthy", "IPF"],
        budget_limit=90_000.0,
        time_limit_days=150.0,
        prior_observations=[
            (
                "A macrophage subpopulation shows elevated expression in IPF "
                "tissue relative to controls"
            ),
            (
                "Pro-fibrotic macrophage enrichment has been observed in "
                "fibrotic regions by spatial profiling"
            ),
        ],
        success_criteria=[
            "Validate SPP1 as a marker for pro-fibrotic macrophages",
            "Confirm spatial localisation in fibrotic tissue",
        ],
        paper_references=[
            PaperReference(
                title=(
                    "Proliferating SPP1/MERTK-expressing macrophages in "
                    "idiopathic pulmonary fibrosis"
                ),
                citation="European Respiratory Journal (2019)",
                doi="10.1183/13993003.02441-2018",
                pmid="31221805",
                url="https://pubmed.ncbi.nlm.nih.gov/31221805/",
            ),
        ],
        expected_findings=[
            ExpectedFinding(
                finding=(
                    "SPP1-positive macrophages should be enriched in IPF "
                    "fibrotic regions."
                ),
                category="marker",
                keywords=["SPP1", "macrophage", "IPF", "fibrotic"],
            ),
            ExpectedFinding(
                finding=(
                    "MERTK should co-occur with the profibrotic macrophage "
                    "state."
                ),
                category="marker",
                keywords=["MERTK", "macrophage", "SPP1"],
            ),
            ExpectedFinding(
                finding=(
                    "Extracellular matrix organization should emerge as a "
                    "top fibrotic program."
                ),
                category="pathway",
                keywords=["extracellular_matrix", "fibrosis", "pathway"],
            ),
        ],
        dataset_metadata={
            "literature_grounding": "single_cell_ipf_macrophages",
        },
    ),
    biology=LatentBiologicalState(
        cell_populations=[
            CellPopulation(
                name="alveolar_macrophage",
                proportion=0.18,
                marker_genes=["MARCO", "FABP4", "MCEMP1"],
                state="resident",
            ),
            CellPopulation(
                name="SPP1_macrophage",
                proportion=0.12,
                marker_genes=["SPP1", "MERTK", "MMP9", "TREM2"],
                state="pro-fibrotic",
                condition_response={"IPF": 2.0},
            ),
            CellPopulation(
                name="AT2",
                proportion=0.20,
                marker_genes=["SFTPC", "SFTPB", "ABCA3"],
                state="normal",
            ),
            CellPopulation(
                name="fibroblast",
                proportion=0.22,
                marker_genes=["COL1A1", "COL3A1", "POSTN"],
                state="activated",
                condition_response={"IPF": 1.5},
            ),
            CellPopulation(
                name="endothelial",
                proportion=0.13,
                marker_genes=["PECAM1", "CLDN5"],
                state="quiescent",
            ),
            CellPopulation(
                name="T_cell",
                proportion=0.15,
                marker_genes=["CD3D", "CD3E", "IL7R"],
                state="quiescent",
            ),
        ],
        true_de_genes={
            "IPF_vs_healthy": {
                "SPP1": 3.2,
                "MERTK": 1.4,
                "MMP9": 1.8,
                "TREM2": 1.5,
                "COL1A1": 2.1,
                "COL3A1": 1.9,
                "POSTN": 2.4,
                "SFTPC": -1.2,
                "AGER": -1.6,
            },
        },
        true_pathways={
            "extracellular_matrix_organisation": 0.9,
            "integrin_signalling": 0.75,
            "macrophage_activation": 0.8,
            "Wnt_signalling": 0.6,
        },
        true_markers=["SPP1", "MERTK", "POSTN", "MMP9"],
        causal_mechanisms=[
            "SPP1+ macrophage-driven fibroblast activation",
            "Integrin-mediated SPP1 signalling in fibrosis",
        ],
        n_true_cells=14_000,
    ),
    technical=TechnicalState(
        batch_effects={"batch_1": 0.10},
        dropout_rate=0.09,
        sample_quality=0.85,
    ),
    difficulty="medium",
    tags=["biomarker", "validation", "scRNA-seq", "lung"],
)

# Public library of built-in scenarios, in the order they are defined above.
SCENARIO_LIBRARY: List[Scenario] = [
    _CARDIAC_DISEASE_DE,
    _HEMATOPOIESIS_TRAJECTORY,
    _PERTURBATION_IMMUNE,
    _BIOMARKER_VALIDATION_LUNG,
]
|
| |
|
| |
|
| |
|