File size: 5,555 Bytes
"""Latent biological and technical state — hidden from the agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class CellPopulation(BaseModel):
    """One ground-truth cell sub-population of the simulated tissue.

    Only ``name`` and ``proportion`` are required; every other field has a
    default.
    """

    name: str
    # Validated to lie within [0.0, 1.0].
    proportion: float = Field(ge=0.0, le=1.0)
    # A fresh empty list is created for each instance.
    marker_genes: List[str] = Field(default_factory=list)
    state: str = "quiescent"
    # String-keyed float values; presumably keyed by condition name —
    # confirm against the code that populates it.
    condition_response: Dict[str, float] = Field(default_factory=dict)
class GeneProgram(BaseModel):
    """A latent gene-regulatory programme.

    Only ``name`` is required; the remaining fields fall back to defaults.
    """

    name: str
    genes: List[str] = Field(default_factory=list)
    # Defaults to 0.5 and is validated to lie within [0.0, 1.0].
    activity_level: float = Field(default=0.5, ge=0.0, le=1.0)
    condition_dependent: bool = False
    # NOTE(review): presumably only meaningful when ``condition_dependent``
    # is true; nothing in this model enforces that relationship.
    conditions_active: List[str] = Field(default_factory=list)
class LatentBiologicalState(BaseModel):
    """Ground-truth biology that the agent cannot observe directly.

    Every field has a default, so the model can be built with no arguments.
    Container fields use ``default_factory`` so instances never share a
    mutable default.
    """

    cell_populations: List[CellPopulation] = Field(default_factory=list)

    # Nested mapping; the inner dict maps a gene to its log2 fold change.
    true_de_genes: Dict[str, Dict[str, float]] = Field(
        default_factory=dict, description="comparison_key → {gene: log2FC}"
    )
    true_pathways: Dict[str, float] = Field(
        default_factory=dict, description="pathway → activity level"
    )
    gene_programs: List[GeneProgram] = Field(default_factory=list)

    # Free-form payload; its schema is not defined in this model.
    true_trajectory: Optional[Dict[str, Any]] = None

    true_regulatory_network: Dict[str, List[str]] = Field(
        default_factory=dict, description="TF → target genes"
    )
    perturbation_effects: Dict[str, Dict[str, float]] = Field(
        default_factory=dict, description="perturbation → {gene: effect_size}"
    )
    confounders: Dict[str, float] = Field(default_factory=dict)
    true_markers: List[str] = Field(default_factory=list)
    causal_mechanisms: List[str] = Field(default_factory=list)
    n_true_cells: int = 10000
class TechnicalState(BaseModel):
    """Hidden technical parameters that shape the experimental noise.

    Only ``sample_quality`` and ``library_complexity`` declare range
    validation; the other numeric fields accept any float.
    """

    batch_effects: Dict[str, float] = Field(default_factory=dict)

    # No range constraint is declared on these three.
    ambient_rna_fraction: float = 0.05
    doublet_rate: float = 0.04
    dropout_rate: float = 0.1

    # Both are validated to lie within [0.0, 1.0].
    sample_quality: float = Field(default=0.9, ge=0.0, le=1.0)
    library_complexity: float = Field(default=0.8, ge=0.0, le=1.0)

    # No range constraint is declared on these two either.
    sequencing_depth_factor: float = 1.0
    capture_efficiency: float = 0.6
class ExperimentProgress(BaseModel):
    """Records which experiment stages have been completed so far.

    Holds one boolean flag per stage (all ``False`` initially) and a few
    optional integer counts (all ``None`` initially).
    """

    # --- Stage-completion flags; each one starts as False. ---
    samples_collected: bool = False
    cohort_selected: bool = False
    cells_cultured: bool = False
    library_prepared: bool = False
    perturbation_applied: bool = False
    cells_sequenced: bool = False
    qc_performed: bool = False
    data_filtered: bool = False
    data_normalized: bool = False
    batches_integrated: bool = False
    cells_clustered: bool = False
    de_performed: bool = False
    trajectories_inferred: bool = False
    pathways_analyzed: bool = False
    networks_inferred: bool = False
    markers_discovered: bool = False
    markers_validated: bool = False
    followup_designed: bool = False
    subagent_review_requested: bool = False
    conclusion_reached: bool = False

    # --- Optional counts; None until a value is assigned. ---
    n_cells_sequenced: Optional[int] = None
    n_cells_after_filter: Optional[int] = None
    n_clusters_found: Optional[int] = None
    n_de_genes_found: Optional[int] = None
    n_markers_found: Optional[int] = None
class ResourceState(BaseModel):
    """Internal resource ledger (superset of agent-visible ResourceUsage).

    Stores totals and running usage counters as fields; the remaining and
    exhausted values are derived from them by read-only properties.
    """

    budget_total: float = 100000.0
    budget_used: float = 0.0
    time_limit_days: float = 180.0
    time_used_days: float = 0.0
    samples_available: int = 0
    samples_consumed: int = 0
    compute_hours_used: float = 0.0
    sequencing_lanes_used: int = 0
    reagent_kits_used: int = 0

    @property
    def budget_remaining(self) -> float:
        """Return the unspent budget, never less than zero."""
        unspent = self.budget_total - self.budget_used
        # Overspending is reported as 0.0 rather than a negative balance.
        return max(0.0, unspent)

    @property
    def time_remaining_days(self) -> float:
        """Return the days left before the time limit, never less than zero."""
        days_left = self.time_limit_days - self.time_used_days
        return max(0.0, days_left)

    @property
    def budget_exhausted(self) -> bool:
        """Return True once no budget remains."""
        remaining = self.budget_remaining
        return remaining <= 0

    @property
    def time_exhausted(self) -> bool:
        """Return True once no time remains."""
        remaining = self.time_remaining_days
        return remaining <= 0
class FullLatentState(BaseModel):
    """The complete hidden state of the simulated biological world.

    Combines the biological, technical, progress and resource sub-states
    with a few top-level bookkeeping fields. Every field has a default, and
    each sub-state is built fresh per instance via ``default_factory``.
    """

    biology: LatentBiologicalState = Field(default_factory=LatentBiologicalState)
    technical: TechnicalState = Field(default_factory=TechnicalState)
    progress: ExperimentProgress = Field(default_factory=ExperimentProgress)
    resources: ResourceState = Field(default_factory=ResourceState)

    hidden_failure_conditions: List[str] = Field(default_factory=list)
    mechanism_confidence: Dict[str, float] = Field(default_factory=dict)
    discovered_de_genes: List[str] = Field(default_factory=list)
    discovered_clusters: List[str] = Field(default_factory=list)

    task_modality: str = "scRNA-seq"
    step_count: int = 0
    rng_seed: int = 42

    # Scratch values handed from the transition engine to the output
    # generator within one step. ``exclude=True`` keeps them out of the
    # serialized model.
    last_retain_frac: Optional[float] = Field(default=None, exclude=True)
    last_n_clusters: Optional[int] = Field(default=None, exclude=True)
    last_perturbation_efficiency: Optional[float] = Field(
        default=None, exclude=True
    )
|