# hackathon/server/simulator/latent_state.py
# Ev3Dev's picture
# Upload folder using huggingface_hub
# 5c3cfae verified
"""Latent biological and technical state — hidden from the agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class CellPopulation(BaseModel):
    """Ground-truth cell sub-population in the simulated tissue.

    ``name`` and ``proportion`` have no default and must be supplied;
    the remaining fields fall back to the defaults declared below.
    """

    # Label of the sub-population (required).
    name: str
    # Required; validated to lie in the closed interval [0, 1].
    proportion: float = Field(..., ge=0.0, le=1.0)
    # Each instance receives its own fresh empty list.
    marker_genes: List[str] = Field(default_factory=list)
    # Free-form state label; starts out as "quiescent".
    state: str = "quiescent"
    # String-keyed floats; presumably condition name -> response
    # magnitude (TODO confirm against the code that fills it).
    condition_response: Dict[str, float] = Field(default_factory=dict)
class GeneProgram(BaseModel):
    """A latent gene-regulatory programme.

    Only ``name`` is required; all other fields carry defaults.
    """

    # Label of the programme (required).
    name: str
    # Genes belonging to the programme; fresh empty list per instance.
    genes: List[str] = Field(default_factory=list)
    # Defaults to 0.5 and is validated to stay within [0, 1].
    activity_level: float = Field(default=0.5, ge=0.0, le=1.0)
    condition_dependent: bool = False
    # Presumably only meaningful when ``condition_dependent`` is True;
    # nothing in this model enforces that link.
    conditions_active: List[str] = Field(default_factory=list)
class LatentBiologicalState(BaseModel):
    """Hidden ground-truth biology the agent cannot directly observe.

    Every field has a default, so the model can be built with no
    arguments. Container fields use ``default_factory`` so instances
    never share a mutable default.

    The ``description`` strings document the mapping shape of the
    dictionary fields. They use a real "→" arrow; the previous text
    carried a mis-decoded UTF-8 sequence that would have appeared
    verbatim in any schema generated from this model.
    """

    cell_populations: List[CellPopulation] = Field(default_factory=list)
    true_de_genes: Dict[str, Dict[str, float]] = Field(
        default_factory=dict,
        description="comparison_key → {gene: log2FC}",
    )
    true_pathways: Dict[str, float] = Field(
        default_factory=dict,
        description="pathway → activity level",
    )
    gene_programs: List[GeneProgram] = Field(default_factory=list)
    # None until a trajectory is set; the dict layout is not fixed here.
    true_trajectory: Optional[Dict[str, Any]] = None
    true_regulatory_network: Dict[str, List[str]] = Field(
        default_factory=dict,
        description="TF → target genes",
    )
    perturbation_effects: Dict[str, Dict[str, float]] = Field(
        default_factory=dict,
        description="perturbation → {gene: effect_size}",
    )
    # String-keyed floats; presumably confounder name -> strength
    # (TODO confirm against the code that fills it).
    confounders: Dict[str, float] = Field(default_factory=dict)
    true_markers: List[str] = Field(default_factory=list)
    causal_mechanisms: List[str] = Field(default_factory=list)
    n_true_cells: int = 10_000
class TechnicalState(BaseModel):
    """Hidden technical parameters that shape experimental noise.

    All fields have defaults. Only ``sample_quality`` and
    ``library_complexity`` are range-checked (to [0, 1]); the other
    numeric fields accept any float.
    """

    # String-keyed floats; presumably batch label -> effect magnitude
    # (TODO confirm against the code that fills it).
    batch_effects: Dict[str, float] = Field(default_factory=dict)

    # Unvalidated floats with fixed defaults.
    ambient_rna_fraction: float = 0.05
    doublet_rate: float = 0.04
    dropout_rate: float = 0.1

    # Validated to lie in [0, 1].
    sample_quality: float = Field(default=0.9, ge=0.0, le=1.0)
    library_complexity: float = Field(default=0.8, ge=0.0, le=1.0)

    # Unvalidated floats with fixed defaults.
    sequencing_depth_factor: float = 1.0
    capture_efficiency: float = 0.6
class ExperimentProgress(BaseModel):
    """Flags tracking which experiment stages have been completed.

    The model holds one boolean per stage, all initially ``False``,
    plus a handful of optional integer counts that stay ``None`` until
    a value is assigned.
    """

    # --- Stage-completion flags (every flag starts out False) ---
    samples_collected: bool = False
    cohort_selected: bool = False
    cells_cultured: bool = False
    library_prepared: bool = False
    perturbation_applied: bool = False
    cells_sequenced: bool = False
    qc_performed: bool = False
    data_filtered: bool = False
    data_normalized: bool = False
    batches_integrated: bool = False
    cells_clustered: bool = False
    de_performed: bool = False
    trajectories_inferred: bool = False
    pathways_analyzed: bool = False
    networks_inferred: bool = False
    markers_discovered: bool = False
    markers_validated: bool = False
    followup_designed: bool = False
    subagent_review_requested: bool = False
    conclusion_reached: bool = False

    # --- Recorded counts (None means "no value recorded") ---
    n_cells_sequenced: Optional[int] = None
    n_cells_after_filter: Optional[int] = None
    n_clusters_found: Optional[int] = None
    n_de_genes_found: Optional[int] = None
    n_markers_found: Optional[int] = None
class ResourceState(BaseModel):
    """Full internal resource tracking (superset of agent-visible ResourceUsage).

    Stored fields record totals and running usage. The properties
    derive the remaining budget and time, and whether either has run
    out; they are computed on access and are not model fields.
    """

    # Budget: total allowance and amount spent so far.
    budget_total: float = 100_000.0
    budget_used: float = 0.0

    # Time: overall limit and elapsed amount, both in days.
    time_limit_days: float = 180.0
    time_used_days: float = 0.0

    # Sample bookkeeping.
    samples_available: int = 0
    samples_consumed: int = 0

    # Other usage counters.
    compute_hours_used: float = 0.0
    sequencing_lanes_used: int = 0
    reagent_kits_used: int = 0

    @property
    def budget_remaining(self) -> float:
        """Return the unspent budget, floored at 0.0 on overspend."""
        unspent = self.budget_total - self.budget_used
        return max(0.0, unspent)

    @property
    def time_remaining_days(self) -> float:
        """Return the days left before the limit, floored at 0.0."""
        days_left = self.time_limit_days - self.time_used_days
        return max(0.0, days_left)

    @property
    def budget_exhausted(self) -> bool:
        """Return True once no budget remains."""
        remaining = self.budget_remaining
        return remaining <= 0

    @property
    def time_exhausted(self) -> bool:
        """Return True once no time remains."""
        remaining = self.time_remaining_days
        return remaining <= 0
class FullLatentState(BaseModel):
    """Complete hidden state of the simulated biological world.

    The model bundles the four sub-states (biology, technical,
    progress, resources) with episode-level bookkeeping. Every field
    has a default, and each sub-state is built by its own class via
    ``default_factory`` so instances never share one.
    """

    # Nested sub-states, each freshly constructed per instance.
    biology: LatentBiologicalState = Field(default_factory=LatentBiologicalState)
    technical: TechnicalState = Field(default_factory=TechnicalState)
    progress: ExperimentProgress = Field(default_factory=ExperimentProgress)
    resources: ResourceState = Field(default_factory=ResourceState)

    # Episode-level bookkeeping.
    hidden_failure_conditions: List[str] = Field(default_factory=list)
    mechanism_confidence: Dict[str, float] = Field(default_factory=dict)
    discovered_de_genes: List[str] = Field(default_factory=list)
    discovered_clusters: List[str] = Field(default_factory=list)
    task_modality: str = "scRNA-seq"
    step_count: int = 0
    rng_seed: int = 42

    # Scratch values handed from the transition engine to the output
    # generator during one step. ``exclude=True`` keeps them out of
    # serialized output.
    last_retain_frac: Optional[float] = Field(default=None, exclude=True)
    last_n_clusters: Optional[int] = Field(default=None, exclude=True)
    last_perturbation_efficiency: Optional[float] = Field(default=None, exclude=True)