bio-experiment

Running on CPU Upgrade

App Files Files Community

bio-experiment/server/simulator/latent_state.py

Ev3Dev

Upload folder using huggingface_hub

5c3cfae verified 3 days ago

raw

history blame contribute delete

5.56 kB

	"""Latent biological and technical state — hidden from the agent."""

	from __future__ import annotations

	from typing import Any, Dict, List, Optional

	from pydantic import BaseModel, Field


	class CellPopulation(BaseModel):
	    """One ground-truth cell sub-population of the simulated tissue.

	    ``name`` and ``proportion`` are required; ``proportion`` is validated
	    to lie in the closed interval [0, 1].  The container fields each
	    default to a fresh empty list/dict per instance.
	    """

	    name: str
	    # Required (no default); constrained to 0.0 <= proportion <= 1.0.
	    proportion: float = Field(..., ge=0.0, le=1.0)
	    marker_genes: List[str] = Field(default_factory=list)
	    state: str = "quiescent"
	    # NOTE(review): presumably maps a condition name to a response
	    # magnitude — confirm against the code that populates it.
	    condition_response: Dict[str, float] = Field(default_factory=dict)


	class GeneProgram(BaseModel):
	    """A single latent gene-regulatory programme.

	    ``name`` is required.  ``activity_level`` defaults to 0.5 and is
	    validated to stay within [0, 1]; the list fields default to fresh
	    empty lists.
	    """

	    name: str
	    genes: List[str] = Field(default_factory=list)
	    activity_level: float = Field(default=0.5, ge=0.0, le=1.0)
	    condition_dependent: bool = False
	    # NOTE(review): looks like this is only meaningful when
	    # ``condition_dependent`` is True — confirm with the consumer.
	    conditions_active: List[str] = Field(default_factory=list)


	class LatentBiologicalState(BaseModel):
	    """Ground-truth biology that is kept hidden from the agent.

	    Every field has a default, so the model can be built with no
	    arguments: containers start empty, ``true_trajectory`` starts as
	    ``None`` and ``n_true_cells`` starts at 10 000.
	    """

	    cell_populations: List[CellPopulation] = Field(default_factory=list)

	    # Nested mapping; the layout is spelled out in the field description.
	    true_de_genes: Dict[str, Dict[str, float]] = Field(
	        description="comparison_key → {gene: log2FC}",
	        default_factory=dict,
	    )
	    true_pathways: Dict[str, float] = Field(
	        description="pathway → activity level",
	        default_factory=dict,
	    )

	    gene_programs: List[GeneProgram] = Field(default_factory=list)

	    # Free-form payload; no schema is enforced here beyond "dict or None".
	    true_trajectory: Optional[Dict[str, Any]] = None

	    true_regulatory_network: Dict[str, List[str]] = Field(
	        description="TF → target genes",
	        default_factory=dict,
	    )
	    perturbation_effects: Dict[str, Dict[str, float]] = Field(
	        description="perturbation → {gene: effect_size}",
	        default_factory=dict,
	    )

	    confounders: Dict[str, float] = Field(default_factory=dict)
	    true_markers: List[str] = Field(default_factory=list)
	    causal_mechanisms: List[str] = Field(default_factory=list)
	    n_true_cells: int = 10_000


	class TechnicalState(BaseModel):
	    """Hidden technical parameters that influence experimental noise.

	    All fields carry defaults.  Only ``sample_quality`` and
	    ``library_complexity`` are range-validated (both to [0, 1]); the
	    remaining floats are accepted without bounds checks.
	    """

	    batch_effects: Dict[str, float] = Field(default_factory=dict)

	    # Unconstrained floats — no ge/le validation is applied to these.
	    ambient_rna_fraction: float = 0.05
	    doublet_rate: float = 0.04
	    dropout_rate: float = 0.1

	    # Validated to the closed interval [0, 1].
	    sample_quality: float = Field(default=0.9, ge=0.0, le=1.0)
	    library_complexity: float = Field(default=0.8, ge=0.0, le=1.0)

	    # Unconstrained floats.
	    sequencing_depth_factor: float = 1.0
	    capture_efficiency: float = 0.6


	class ExperimentProgress(BaseModel):
	    """Completion flags and summary counts for the experiment stages.

	    Each boolean flag starts as ``False``; each ``n_*`` counter starts
	    as ``None``.
	    """

	    # --- Stage flags: all default to "not completed". ---
	    samples_collected: bool = False
	    cohort_selected: bool = False
	    cells_cultured: bool = False
	    library_prepared: bool = False
	    perturbation_applied: bool = False
	    cells_sequenced: bool = False
	    qc_performed: bool = False
	    data_filtered: bool = False
	    data_normalized: bool = False
	    batches_integrated: bool = False
	    cells_clustered: bool = False
	    de_performed: bool = False
	    trajectories_inferred: bool = False
	    pathways_analyzed: bool = False
	    networks_inferred: bool = False
	    markers_discovered: bool = False
	    markers_validated: bool = False
	    followup_designed: bool = False
	    subagent_review_requested: bool = False
	    conclusion_reached: bool = False

	    # --- Optional counters: None until a value is assigned. ---
	    n_cells_sequenced: Optional[int] = Field(default=None)
	    n_cells_after_filter: Optional[int] = Field(default=None)
	    n_clusters_found: Optional[int] = Field(default=None)
	    n_de_genes_found: Optional[int] = Field(default=None)
	    n_markers_found: Optional[int] = Field(default=None)


	class ResourceState(BaseModel):
	    """Internal resource bookkeeping (superset of agent-visible ResourceUsage).

	    Stores totals and running usage for budget, time, samples, compute,
	    sequencing lanes and reagent kits, and derives the remaining budget
	    and time (both floored at zero) plus matching "exhausted" flags.
	    """

	    # Budget and time: totals with their running usage.
	    budget_total: float = 100_000.0
	    budget_used: float = 0.0
	    time_limit_days: float = 180.0
	    time_used_days: float = 0.0

	    # Consumable counters.
	    samples_available: int = 0
	    samples_consumed: int = 0
	    compute_hours_used: float = 0.0
	    sequencing_lanes_used: int = 0
	    reagent_kits_used: int = 0

	    @property
	    def budget_remaining(self) -> float:
	        """Return the unspent budget, never less than ``0.0``."""
	        unspent = self.budget_total - self.budget_used
	        return max(0.0, unspent)

	    @property
	    def time_remaining_days(self) -> float:
	        """Return the days left before the limit, never less than ``0.0``."""
	        days_left = self.time_limit_days - self.time_used_days
	        return max(0.0, days_left)

	    @property
	    def budget_exhausted(self) -> bool:
	        """Return ``True`` once no budget remains."""
	        remaining = self.budget_remaining
	        return remaining <= 0

	    @property
	    def time_exhausted(self) -> bool:
	        """Return ``True`` once no time remains."""
	        remaining = self.time_remaining_days
	        return remaining <= 0


	class FullLatentState(BaseModel):
	    """Aggregate hidden state of the simulated biological world.

	    Bundles the biological, technical, progress and resource sub-states
	    (each built from its own defaults when not supplied) together with
	    bookkeeping fields such as the step counter and RNG seed.
	    """

	    # Sub-states; each is default-constructed per instance.
	    biology: LatentBiologicalState = Field(
	        default_factory=LatentBiologicalState,
	    )
	    technical: TechnicalState = Field(default_factory=TechnicalState)
	    progress: ExperimentProgress = Field(default_factory=ExperimentProgress)
	    resources: ResourceState = Field(default_factory=ResourceState)

	    # Bookkeeping containers, empty by default.
	    hidden_failure_conditions: List[str] = Field(default_factory=list)
	    mechanism_confidence: Dict[str, float] = Field(default_factory=dict)
	    discovered_de_genes: List[str] = Field(default_factory=list)
	    discovered_clusters: List[str] = Field(default_factory=list)

	    task_modality: str = "scRNA-seq"
	    step_count: int = 0
	    rng_seed: int = 42

	    # Scratch values handed from the transition engine to the output
	    # generator during one step.  ``exclude=True`` keeps them out of the
	    # serialized model.
	    last_retain_frac: Optional[float] = Field(default=None, exclude=True)
	    last_n_clusters: Optional[int] = Field(default=None, exclude=True)
	    last_perturbation_efficiency: Optional[float] = Field(
	        default=None, exclude=True
	    )