Spaces:

Ev3Dev
/

hackathon

Running

App Files Files Community

Ev3Dev commited on Mar 8

Commit

4db0438

verified ·

1 Parent(s): 538da5f

Upload folder using huggingface_hub

Browse files

Files changed (39) hide show

Dockerfile +81 -0
README.md +337 -10
__init__.py +48 -0
client.py +53 -0
models.py +268 -0
openenv.yaml +7 -0
outputs/.gitkeep +1 -0
pyproject.toml +66 -0
run_agent.py +292 -0
server/__init__.py +3 -0
server/app.py +45 -0
server/hackathon_environment.py +239 -0
server/requirements.txt +6 -0
server/rewards/__init__.py +3 -0
server/rewards/reward.py +285 -0
server/rules/__init__.py +3 -0
server/rules/engine.py +208 -0
server/simulator/__init__.py +25 -0
server/simulator/latent_state.py +143 -0
server/simulator/noise.py +124 -0
server/simulator/output_generator.py +495 -0
server/simulator/transition.py +216 -0
server/subagents/__init__.py +0 -0
server/tasks/__init__.py +4 -0
server/tasks/generator.py +129 -0
server/tasks/scenarios.py +454 -0
tests/__init__.py +0 -0
tests/test_environment.py +85 -0
tests/test_literature_benchmark.py +36 -0
tests/test_models.py +88 -0
tests/test_rewards.py +105 -0
tests/test_rules.py +79 -0
tests/test_simulator.py +121 -0
training/__init__.py +34 -0
training/evaluation.py +160 -0
training/gym_wrapper.py +174 -0
training/literature_benchmark.py +557 -0
training/trajectory.py +159 -0
uv.lock +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,81 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local OpenEnv sources)
+# - Standalone environments (with openenv from PyPI/Git)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Ensure git is available (required for installing dependencies from VCS)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+# Build argument to control whether we're building standalone or in-repo
+ARG BUILD_MODE=in-repo
+ARG ENV_NAME=hackathon
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv is already vendored in the build context
+# For standalone builds, openenv will be installed via pyproject.toml
+WORKDIR /app/env
+# Ensure uv is available (for local builds where base image lacks it)
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync
+# If uv.lock exists, use it; otherwise resolve on the fly
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# Final runtime stage
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code
+COPY --from=builder /app/env /app/env
+# Set PATH to use the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+# The module path is constructed to work with the /app/env structure
+ENV ENABLE_WEB_INTERFACE=true
+CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]

README.md CHANGED Viewed

@@ -1,10 +1,337 @@
----
-title: Hackathon
-emoji: 🦀
-colorFrom: blue
-colorTo: gray
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Bio Experiment Environment Server
+sdk: docker
+pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
+  - reinforcement-learning
+  - bioinformatics
+---
+# Bio Experiment Environment
+This repository implements an OpenEnv-compatible reinforcement learning environment for planning biological experiment pipelines. The agent does not directly see the true biological state. Instead, it proposes one structured experiment or analysis step at a time, receives a noisy simulated output, and is rewarded for valid, informative, efficient, well-calibrated plans.
+The environment is designed as a partially observable Markov decision process (POMDP) with:
+- hidden ground-truth biology
+- hidden technical noise and failure conditions
+- visible task metadata, resource usage, step history, and intermediate outputs
+- dense step-wise reward plus terminal reward for conclusion quality
+## What "how it works" means here
+At a high level, each episode looks like this:
+1. `reset()` picks a biological scenario and seeds the simulator.
+2. The agent receives an `ExperimentObservation` describing the task and current visible state.
+3. The agent submits an `ExperimentAction` such as `collect_sample`, `run_qc`, or `differential_expression`.
+4. The rule engine checks whether the action is valid at this point in the pipeline.
+5. The transition engine updates hidden state, spends resources, and asks the output generator to simulate the result.
+6. The reward computer scores the step for validity, ordering, information gain, efficiency, novelty, and penalties.
+7. The environment returns a new observation with updated history, outputs, discoveries, violations, and reward.
+8. The episode ends when the agent synthesizes a conclusion, exhausts resources, or reaches the step limit.
+## The core mental model
+### Hidden state
+The simulator keeps a `FullLatentState` that the agent never directly sees. It contains:
+- true cell populations and marker genes
+- true DE genes, pathways, trajectories, and regulatory networks
+- technical factors such as dropout, doublets, ambient RNA, and batch effects
+- experiment progress flags
+- remaining budget and time
+- hidden failure conditions
+### Visible state
+The agent only sees `ExperimentObservation`, which includes:
+- the current `TaskSpec`
+- pipeline history
+- available assays and tools
+- resource usage
+- the latest and cumulative intermediate outputs
+- discovered markers and candidate mechanisms
+- rule violations
+- per-step reward breakdown
+This separation is what makes the environment a POMDP rather than a fully observed simulator.
+## Main building blocks
+### `models.py`
+Defines the contracts that all other modules use:
+- `ExperimentAction`: one structured step chosen by the agent
+- `ExperimentObservation`: what the agent can see after each step
+- `TaskSpec`: the problem statement, budget, time limit, assays, tools, and expected findings
+- `IntermediateOutput`: the simulated artifact returned by a step
+- `ConclusionClaim`: structured claims used for final synthesis
+The action vocabulary is intentionally broad enough to mix wet-lab, computational, and meta-planning actions.
+### `server/tasks/`
+This is where episodes come from.
+- `scenarios.py` defines a small library of curated biological scenarios
+- `generator.py` turns a scenario into a `(TaskSpec, FullLatentState)` pair
+- optional domain randomization perturbs budget, time, noise, batch effects, cell proportions, and effect sizes
+Right now the scenario library includes:
+- `cardiac_disease_de`: disease vs healthy differential expression in heart tissue
+- `hematopoiesis_trajectory`: developmental trajectory inference in bone marrow
+- `perturbation_immune`: treatment response under JAK inhibition
+- `biomarker_validation_lung`: follow-up validation of `SPP1` in IPF
+### `server/simulator/`
+This is the simulator itself.
+- `latent_state.py` defines hidden biological, technical, progress, and resource state
+- `noise.py` centralizes stochasticity so episodes are reproducible from a seed
+- `output_generator.py` turns an action plus hidden state into a realistic `IntermediateOutput`
+- `transition.py` applies action costs, updates progress flags, propagates artifacts, and decides whether the episode is done
+The output generator does not simply echo the action. It conditions outputs on the hidden state, then injects realistic noise such as dropout, false positives, false negatives, and imperfect clustering.
+### `server/rules/engine.py`
+The rule engine enforces scientific and procedural constraints before each action is applied.
+- hard violations block the action entirely
+- soft violations allow the action, but reduce output quality and add reward penalties
+Examples:
+- sequencing before library prep is a hard violation
+- running QC twice is a soft redundancy violation
+- making causal claims without enough evidence is a soft validity violation
+### `server/rewards/reward.py`
+Rewards are decomposed rather than being a single opaque number.
+Per-step reward includes:
+- validity
+- ordering
+- information gain
+- efficiency
+- novelty
+- penalties
+- potential-based shaping
+Terminal reward adds:
+- pipeline completeness
+- calibration of conclusions against hidden truth
+- remaining budget and time efficiency
+- overconfidence penalties for strong but incorrect claims
+This makes the environment easier to debug, benchmark, and train against.
+### `server/hackathon_environment.py`
+This is the orchestration layer that ties everything together.
+On `reset()` it:
+- seeds the noise model
+- generates a task and latent state
+- clears history, outputs, discoveries, conclusions, and cumulative reward
+On `step()` it:
+- checks rules
+- calls the transition engine
+- computes reward
+- appends a `PipelineStepRecord`
+- updates discovered markers and candidate mechanisms
+- stores conclusion claims if the action is `synthesize_conclusion`
+- builds the next `ExperimentObservation`
+This file is the best place to read if you want the end-to-end control flow.
+## What actually happens on one step
+Here is the concrete order of operations for `env.step(action)`:
+1. Increment the step counter.
+2. Copy the previous latent state for reward comparison.
+3. Run rule checks and split violations into hard vs soft.
+4. If there is a hard violation, return a failure report without applying the action.
+5. Otherwise deduct budget and time based on `ACTION_COSTS`.
+6. Update latent progress flags like `samples_collected`, `qc_performed`, or `de_performed`.
+7. Generate a structured simulated output for the chosen action.
+8. If there were soft violations, degrade output quality and attach warnings.
+9. Propagate artifacts back into latent state, such as discovered DE genes or cluster names.
+10. Compute decomposed reward from state transition plus output quality.
+11. If the episode is ending, compute terminal reward from completeness and conclusion calibration.
+12. Return an observation that exposes the visible summary but not the hidden truth.
+## Typical successful pipeline
+Most scenarios reward a sensible experiment order similar to:
+1. `collect_sample`
+2. `prepare_library`
+3. `sequence_cells`
+4. `run_qc`
+5. `filter_data`
+6. `normalize_data`
+7. `cluster_cells`
+8. one or more of:
+   `differential_expression`, `trajectory_analysis`, `pathway_enrichment`,
+   `regulatory_network_inference`, `marker_selection`, `validate_marker`
+9. `synthesize_conclusion`
+The exact best sequence depends on the scenario. For example:
+- trajectory scenarios benefit from `trajectory_analysis` and regulatory inference
+- biomarker scenarios benefit from DE, marker selection, and validation
+- perturbation scenarios benefit from pathway-level interpretation
+## Interfaces you can use
+### 1. In-process environment
+Use `BioExperimentEnvironment` when you want direct Python access with full structured observations:
+```python
+from models import ActionType, ExperimentAction
+from server.hackathon_environment import BioExperimentEnvironment
+env = BioExperimentEnvironment(scenario_name="biomarker_validation_lung")
+obs = env.reset()
+obs = env.step(ExperimentAction(
+    action_type=ActionType.COLLECT_SAMPLE,
+    parameters={"n_samples": 8},
+    justification="Collect enough material for downstream single-cell analysis.",
+    confidence=0.8,
+))
+print(obs.task.problem_statement)
+print(obs.latest_output.summary if obs.latest_output else "No output yet")
+print(obs.reward)
+```
+### 2. OpenEnv client/server mode
+Use the FastAPI app when you want to serve the environment over HTTP and WebSocket:
+```bash
+uv sync --extra dev
+uv run uvicorn server.app:app --reload
+```
+Then connect with the client:
+```python
+from client import BioExperimentEnv
+from models import ActionType, ExperimentAction
+with BioExperimentEnv(base_url="http://localhost:8000") as env:
+    result = env.reset()
+    result = env.step(ExperimentAction(action_type=ActionType.COLLECT_SAMPLE))
+    print(result.observation.latest_output.summary)
+```
+The environment class supports concurrent sessions, but the bundled server is currently configured with `max_concurrent_envs=1` in `server/app.py`.
+### 3. Gymnasium wrapper
+Use `training/gym_wrapper.py` when you want a classic RL interface:
+```python
+from training.gym_wrapper import BioExperimentGymEnv
+env = BioExperimentGymEnv()
+obs, info = env.reset()
+obs, reward, terminated, truncated, info = env.step({
+    "action_type": 0,
+    "confidence": 0.7,
+})
+```
+This wrapper vectorizes the structured observation into arrays and reduces the action interface to:
+- a discrete action type index
+- a scalar confidence value
+### 4. Benchmark and scripted agents
+- `training/literature_benchmark.py` runs paper-aligned action sequences and compares outcomes against curated expected findings
+- `run_agent.py` runs a local language model planner against the environment
+- `training/trajectory.py` stores trajectories for offline RL, imitation learning, replay, and evaluation
+- `training/evaluation.py` computes online, benchmark, expert-review, and fidelity-oriented metrics
+## Episode termination
+An episode ends when one of the following happens:
+- the agent chooses `synthesize_conclusion`
+- resources are exhausted
+- the environment reaches `MAX_STEPS` which is currently `30`
+## Why this is useful
+This environment is trying to model a realistic scientific planning loop rather than a toy decision problem:
+- actions have prerequisites
+- outputs are noisy and imperfect
+- budget and time matter
+- not every correct-looking answer is well supported
+- final conclusions are scored against hidden ground truth
+That makes it suitable for:
+- agent planning benchmarks
+- RL experiments on long-horizon scientific reasoning
+- literature-grounded evaluation
+- comparing structured policies against LLM-driven planners
+## Minimal project map
+```text
+.
+├── client.py                     # OpenEnv client
+├── models.py                     # Shared action / observation / task schemas
+├── server/
+│   ├── app.py                    # FastAPI/OpenEnv server
+│   ├── hackathon_environment.py  # Main environment orchestration
+│   ├── rewards/                  # Reward model
+│   ├── rules/                    # Constraint checking
+│   ├── simulator/                # Latent state, noise, outputs, transitions
+│   └── tasks/                    # Scenario library and task generation
+├── training/
+│   ├── evaluation.py             # Metrics
+│   ├── gym_wrapper.py            # Gymnasium wrapper
+│   ├── literature_benchmark.py   # Paper-backed benchmark flow
+│   └── trajectory.py             # Trajectory serialization
+└── tests/                        # Unit and integration tests
+```
+## Quick sanity check
+The current implementation was sanity-checked with:
+```bash
+uv run pytest tests/test_environment.py tests/test_literature_benchmark.py -q
+```
+Those tests verify:
+- reset and step lifecycle
+- valid vs invalid pipeline behavior
+- conclusion termination
+- literature-backed scenario selection
+- benchmark matching for curated expected findings

__init__.py ADDED Viewed

	@@ -0,0 +1,48 @@

+try:  # pragma: no cover - package import path
+    from .client import BioExperimentEnv
+    from .models import (
+        ActionType,
+        ConclusionClaim,
+        ExpectedFinding,
+        ExperimentAction,
+        ExperimentObservation,
+        IntermediateOutput,
+        OutputType,
+        PaperReference,
+        PipelineStepRecord,
+        ResourceUsage,
+        SubagentType,
+        TaskSpec,
+    )
+except ImportError:  # pragma: no cover - direct module import path
+    from client import BioExperimentEnv
+    from models import (
+        ActionType,
+        ConclusionClaim,
+        ExpectedFinding,
+        ExperimentAction,
+        ExperimentObservation,
+        IntermediateOutput,
+        OutputType,
+        PaperReference,
+        PipelineStepRecord,
+        ResourceUsage,
+        SubagentType,
+        TaskSpec,
+    )
+__all__ = [
+    "ActionType",
+    "BioExperimentEnv",
+    "ConclusionClaim",
+    "ExpectedFinding",
+    "ExperimentAction",
+    "ExperimentObservation",
+    "IntermediateOutput",
+    "OutputType",
+    "PaperReference",
+    "PipelineStepRecord",
+    "ResourceUsage",
+    "SubagentType",
+    "TaskSpec",
+]

client.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""Bio-Experiment Environment Client.
+Provides the ``BioExperimentEnv`` class that communicates with the
+environment server over WebSocket / HTTP using the OpenEnv protocol.
+"""
+from typing import Any, Dict, List
+from openenv.core.client_types import StepResult
+from openenv.core.env_server.types import State
+from openenv.core import EnvClient
+try:  # pragma: no cover - package import path
+    from .models import ExperimentAction, ExperimentObservation
+except ImportError:  # pragma: no cover - direct module import path
+    from models import ExperimentAction, ExperimentObservation
+class BioExperimentEnv(
+    EnvClient[ExperimentAction, ExperimentObservation, State]
+):
+    """Client for the Bio-Experiment Planning Environment.
+    Example:
+        >>> with BioExperimentEnv(base_url="http://localhost:8000") as env:
+        ...     result = env.reset()
+        ...     print(result.observation.task.problem_statement)
+        ...     result = env.step(ExperimentAction(
+        ...         action_type="collect_sample",
+        ...         parameters={"n_samples": 6},
+        ...     ))
+        ...     print(result.observation.latest_output.summary)
+    """
+    def _step_payload(self, action: ExperimentAction) -> Dict:
+        return action.model_dump()
+    def _parse_result(
+        self, payload: Dict
+    ) -> StepResult[ExperimentObservation]:
+        obs_data = payload.get("observation", {})
+        observation = ExperimentObservation(**obs_data)
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict) -> State:
+        return State(
+            episode_id=payload.get("episode_id"),
+            step_count=payload.get("step_count", 0),
+        )

models.py ADDED Viewed

	@@ -0,0 +1,268 @@

+"""
+Data models for the Bio-Experiment Planning RL Environment.
+Defines the POMDP action and observation contracts for a scientific agent
+that constructs biological experiment pipelines step-by-step.
+"""
+from __future__ import annotations
+from enum import Enum
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
+from openenv.core.env_server.types import Action, Observation
+# ── Action vocabulary ───────────────────────────────────────────────────────
+class ActionType(str, Enum):
+    COLLECT_SAMPLE = "collect_sample"
+    SELECT_COHORT = "select_cohort"
+    PREPARE_LIBRARY = "prepare_library"
+    CULTURE_CELLS = "culture_cells"
+    PERTURB_GENE = "perturb_gene"
+    PERTURB_COMPOUND = "perturb_compound"
+    SEQUENCE_CELLS = "sequence_cells"
+    RUN_QC = "run_qc"
+    FILTER_DATA = "filter_data"
+    NORMALIZE_DATA = "normalize_data"
+    INTEGRATE_BATCHES = "integrate_batches"
+    CLUSTER_CELLS = "cluster_cells"
+    DIFFERENTIAL_EXPRESSION = "differential_expression"
+    TRAJECTORY_ANALYSIS = "trajectory_analysis"
+    PATHWAY_ENRICHMENT = "pathway_enrichment"
+    REGULATORY_NETWORK_INFERENCE = "regulatory_network_inference"
+    MARKER_SELECTION = "marker_selection"
+    VALIDATE_MARKER = "validate_marker"
+    DESIGN_FOLLOWUP = "design_followup_experiment"
+    REQUEST_SUBAGENT_REVIEW = "request_subagent_review"
+    SYNTHESIZE_CONCLUSION = "synthesize_conclusion"
+WET_LAB_ACTIONS = frozenset({
+    ActionType.COLLECT_SAMPLE,
+    ActionType.SELECT_COHORT,
+    ActionType.PREPARE_LIBRARY,
+    ActionType.CULTURE_CELLS,
+    ActionType.PERTURB_GENE,
+    ActionType.PERTURB_COMPOUND,
+    ActionType.SEQUENCE_CELLS,
+    ActionType.VALIDATE_MARKER,
+})
+COMPUTATIONAL_ACTIONS = frozenset({
+    ActionType.RUN_QC,
+    ActionType.FILTER_DATA,
+    ActionType.NORMALIZE_DATA,
+    ActionType.INTEGRATE_BATCHES,
+    ActionType.CLUSTER_CELLS,
+    ActionType.DIFFERENTIAL_EXPRESSION,
+    ActionType.TRAJECTORY_ANALYSIS,
+    ActionType.PATHWAY_ENRICHMENT,
+    ActionType.REGULATORY_NETWORK_INFERENCE,
+    ActionType.MARKER_SELECTION,
+})
+META_ACTIONS = frozenset({
+    ActionType.DESIGN_FOLLOWUP,
+    ActionType.REQUEST_SUBAGENT_REVIEW,
+    ActionType.SYNTHESIZE_CONCLUSION,
+})
+class SubagentType(str, Enum):
+    WET_LAB_PLANNER = "wet_lab_planner"
+    COMPUTATIONAL_ANALYST = "computational_analyst"
+    OMICS_QC_AGENT = "omics_qc_agent"
+    CAUSAL_REASONING_AGENT = "causal_reasoning_agent"
+    BUDGET_SCHEDULER = "budget_scheduler"
+    BIOLOGICAL_RULE_CHECKER = "biological_rule_checker"
+    TOOL_EXECUTOR = "tool_executor"
+    RETROSPECTIVE_CRITIC = "retrospective_critic"
+    REPORT_SYNTHESIZER = "report_synthesizer"
+# ── Action schema ───────────────────────────────────────────────────────────
+class ExperimentAction(Action):
+    """Structured, compositional action for one experiment / analysis step.
+    Hybrid representation: discrete *action_type* plus typed arguments,
+    optional sub-agent / tool invocation, and calibration fields.
+    """
+    action_type: ActionType = Field(
+        ..., description="Discrete experiment or analysis step type"
+    )
+    input_targets: List[str] = Field(
+        default_factory=list,
+        description="References to prior outputs, samples, or artifacts",
+    )
+    method: Optional[str] = Field(
+        None, description="Specific method or tool (e.g. 'Seurat', 'CellRanger')"
+    )
+    parameters: Dict[str, Any] = Field(
+        default_factory=dict, description="Method-specific parameters"
+    )
+    expected_output_type: Optional[str] = Field(
+        None, description="What the agent expects this step to produce"
+    )
+    justification: Optional[str] = Field(
+        None, description="Scientific rationale for this step"
+    )
+    invoked_subagent: Optional[SubagentType] = Field(
+        None, description="Sub-agent to delegate to, if any"
+    )
+    tool_call_spec: Optional[Dict[str, Any]] = Field(
+        None, description="Structured tool invocation specification"
+    )
+    confidence: float = Field(
+        0.5, ge=0.0, le=1.0, description="Agent confidence in this step"
+    )
+# ── Intermediate outputs ────────────────────────────────────────────────────
+class OutputType(str, Enum):
+    QC_METRICS = "qc_metrics"
+    COUNT_MATRIX_SUMMARY = "count_matrix_summary"
+    EMBEDDING_SUMMARY = "embedding_summary"
+    CLUSTER_RESULT = "cluster_result"
+    DE_RESULT = "de_result"
+    PATHWAY_RESULT = "pathway_result"
+    TRAJECTORY_RESULT = "trajectory_result"
+    VALIDATION_RESULT = "validation_result"
+    NETWORK_RESULT = "network_result"
+    SAMPLE_COLLECTION_RESULT = "sample_collection_result"
+    LIBRARY_PREP_RESULT = "library_prep_result"
+    SEQUENCING_RESULT = "sequencing_result"
+    PERTURBATION_RESULT = "perturbation_result"
+    CULTURE_RESULT = "culture_result"
+    COHORT_RESULT = "cohort_result"
+    FOLLOWUP_DESIGN = "followup_design"
+    MARKER_RESULT = "marker_result"
+    FAILURE_REPORT = "failure_report"
+    SUBAGENT_REPORT = "subagent_report"
+    CONCLUSION = "conclusion"
+class IntermediateOutput(BaseModel):
+    """A single simulated output from one pipeline step."""
+    output_type: OutputType
+    step_index: int
+    success: bool = True
+    quality_score: float = Field(1.0, ge=0.0, le=1.0)
+    summary: str = ""
+    data: Dict[str, Any] = Field(default_factory=dict)
+    uncertainty: float = Field(0.0, ge=0.0, le=1.0)
+    warnings: List[str] = Field(default_factory=list)
+    artifacts_available: List[str] = Field(default_factory=list)
+# ── Observable state components ─────────────────────────────────────────────
+class ResourceUsage(BaseModel):
+    budget_used: float = 0.0
+    budget_remaining: float = 100_000.0
+    time_used_days: float = 0.0
+    time_remaining_days: float = 180.0
+    samples_consumed: int = 0
+    compute_hours_used: float = 0.0
+class PipelineStepRecord(BaseModel):
+    step_index: int
+    action_type: ActionType
+    method: Optional[str] = None
+    parameters: Dict[str, Any] = Field(default_factory=dict)
+    output_summary: str = ""
+    output_type: OutputType
+    success: bool = True
+    quality_score: float = 1.0
+    resource_cost: float = 0.0
+    time_cost_days: float = 0.0
+class PaperReference(BaseModel):
+    """Metadata for a literature source used to ground a task."""
+    title: str
+    citation: Optional[str] = None
+    doi: Optional[str] = None
+    pmid: Optional[str] = None
+    url: Optional[str] = None
+class ExpectedFinding(BaseModel):
+    """A paper-backed result that the agent should try to recover."""
+    finding: str
+    category: str = "claim"
+    keywords: List[str] = Field(default_factory=list)
+class TaskSpec(BaseModel):
+    """Specification of the biological problem to solve."""
+    problem_statement: str = "Unspecified biological problem"
+    modality: str = "scRNA-seq"
+    organism: str = "human"
+    tissue: str = "blood"
+    conditions: List[str] = Field(default_factory=list)
+    available_assays: List[str] = Field(default_factory=lambda: [
+        "10x_chromium", "smart-seq2", "bulk_rna_seq",
+        "atac-seq", "cite-seq", "spatial_transcriptomics",
+    ])
+    available_tools: List[str] = Field(default_factory=lambda: [
+        "CellRanger", "Seurat", "Scanpy", "DESeq2", "GSEA",
+        "Monocle", "scVelo", "CellChat", "SCENIC",
+    ])
+    budget_limit: float = 100_000.0
+    time_limit_days: float = 180.0
+    prior_observations: List[str] = Field(default_factory=list)
+    success_criteria: List[str] = Field(default_factory=list)
+    dataset_metadata: Dict[str, Any] = Field(default_factory=dict)
+    paper_references: List[PaperReference] = Field(default_factory=list)
+    expected_findings: List[ExpectedFinding] = Field(default_factory=list)
+class ConclusionClaim(BaseModel):
+    claim: str
+    evidence_steps: List[int] = Field(default_factory=list)
+    confidence: float = Field(0.5, ge=0.0, le=1.0)
+    claim_type: str = "correlational"
+    supporting_data: Dict[str, Any] = Field(default_factory=dict)
+# ── Observation schema ──────────────────────────────────────────────────────
+class ExperimentObservation(Observation):
+    """Full observable state returned to the agent at each timestep.
+    Deliberately excludes hidden latent biological truth, hidden failure
+    conditions, and ground-truth mechanisms.
+    """
+    task: TaskSpec = Field(default_factory=TaskSpec)
+    step_index: int = 0
+    pipeline_history: List[PipelineStepRecord] = Field(default_factory=list)
+    available_assays: List[str] = Field(default_factory=list)
+    available_tools: List[str] = Field(default_factory=list)
+    resource_usage: ResourceUsage = Field(default_factory=ResourceUsage)
+    latest_output: Optional[IntermediateOutput] = None
+    all_outputs: List[IntermediateOutput] = Field(default_factory=list)
+    discovered_markers: List[str] = Field(default_factory=list)
+    candidate_mechanisms: List[str] = Field(default_factory=list)
+    uncertainty_summary: Dict[str, float] = Field(default_factory=dict)
+    subagent_outputs: List[Dict[str, Any]] = Field(default_factory=list)
+    conclusions: List[ConclusionClaim] = Field(default_factory=list)
+    rule_violations: List[str] = Field(default_factory=list)
+    step_reward_breakdown: Dict[str, float] = Field(default_factory=dict)

openenv.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+spec_version: 1
+name: hackathon
+type: space
+runtime: fastapi
+app: server.app:app
+port: 8000

outputs/.gitkeep ADDED Viewed

	@@ -0,0 +1 @@


1	+

pyproject.toml ADDED Viewed

	@@ -0,0 +1,66 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-bio-experiment"
+version = "0.1.0"
+description = "RL environment for biological experiment pipeline planning"
+requires-python = ">=3.10"
+dependencies = [
+    "openenv-core[core]>=0.2.0",
+    "numpy>=1.24.0",
+    "scipy>=1.10.0",
+    "pydantic>=2.0.0",
+]
+[project.optional-dependencies]
+train = [
+    "gymnasium>=0.29.0",
+]
+bio = [
+    "biopython>=1.84",
+    "gseapy>=1.1.3",
+    "scanpy>=1.10.0",
+]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+    "gymnasium>=0.29.0",
+]
+[project.scripts]
+server = "hackathon.server.app:main"
+[tool.uv]
+package = false
+[tool.setuptools]
+include-package-data = true
+packages = [
+    "hackathon",
+    "hackathon.server",
+    "hackathon.server.simulator",
+    "hackathon.server.rules",
+    "hackathon.server.rewards",
+    "hackathon.server.tasks",
+    "hackathon.server.subagents",
+    "hackathon.training",
+    "hackathon.tests",
+]
+[tool.setuptools.package-dir]
+hackathon = "."
+"hackathon.server" = "server"
+"hackathon.server.simulator" = "server/simulator"
+"hackathon.server.rules" = "server/rules"
+"hackathon.server.rewards" = "server/rewards"
+"hackathon.server.tasks" = "server/tasks"
+"hackathon.server.subagents" = "server/subagents"
+"hackathon.training" = "training"
+"hackathon.tests" = "tests"

run_agent.py ADDED Viewed

	@@ -0,0 +1,292 @@

+"""Run the bio-experiment environment with Qwen3.5-2B as the planning agent."""
+from __future__ import annotations
+import json
+import re
+import sys
+import time
+from typing import Any, Dict, List, Optional
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from models import ActionType, ExperimentAction, ExperimentObservation
+from server.hackathon_environment import BioExperimentEnvironment
+MODEL_ID = "Qwen/Qwen3.5-0.8B"
+MAX_EPISODE_STEPS = 12
+PIPELINE_TASK = "image-text-to-text"
+USE_PIPELINE = True
+ACTION_TYPES = [a.value for a in ActionType]
+SYSTEM_PROMPT = """\
+You are an expert biologist planning a single-cell experiment pipeline.
+At each turn you see the experiment state and must pick the next step.
+Action types (in typical order):
+  collect_sample, prepare_library, sequence_cells, run_qc, filter_data,
+  normalize_data, cluster_cells, differential_expression,
+  pathway_enrichment, marker_selection, validate_marker, synthesize_conclusion
+Other actions: select_cohort, culture_cells, perturb_gene, perturb_compound,
+  integrate_batches, trajectory_analysis, regulatory_network_inference,
+  design_followup_experiment, request_subagent_review
+Respond with ONLY valid JSON, nothing else:
+{"action_type": "...", "method": null, "parameters": {}, "justification": "...", "confidence": 0.8}
+"""
+def format_observation(obs: ExperimentObservation) -> str:
+    parts = [
+        f"TASK: {obs.task.problem_statement}",
+        f"Organism: {obs.task.organism} | Tissue: {obs.task.tissue}",
+        f"Conditions: {', '.join(obs.task.conditions) or 'N/A'}",
+        f"Step: {obs.step_index} | Budget: ${obs.resource_usage.budget_remaining:,.0f} | Time: {obs.resource_usage.time_remaining_days:.0f}d",
+    ]
+    if obs.pipeline_history:
+        last5 = obs.pipeline_history[-5:]
+        parts.append("History:")
+        for h in last5:
+            tag = "OK" if h.success else "FAIL"
+            parts.append(f"  [{tag}] {h.action_type.value}: {h.output_summary[:80]}")
+    if obs.rule_violations:
+        parts.append(f"VIOLATIONS: {obs.rule_violations}")
+    if obs.discovered_markers:
+        parts.append(f"Markers: {obs.discovered_markers[:5]}")
+    return "\n".join(parts)
+def parse_action(text: str) -> Optional[ExperimentAction]:
+    match = re.search(r"\{[^{}]*\}", text, re.DOTALL)
+    if not match:
+        return None
+    try:
+        d = json.loads(match.group())
+    except json.JSONDecodeError:
+        return None
+    action_type = d.get("action_type")
+    if action_type not in ACTION_TYPES:
+        return None
+    return ExperimentAction(
+        action_type=ActionType(action_type),
+        method=d.get("method"),
+        parameters=d.get("parameters") or {},
+        justification=d.get("justification"),
+        confidence=min(1.0, max(0.0, float(d.get("confidence", 0.5)))),
+    )
+FALLBACK_SEQUENCE = [
+    ActionType.COLLECT_SAMPLE,
+    ActionType.PREPARE_LIBRARY,
+    ActionType.SEQUENCE_CELLS,
+    ActionType.RUN_QC,
+    ActionType.FILTER_DATA,
+    ActionType.NORMALIZE_DATA,
+    ActionType.CLUSTER_CELLS,
+    ActionType.DIFFERENTIAL_EXPRESSION,
+    ActionType.PATHWAY_ENRICHMENT,
+    ActionType.MARKER_SELECTION,
+    ActionType.SYNTHESIZE_CONCLUSION,
+]
+def fallback_action(step: int) -> ExperimentAction:
+    idx = min(step, len(FALLBACK_SEQUENCE) - 1)
+    return ExperimentAction(
+        action_type=FALLBACK_SEQUENCE[idx],
+        justification="fallback",
+        confidence=0.3,
+    )
+def log(msg: str) -> None:
+    print(msg, flush=True)
+def build_observation_prompt(obs: ExperimentObservation) -> str:
+    return format_observation(obs)
+def run_with_pipeline(pipe, prompt: str) -> str:
+    attempts = [
+        {"text": prompt},
+        {"text": prompt, "image": None},
+        {"image": prompt},
+    ]
+    for payload in attempts:
+        try:
+            result = pipe(payload, max_new_tokens=220)
+            if isinstance(result, list) and result:
+                result = result[0]
+            if isinstance(result, dict):
+                text = result.get("generated_text") or result.get("text") or result.get("answer")
+            elif isinstance(result, str):
+                text = result
+            else:
+                text = ""
+            if isinstance(text, str) and text.strip():
+                return text.strip()
+        except Exception:
+            continue
+    return ""
+def main():
+    tokenizer = None
+    model = None
+    eos_ids: List[int] = []
+    active_pipeline = None
+    if USE_PIPELINE:
+        log(f"Loading pipeline ({PIPELINE_TASK}) for {MODEL_ID} ...")
+        try:
+            active_pipeline = pipeline(
+                PIPELINE_TASK,
+                model=MODEL_ID,
+                trust_remote_code=True,
+                torch_dtype=torch.bfloat16,
+            )
+            log("Pipeline loaded.")
+        except Exception as exc:
+            log(f"Pipeline load failed ({exc}), falling back to tokenizer+model.")
+    if active_pipeline is None:
+        log(f"Loading tokenizer for {MODEL_ID} ...")
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_ID, trust_remote_code=True,
+        )
+        log("Tokenizer loaded. Loading model (this downloads ~4 GB on first run) ...")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            trust_remote_code=True,
+        )
+        log(f"Model loaded. Device: {model.device}")
+        if tokenizer.eos_token_id is not None:
+            eos_ids.append(tokenizer.eos_token_id)
+        extra = tokenizer.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
+        for tid in extra:
+            if isinstance(tid, int) and tid not in eos_ids:
+                eos_ids.append(tid)
+        log(f"EOS token ids: {eos_ids}")
+    env = BioExperimentEnvironment()
+    obs = env.reset()
+    log("\n" + "=" * 70)
+    log(f"TASK: {obs.task.problem_statement}")
+    log(f"Conditions: {obs.task.conditions}")
+    log(f"Budget: ${obs.task.budget_limit:,.0f} | Time: {obs.task.time_limit_days:.0f} days")
+    log("=" * 70)
+    cumulative_reward = 0.0
+    for step in range(MAX_EPISODE_STEPS):
+        user_msg = build_observation_prompt(obs)
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_msg},
+        ]
+        if tokenizer is None:
+            # Pipeline path usually ignores chat templates.
+            prompt = f"{SYSTEM_PROMPT}\n\n{user_msg}"
+        else:
+            try:
+                prompt = tokenizer.apply_chat_template(
+                    messages,
+                    tokenize=False,
+                    add_generation_prompt=True,
+                    enable_thinking=False,
+                )
+            except TypeError:
+                prompt = tokenizer.apply_chat_template(
+                    messages,
+                    tokenize=False,
+                    add_generation_prompt=True,
+                )
+        t0 = time.time()
+        if active_pipeline is not None:
+            response = run_with_pipeline(active_pipeline, prompt)
+            if not response:
+                response = format_observation(obs)
+        else:
+            assert tokenizer is not None and model is not None
+            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+            n_input = inputs["input_ids"].shape[1]
+            with torch.no_grad():
+                output_ids = model.generate(
+                    **inputs,
+                    max_new_tokens=200,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.8,
+                    top_k=20,
+                    repetition_penalty=1.3,
+                    eos_token_id=eos_ids if eos_ids else None,
+                )
+            new_tokens = output_ids[0][n_input:]
+            response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+        gen_time = time.time() - t0
+        action = parse_action(response)
+        used_fallback = False
+        if action is None:
+            log(f"\n  [!] Parse failed, using fallback. Raw: {response[:150]}")
+            action = fallback_action(step)
+            used_fallback = True
+        tag = " [FALLBACK]" if used_fallback else ""
+        log(f"\nStep {step + 1}: {action.action_type.value}{tag}  ({gen_time:.1f}s)")
+        if action.justification:
+            log(f"  Rationale: {action.justification}")
+        obs = env.step(action)
+        if obs.latest_output:
+            lo = obs.latest_output
+            status = "OK" if lo.success else "FAIL"
+            log(f"  [{status}] {lo.summary}")
+            if lo.warnings:
+                log(f"  Warnings: {lo.warnings}")
+        step_reward = obs.reward
+        cumulative_reward += step_reward
+        log(f"  Reward: {step_reward:+.3f}  (cum: {cumulative_reward:+.3f})")
+        log(f"  Budget: ${obs.resource_usage.budget_remaining:,.0f} | Time: {obs.resource_usage.time_remaining_days:.0f}d")
+        if obs.rule_violations:
+            log(f"  Violations: {obs.rule_violations}")
+        if obs.done:
+            break
+    log(f"\n{'=' * 70}")
+    log("EPISODE COMPLETE" if obs.done else f"MAX STEPS ({MAX_EPISODE_STEPS})")
+    log(f"  Steps: {obs.step_index}")
+    log(f"  Total reward: {cumulative_reward:+.3f}")
+    log(f"  Budget used: ${obs.resource_usage.budget_used:,.0f}")
+    log(f"  Time used: {obs.resource_usage.time_used_days:.0f} days")
+    if obs.conclusions:
+        log("  Conclusions:")
+        for c in obs.conclusions:
+            log(f"    [{c.claim_type}, conf={c.confidence:.2f}] {c.claim}")
+    log("=" * 70)
+if __name__ == "__main__":
+    main()

server/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .hackathon_environment import BioExperimentEnvironment
2	+
3	+ __all__ = ["BioExperimentEnvironment"]

server/app.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""FastAPI application for the Bio-Experiment Planning Environment.
+Endpoints:
+    - POST /reset:  Reset the environment
+    - POST /step:   Execute an action
+    - GET  /state:  Get current environment state
+    - GET  /schema: Get action/observation schemas
+    - WS   /ws:     WebSocket endpoint for persistent sessions
+"""
+try:
+    from openenv.core.env_server.http_server import create_app
+except Exception as e:  # pragma: no cover
+    raise ImportError(
+        "openenv is required for the web interface. "
+        "Install dependencies with 'uv sync'"
+    ) from e
+from models import ExperimentAction, ExperimentObservation
+from .hackathon_environment import BioExperimentEnvironment
+app = create_app(
+    BioExperimentEnvironment,
+    ExperimentAction,
+    ExperimentObservation,
+    env_name="bio_experiment",
+    max_concurrent_envs=1,
+)
+def main(host: str = "0.0.0.0", port: int = 8000):
+    import uvicorn
+    uvicorn.run(app, host=host, port=port)
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", default="0.0.0.0")
+    parser.add_argument("--port", type=int, default=8000)
+    args = parser.parse_args()
+    if args.host == "0.0.0.0" and args.port == 8000:
+        main()
+    else:
+        main(host=args.host, port=args.port)

server/hackathon_environment.py ADDED Viewed

	@@ -0,0 +1,239 @@

+"""Bio-Experiment Planning Environment.
+Implements the OpenEnv ``Environment`` interface as a POMDP where the
+agent proposes one structured experiment / analysis step at a time and
+receives simulated intermediate outputs from a latent biological world.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+from uuid import uuid4
+from openenv.core.env_server.interfaces import Environment
+from openenv.core.env_server.types import State
+from models import (
+    ActionType,
+    ConclusionClaim,
+    ExperimentAction,
+    ExperimentObservation,
+    IntermediateOutput,
+    PipelineStepRecord,
+    ResourceUsage,
+    TaskSpec,
+)
+from server.rules.engine import RuleEngine
+from server.rewards.reward import RewardBreakdown, RewardComputer
+from server.simulator.latent_state import FullLatentState
+from server.simulator.noise import NoiseModel
+from server.simulator.transition import ACTION_COSTS, TransitionEngine
+from server.tasks.generator import TaskGenerator
+MAX_STEPS = 30
+class BioExperimentEnvironment(Environment):
+    """POMDP environment for iterative biological experiment planning.
+    The agent observes ``ExperimentObservation`` (partial view) while the
+    environment maintains a ``FullLatentState`` (hidden ground truth).
+    """
+    SUPPORTS_CONCURRENT_SESSIONS: bool = True
+    def __init__(
+        self,
+        scenario_name: Optional[str] = None,
+        *,
+        domain_randomise: bool = True,
+    ) -> None:
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+        self._latent: Optional[FullLatentState] = None
+        self._task: Optional[TaskSpec] = None
+        self._scenario_name = scenario_name
+        self._noise = NoiseModel()
+        self._engine = TransitionEngine(self._noise)
+        self._rules = RuleEngine()
+        self._rewards = RewardComputer()
+        self._task_gen = TaskGenerator(domain_randomise=domain_randomise)
+        self._history: List[PipelineStepRecord] = []
+        self._outputs: List[IntermediateOutput] = []
+        self._conclusions: List[ConclusionClaim] = []
+        self._subagent_outputs: List[Dict[str, Any]] = []
+        self._discovered_markers: List[str] = []
+        self._candidate_mechanisms: List[str] = []
+        self._cumulative_reward: float = 0.0
+    # ── Environment interface ───────────────────────────────────────────
+    def reset(self) -> ExperimentObservation:
+        seed = hash(uuid4()) % (2**31)
+        self._noise.reseed(seed)
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+        self._task, self._latent = self._task_gen.generate(
+            seed=seed,
+            scenario_name=self._scenario_name,
+        )
+        self._latent.rng_seed = seed
+        self._history.clear()
+        self._outputs.clear()
+        self._conclusions.clear()
+        self._subagent_outputs.clear()
+        self._discovered_markers.clear()
+        self._candidate_mechanisms.clear()
+        self._cumulative_reward = 0.0
+        return self._build_observation(reward=0.0, done=False)
+    def step(  # type: ignore[override]
+        self, action: ExperimentAction
+    ) -> ExperimentObservation:
+        assert self._latent is not None, "Call reset() before step()"
+        assert self._task is not None
+        self._state.step_count += 1
+        prev_state = self._latent.model_copy(deep=True)
+        violations = self._rules.check(action, self._latent)
+        hard_v = self._rules.hard_violations(violations)
+        soft_v = self._rules.soft_violations(violations)
+        result = self._engine.step(
+            self._latent,
+            action,
+            hard_violations=hard_v,
+            soft_violations=soft_v,
+        )
+        self._latent = result.next_state
+        step_rb = self._rewards.step_reward(
+            action, prev_state, self._latent, result.output, hard_v, soft_v,
+        )
+        cost_budget, cost_time = ACTION_COSTS.get(action.action_type, (0, 0))
+        self._history.append(PipelineStepRecord(
+            step_index=self._state.step_count,
+            action_type=action.action_type,
+            method=action.method,
+            parameters=action.parameters,
+            output_summary=result.output.summary,
+            output_type=result.output.output_type,
+            success=result.output.success,
+            quality_score=result.output.quality_score,
+            resource_cost=cost_budget,
+            time_cost_days=cost_time,
+        ))
+        self._outputs.append(result.output)
+        self._update_discoveries(action, result.output)
+        if action.action_type == ActionType.SYNTHESIZE_CONCLUSION:
+            raw_claims = action.parameters.get("claims", [])
+            for c in raw_claims:
+                if isinstance(c, dict):
+                    self._conclusions.append(ConclusionClaim(**c))
+        done = result.done or self._state.step_count >= MAX_STEPS
+        terminal_rb = RewardBreakdown()
+        if done:
+            terminal_rb = self._rewards.terminal_reward(
+                self._latent, self._conclusions, self._task.success_criteria,
+            )
+        total_reward = step_rb.total + terminal_rb.total
+        self._cumulative_reward += total_reward
+        breakdown = step_rb.to_dict()
+        breakdown.update({f"term_{k}": v for k, v in terminal_rb.to_dict().items()})
+        return self._build_observation(
+            reward=total_reward,
+            done=done,
+            latest_output=result.output,
+            rule_violations=hard_v + soft_v,
+            reward_breakdown=breakdown,
+        )
+    @property
+    def state(self) -> State:
+        return self._state
+    def set_scenario(self, scenario_name: Optional[str]) -> None:
+        """Set the scenario used on the next reset."""
+        self._scenario_name = scenario_name
+    # ── internal helpers ────────────────────────────────────────────────
+    def _build_observation(
+        self,
+        *,
+        reward: float,
+        done: bool,
+        latest_output: Optional[IntermediateOutput] = None,
+        rule_violations: Optional[List[str]] = None,
+        reward_breakdown: Optional[Dict[str, float]] = None,
+    ) -> ExperimentObservation:
+        assert self._task is not None
+        assert self._latent is not None
+        res = self._latent.resources
+        return ExperimentObservation(
+            task=self._task,
+            step_index=self._state.step_count,
+            pipeline_history=list(self._history),
+            available_assays=list(self._task.available_assays),
+            available_tools=list(self._task.available_tools),
+            resource_usage=ResourceUsage(
+                budget_used=res.budget_used,
+                budget_remaining=res.budget_remaining,
+                time_used_days=res.time_used_days,
+                time_remaining_days=res.time_remaining_days,
+                samples_consumed=res.samples_consumed,
+                compute_hours_used=res.compute_hours_used,
+            ),
+            latest_output=latest_output,
+            all_outputs=list(self._outputs),
+            discovered_markers=list(self._discovered_markers),
+            candidate_mechanisms=list(self._candidate_mechanisms),
+            uncertainty_summary=self._compute_uncertainty_summary(),
+            subagent_outputs=list(self._subagent_outputs),
+            conclusions=list(self._conclusions),
+            rule_violations=rule_violations or [],
+            step_reward_breakdown=reward_breakdown or {},
+            done=done,
+            reward=reward,
+            metadata={
+                "episode_id": self._state.episode_id,
+                "step": self._state.step_count,
+                "cumulative_reward": self._cumulative_reward,
+            },
+        )
+    def _compute_uncertainty_summary(self) -> Dict[str, float]:
+        if not self._outputs:
+            return {}
+        recent = self._outputs[-5:]
+        avg_unc = sum(o.uncertainty for o in recent) / len(recent)
+        avg_qual = sum(o.quality_score for o in recent) / len(recent)
+        return {"avg_uncertainty": avg_unc, "avg_quality": avg_qual}
+    def _update_discoveries(
+        self, action: ExperimentAction, output: IntermediateOutput
+    ) -> None:
+        if action.action_type == ActionType.MARKER_SELECTION:
+            markers = output.data.get("markers", [])
+            self._discovered_markers.extend(markers)
+        if action.action_type == ActionType.REGULATORY_NETWORK_INFERENCE:
+            regs = output.data.get("top_regulators", [])
+            self._candidate_mechanisms.extend(regs)
+        if action.action_type == ActionType.PATHWAY_ENRICHMENT:
+            pathways = output.data.get("top_pathways", [])
+            self._candidate_mechanisms.extend(
+                [p["pathway"] for p in pathways if isinstance(p, dict)]
+            )

server/requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+openenv[core]>=0.2.0
+fastapi>=0.115.0
+uvicorn>=0.24.0

server/rewards/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .reward import RewardBreakdown, RewardComputer
2	+
3	+ __all__ = ["RewardBreakdown", "RewardComputer"]

server/rewards/reward.py ADDED Viewed

	@@ -0,0 +1,285 @@

+"""Decomposable reward function for the bio-experiment planning POMDP.
+Reward components
+─────────────────
+  r_validity      — biological validity of the chosen action
+  r_ordering      — correct ordering of experiment steps
+  r_info_gain     — information gain from the step's output
+  r_efficiency    — resource efficiency (budget & time normalised)
+  r_novelty       — bonus for non-redundant, non-trivial actions
+  r_penalty       — penalties for violations, redundancy, waste
+  r_terminal      — terminal quality & calibration against hidden truth
+Potential-based shaping
+  φ(s)            — progress potential used for dense shaping signal
+The final step reward is:
+  R_t = r_validity + r_ordering + r_info_gain + r_efficiency
+        + r_novelty + r_penalty + γ[φ(s_{t+1}) − φ(s_t)]
+The terminal reward adds:
+  R_T += r_terminal
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+from models import (
+    ActionType,
+    ConclusionClaim,
+    ExperimentAction,
+    IntermediateOutput,
+    META_ACTIONS,
+    WET_LAB_ACTIONS,
+)
+from server.simulator.latent_state import FullLatentState
+@dataclass
+class RewardBreakdown:
+    validity: float = 0.0
+    ordering: float = 0.0
+    info_gain: float = 0.0
+    efficiency: float = 0.0
+    novelty: float = 0.0
+    penalty: float = 0.0
+    shaping: float = 0.0
+    terminal: float = 0.0
+    components: Dict[str, float] = field(default_factory=dict)
+    @property
+    def total(self) -> float:
+        return (
+            self.validity
+            + self.ordering
+            + self.info_gain
+            + self.efficiency
+            + self.novelty
+            + self.penalty
+            + self.shaping
+            + self.terminal
+        )
+    def to_dict(self) -> Dict[str, float]:
+        d = {
+            "validity": self.validity,
+            "ordering": self.ordering,
+            "info_gain": self.info_gain,
+            "efficiency": self.efficiency,
+            "novelty": self.novelty,
+            "penalty": self.penalty,
+            "shaping": self.shaping,
+            "terminal": self.terminal,
+            "total": self.total,
+        }
+        d.update(self.components)
+        return d
+class RewardComputer:
+    """Computes step-wise and terminal rewards.
+    Parameters
+    ----------
+    gamma : float
+        Discount factor for potential-based shaping (default 0.99).
+    efficiency_weight : float
+        Relative importance of resource efficiency.
+    """
+    def __init__(
+        self,
+        gamma: float = 0.99,
+        efficiency_weight: float = 0.3,
+        info_gain_weight: float = 0.4,
+        validity_weight: float = 0.3,
+    ):
+        self.gamma = gamma
+        self.w_eff = efficiency_weight
+        self.w_ig = info_gain_weight
+        self.w_val = validity_weight
+    # ── step reward ─────────────────────────────────────────────────────
+    def step_reward(
+        self,
+        action: ExperimentAction,
+        prev_state: FullLatentState,
+        next_state: FullLatentState,
+        output: IntermediateOutput,
+        hard_violations: List[str],
+        soft_violations: List[str],
+    ) -> RewardBreakdown:
+        rb = RewardBreakdown()
+        # validity
+        if hard_violations:
+            rb.validity = -1.0
+            rb.penalty = -0.5 * len(hard_violations)
+            rb.components["hard_violations"] = len(hard_violations)
+            return rb
+        rb.validity = self.w_val * (1.0 if output.success else 0.0)
+        # ordering bonus: +0.2 if the step was a natural next step
+        rb.ordering = 0.2 * self._ordering_score(action, prev_state)
+        # information gain proxy: quality × (1 - uncertainty)
+        rb.info_gain = self.w_ig * output.quality_score * (1.0 - output.uncertainty)
+        # efficiency: normalised cost relative to budget
+        budget_frac = (
+            (next_state.resources.budget_used - prev_state.resources.budget_used)
+            / max(next_state.resources.budget_total, 1)
+        )
+        rb.efficiency = self.w_eff * max(0.0, 1.0 - 5.0 * budget_frac)
+        # novelty: small bonus for non-redundant steps
+        if not soft_violations:
+            rb.novelty = 0.1
+        # penalties
+        rb.penalty = -0.15 * len(soft_violations)
+        # potential-based shaping
+        phi_prev = self._potential(prev_state)
+        phi_next = self._potential(next_state)
+        rb.shaping = self.gamma * phi_next - phi_prev
+        return rb
+    # ── terminal reward ──────────────────────────────────��──────────────
+    def terminal_reward(
+        self,
+        state: FullLatentState,
+        conclusions: List[ConclusionClaim],
+        task_success_criteria: List[str],
+    ) -> RewardBreakdown:
+        rb = RewardBreakdown()
+        # pipeline completeness (0-1)
+        completeness = self._completeness(state)
+        rb.components["completeness"] = completeness
+        # calibration: how well conclusions align with hidden ground truth
+        calibration = self._calibration(state, conclusions)
+        rb.components["calibration"] = calibration
+        # efficiency bonus at terminal
+        budget_eff = state.resources.budget_remaining / max(
+            state.resources.budget_total, 1
+        )
+        time_eff = state.resources.time_remaining_days / max(
+            state.resources.time_limit_days, 1
+        )
+        rb.components["budget_efficiency"] = budget_eff
+        rb.components["time_efficiency"] = time_eff
+        # over-confidence penalty
+        overconf = self._overconfidence_penalty(state, conclusions)
+        rb.components["overconfidence_penalty"] = overconf
+        rb.terminal = (
+            3.0 * completeness
+            + 4.0 * calibration
+            + 1.0 * (budget_eff + time_eff) / 2.0
+            + overconf
+        )
+        return rb
+    # ── helpers ─────────────────────────────────────────────────────────
+    def _ordering_score(
+        self, action: ExperimentAction, s: FullLatentState
+    ) -> float:
+        """Heuristic: 1.0 if this step naturally follows the current progress."""
+        at = action.action_type
+        p = s.progress
+        NATURAL_NEXT = {
+            ActionType.COLLECT_SAMPLE: not p.samples_collected,
+            ActionType.PREPARE_LIBRARY: p.samples_collected and not p.library_prepared,
+            ActionType.SEQUENCE_CELLS: p.library_prepared and not p.cells_sequenced,
+            ActionType.RUN_QC: p.cells_sequenced and not p.qc_performed,
+            ActionType.FILTER_DATA: p.qc_performed and not p.data_filtered,
+            ActionType.NORMALIZE_DATA: p.data_filtered and not p.data_normalized,
+            ActionType.CLUSTER_CELLS: p.data_normalized and not p.cells_clustered,
+            ActionType.DIFFERENTIAL_EXPRESSION: p.data_normalized and not p.de_performed,
+            ActionType.PATHWAY_ENRICHMENT: p.de_performed and not p.pathways_analyzed,
+            ActionType.MARKER_SELECTION: p.de_performed and not p.markers_discovered,
+            ActionType.VALIDATE_MARKER: p.markers_discovered and not p.markers_validated,
+            ActionType.SYNTHESIZE_CONCLUSION: (
+                p.de_performed or p.cells_clustered
+            ) and not p.conclusion_reached,
+        }
+        return 1.0 if NATURAL_NEXT.get(at, False) else 0.3
+    def _potential(self, s: FullLatentState) -> float:
+        """Progress potential φ(s) — counts completed milestones."""
+        p = s.progress
+        milestones = [
+            p.samples_collected,
+            p.library_prepared,
+            p.cells_sequenced,
+            p.qc_performed,
+            p.data_filtered,
+            p.data_normalized,
+            p.cells_clustered,
+            p.de_performed,
+            p.pathways_analyzed,
+            p.markers_discovered,
+            p.markers_validated,
+            p.conclusion_reached,
+        ]
+        return sum(milestones) / len(milestones)
+    def _completeness(self, s: FullLatentState) -> float:
+        p = s.progress
+        core = [
+            p.samples_collected,
+            p.cells_sequenced,
+            p.qc_performed,
+            p.data_filtered,
+            p.data_normalized,
+            p.de_performed or p.cells_clustered,
+            p.conclusion_reached,
+        ]
+        return sum(core) / len(core)
+    def _calibration(
+        self, s: FullLatentState, conclusions: List[ConclusionClaim]
+    ) -> float:
+        if not conclusions:
+            return 0.0
+        true_mechanisms = set(s.biology.causal_mechanisms)
+        true_markers = set(s.biology.true_markers)
+        score = 0.0
+        n = len(conclusions)
+        for c in conclusions:
+            claim_lower = c.claim.lower()
+            match = any(m.lower() in claim_lower for m in true_mechanisms)
+            marker_match = any(m.lower() in claim_lower for m in true_markers)
+            if match or marker_match:
+                score += 1.0
+            else:
+                score -= 0.3
+        return max(0.0, min(1.0, score / max(n, 1)))
+    def _overconfidence_penalty(
+        self, s: FullLatentState, conclusions: List[ConclusionClaim]
+    ) -> float:
+        """Penalise high-confidence claims that disagree with ground truth."""
+        penalty = 0.0
+        true_set = set(
+            m.lower() for m in s.biology.causal_mechanisms + s.biology.true_markers
+        )
+        for c in conclusions:
+            is_correct = any(t in c.claim.lower() for t in true_set)
+            if c.confidence > 0.8 and not is_correct:
+                penalty -= 0.5 * c.confidence
+        return penalty

server/rules/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .engine import RuleEngine, RuleViolation
2	+
3	+ __all__ = ["RuleEngine", "RuleViolation"]

server/rules/engine.py ADDED Viewed

	@@ -0,0 +1,208 @@

+"""Biological rule engine — hard and soft constraint checking.
+Hard constraints block action execution entirely.
+Soft constraints allow execution but degrade output quality and incur penalties.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from enum import Enum
+from typing import List
+from models import ActionType, ExperimentAction
+from server.simulator.latent_state import FullLatentState
+class Severity(str, Enum):
+    HARD = "hard"
+    SOFT = "soft"
+@dataclass
+class RuleViolation:
+    rule_id: str
+    severity: Severity
+    message: str
+class RuleEngine:
+    """Evaluates biological and resource constraints against the current
+    latent state before each action is applied.
+    """
+    def check(
+        self, action: ExperimentAction, state: FullLatentState
+    ) -> List[RuleViolation]:
+        violations: List[RuleViolation] = []
+        violations.extend(self._check_prerequisites(action, state))
+        violations.extend(self._check_resource_constraints(action, state))
+        violations.extend(self._check_redundancy(action, state))
+        violations.extend(self._check_causal_validity(action, state))
+        return violations
+    def hard_violations(self, violations: List[RuleViolation]) -> List[str]:
+        return [v.message for v in violations if v.severity == Severity.HARD]
+    def soft_violations(self, violations: List[RuleViolation]) -> List[str]:
+        return [v.message for v in violations if v.severity == Severity.SOFT]
+    # ── prerequisite rules ──────────────────────────────────────────────
+    def _check_prerequisites(
+        self, action: ExperimentAction, s: FullLatentState
+    ) -> List[RuleViolation]:
+        vs: List[RuleViolation] = []
+        at = action.action_type
+        p = s.progress
+        REQUIRES = {
+            ActionType.PREPARE_LIBRARY: [
+                ("samples_collected", "Cannot prepare library without collected samples"),
+            ],
+            ActionType.SEQUENCE_CELLS: [
+                ("library_prepared", "Cannot sequence without library preparation"),
+            ],
+            ActionType.RUN_QC: [
+                ("cells_sequenced", "Cannot run QC before sequencing"),
+            ],
+            ActionType.FILTER_DATA: [
+                ("qc_performed", "Cannot filter data before QC"),
+            ],
+            ActionType.NORMALIZE_DATA: [
+                ("data_filtered", "Cannot normalise before filtering"),
+            ],
+            ActionType.INTEGRATE_BATCHES: [
+                ("data_normalized", "Cannot integrate batches before normalisation"),
+            ],
+            ActionType.CLUSTER_CELLS: [
+                ("data_normalized", "Cannot cluster before normalisation"),
+            ],
+            ActionType.DIFFERENTIAL_EXPRESSION: [
+                ("data_normalized", "Cannot run DE before normalisation"),
+            ],
+            ActionType.TRAJECTORY_ANALYSIS: [
+                ("data_normalized", "Cannot infer trajectories before normalisation"),
+            ],
+            ActionType.PATHWAY_ENRICHMENT: [
+                ("de_performed", "Cannot run pathway enrichment without DE results"),
+            ],
+            ActionType.REGULATORY_NETWORK_INFERENCE: [
+                ("data_normalized", "Cannot infer networks before normalisation"),
+            ],
+            ActionType.MARKER_SELECTION: [
+                ("de_performed", "Cannot select markers without DE results"),
+            ],
+            ActionType.VALIDATE_MARKER: [
+                ("markers_discovered", "Cannot validate markers before discovery"),
+            ],
+            ActionType.PERTURB_GENE: [
+                ("samples_collected", "Cannot perturb without samples"),
+            ],
+            ActionType.PERTURB_COMPOUND: [
+                ("samples_collected", "Cannot perturb without samples"),
+            ],
+            ActionType.CULTURE_CELLS: [
+                ("samples_collected", "Cannot culture without samples"),
+            ],
+        }
+        for flag, msg in REQUIRES.get(at, []):
+            if not getattr(p, flag, False):
+                vs.append(RuleViolation(
+                    rule_id=f"prereq_{at.value}_{flag}",
+                    severity=Severity.HARD,
+                    message=msg,
+                ))
+        return vs
+    # ── resource constraints ────────────────────────────────────────────
+    def _check_resource_constraints(
+        self, action: ExperimentAction, s: FullLatentState
+    ) -> List[RuleViolation]:
+        vs: List[RuleViolation] = []
+        if s.resources.budget_exhausted:
+            vs.append(RuleViolation(
+                rule_id="budget_exhausted",
+                severity=Severity.HARD,
+                message="Budget exhausted — no further actions possible",
+            ))
+        if s.resources.time_exhausted:
+            vs.append(RuleViolation(
+                rule_id="time_exhausted",
+                severity=Severity.HARD,
+                message="Time limit reached — no further actions possible",
+            ))
+        remaining = s.resources.budget_remaining
+        from server.simulator.transition import ACTION_COSTS
+        cost, _ = ACTION_COSTS.get(action.action_type, (0, 0))
+        if cost > remaining and remaining > 0:
+            vs.append(RuleViolation(
+                rule_id="budget_insufficient",
+                severity=Severity.SOFT,
+                message=f"Action costs ${cost:,.0f} but only ${remaining:,.0f} remains",
+            ))
+        return vs
+    # ── redundancy checks ───────────────────────────────────────────────
+    def _check_redundancy(
+        self, action: ExperimentAction, s: FullLatentState
+    ) -> List[RuleViolation]:
+        vs: List[RuleViolation] = []
+        at = action.action_type
+        p = s.progress
+        REDUNDANT = {
+            ActionType.COLLECT_SAMPLE: "samples_collected",
+            ActionType.PREPARE_LIBRARY: "library_prepared",
+            ActionType.SEQUENCE_CELLS: "cells_sequenced",
+            ActionType.RUN_QC: "qc_performed",
+            ActionType.FILTER_DATA: "data_filtered",
+            ActionType.NORMALIZE_DATA: "data_normalized",
+        }
+        flag = REDUNDANT.get(at)
+        if flag and getattr(p, flag, False):
+            vs.append(RuleViolation(
+                rule_id=f"redundant_{at.value}",
+                severity=Severity.SOFT,
+                message=f"Step '{at.value}' already completed — redundant action",
+            ))
+        return vs
+    # ── causal validity ─────────────────────────────────────────────────
+    def _check_causal_validity(
+        self, action: ExperimentAction, s: FullLatentState
+    ) -> List[RuleViolation]:
+        vs: List[RuleViolation] = []
+        if action.action_type == ActionType.SYNTHESIZE_CONCLUSION:
+            if not s.progress.de_performed and not s.progress.cells_clustered:
+                vs.append(RuleViolation(
+                    rule_id="premature_conclusion",
+                    severity=Severity.SOFT,
+                    message="Synthesising conclusion without substantive analysis",
+                ))
+            claims = action.parameters.get("claims", [])
+            for claim in claims:
+                if isinstance(claim, dict) and claim.get("claim_type") == "causal":
+                    if not s.progress.markers_validated and not s.progress.networks_inferred:
+                        vs.append(RuleViolation(
+                            rule_id="unsupported_causal_claim",
+                            severity=Severity.SOFT,
+                            message="Causal claim without validation or network evidence",
+                        ))
+                        break
+        if action.action_type == ActionType.PATHWAY_ENRICHMENT:
+            if not s.progress.de_performed:
+                vs.append(RuleViolation(
+                    rule_id="pathway_without_de",
+                    severity=Severity.SOFT,
+                    message="Pathway enrichment without DE may yield unreliable results",
+                ))
+        return vs

server/simulator/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from .latent_state import (
+    CellPopulation,
+    ExperimentProgress,
+    FullLatentState,
+    GeneProgram,
+    LatentBiologicalState,
+    ResourceState,
+    TechnicalState,
+)
+from .noise import NoiseModel
+from .output_generator import OutputGenerator
+from .transition import TransitionEngine
+__all__ = [
+    "CellPopulation",
+    "ExperimentProgress",
+    "FullLatentState",
+    "GeneProgram",
+    "LatentBiologicalState",
+    "NoiseModel",
+    "OutputGenerator",
+    "ResourceState",
+    "TechnicalState",
+    "TransitionEngine",
+]

server/simulator/latent_state.py ADDED Viewed

	@@ -0,0 +1,143 @@

+"""Latent biological and technical state — hidden from the agent."""
+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
+class CellPopulation(BaseModel):
+    """Ground-truth cell sub-population in the simulated tissue."""
+    name: str
+    proportion: float = Field(ge=0.0, le=1.0)
+    marker_genes: List[str] = Field(default_factory=list)
+    state: str = "quiescent"
+    condition_response: Dict[str, float] = Field(default_factory=dict)
+class GeneProgram(BaseModel):
+    """A latent gene-regulatory programme."""
+    name: str
+    genes: List[str] = Field(default_factory=list)
+    activity_level: float = Field(0.5, ge=0.0, le=1.0)
+    condition_dependent: bool = False
+    conditions_active: List[str] = Field(default_factory=list)
+class LatentBiologicalState(BaseModel):
+    """Hidden ground-truth biology the agent cannot directly observe."""
+    cell_populations: List[CellPopulation] = Field(default_factory=list)
+    true_de_genes: Dict[str, Dict[str, float]] = Field(
+        default_factory=dict,
+        description="comparison_key → {gene: log2FC}",
+    )
+    true_pathways: Dict[str, float] = Field(
+        default_factory=dict,
+        description="pathway → activity level",
+    )
+    gene_programs: List[GeneProgram] = Field(default_factory=list)
+    true_trajectory: Optional[Dict[str, Any]] = None
+    true_regulatory_network: Dict[str, List[str]] = Field(
+        default_factory=dict,
+        description="TF → target genes",
+    )
+    perturbation_effects: Dict[str, Dict[str, float]] = Field(
+        default_factory=dict,
+        description="perturbation → {gene: effect_size}",
+    )
+    confounders: Dict[str, float] = Field(default_factory=dict)
+    true_markers: List[str] = Field(default_factory=list)
+    causal_mechanisms: List[str] = Field(default_factory=list)
+    n_true_cells: int = 10_000
+class TechnicalState(BaseModel):
+    """Hidden technical parameters that shape experimental noise."""
+    batch_effects: Dict[str, float] = Field(default_factory=dict)
+    ambient_rna_fraction: float = 0.05
+    doublet_rate: float = 0.04
+    dropout_rate: float = 0.1
+    sample_quality: float = Field(0.9, ge=0.0, le=1.0)
+    library_complexity: float = Field(0.8, ge=0.0, le=1.0)
+    sequencing_depth_factor: float = 1.0
+    capture_efficiency: float = 0.6
+class ExperimentProgress(BaseModel):
+    """Flags tracking which experiment stages have been completed."""
+    samples_collected: bool = False
+    cohort_selected: bool = False
+    cells_cultured: bool = False
+    library_prepared: bool = False
+    perturbation_applied: bool = False
+    cells_sequenced: bool = False
+    qc_performed: bool = False
+    data_filtered: bool = False
+    data_normalized: bool = False
+    batches_integrated: bool = False
+    cells_clustered: bool = False
+    de_performed: bool = False
+    trajectories_inferred: bool = False
+    pathways_analyzed: bool = False
+    networks_inferred: bool = False
+    markers_discovered: bool = False
+    markers_validated: bool = False
+    conclusion_reached: bool = False
+    n_cells_after_filter: Optional[int] = None
+    n_clusters_found: Optional[int] = None
+    n_de_genes_found: Optional[int] = None
+    n_markers_found: Optional[int] = None
+class ResourceState(BaseModel):
+    """Full internal resource tracking (superset of agent-visible ResourceUsage)."""
+    budget_total: float = 100_000.0
+    budget_used: float = 0.0
+    time_limit_days: float = 180.0
+    time_used_days: float = 0.0
+    samples_available: int = 0
+    samples_consumed: int = 0
+    compute_hours_used: float = 0.0
+    sequencing_lanes_used: int = 0
+    reagent_kits_used: int = 0
+    @property
+    def budget_remaining(self) -> float:
+        return max(0.0, self.budget_total - self.budget_used)
+    @property
+    def time_remaining_days(self) -> float:
+        return max(0.0, self.time_limit_days - self.time_used_days)
+    @property
+    def budget_exhausted(self) -> bool:
+        return self.budget_remaining <= 0
+    @property
+    def time_exhausted(self) -> bool:
+        return self.time_remaining_days <= 0
+class FullLatentState(BaseModel):
+    """Complete hidden state of the simulated biological world."""
+    biology: LatentBiologicalState = Field(
+        default_factory=LatentBiologicalState
+    )
+    technical: TechnicalState = Field(default_factory=TechnicalState)
+    progress: ExperimentProgress = Field(default_factory=ExperimentProgress)
+    resources: ResourceState = Field(default_factory=ResourceState)
+    hidden_failure_conditions: List[str] = Field(default_factory=list)
+    mechanism_confidence: Dict[str, float] = Field(default_factory=dict)
+    discovered_de_genes: List[str] = Field(default_factory=list)
+    discovered_clusters: List[str] = Field(default_factory=list)
+    step_count: int = 0
+    rng_seed: int = 42

server/simulator/noise.py ADDED Viewed

	@@ -0,0 +1,124 @@

+"""Stochastic noise models for the biological simulator."""
+from __future__ import annotations
+from typing import Dict, List, Tuple
+import numpy as np
+class NoiseModel:
+    """Generates calibrated noise for simulated experimental outputs.
+    All randomness is funnelled through a single ``numpy.Generator``
+    so that episodes are reproducible given the same seed.
+    """
+    def __init__(self, seed: int = 42):
+        self.rng = np.random.default_rng(seed)
+    def reseed(self, seed: int) -> None:
+        self.rng = np.random.default_rng(seed)
+    # ── expression-level noise ──────────────────────────────────────────
+    def add_expression_noise(
+        self,
+        true_values: Dict[str, float],
+        noise_level: float,
+        dropout_rate: float,
+    ) -> Dict[str, float]:
+        noisy: Dict[str, float] = {}
+        for gene, value in true_values.items():
+            if self.rng.random() < dropout_rate:
+                noisy[gene] = 0.0
+            else:
+                sigma = noise_level * abs(value) + 0.1
+                noisy[gene] = float(value + self.rng.normal(0, sigma))
+        return noisy
+    # ── effect-size sampling ────────────────────────────────────────────
+    def sample_effect_sizes(
+        self,
+        true_effects: Dict[str, float],
+        sample_size: int,
+        noise_level: float,
+    ) -> Dict[str, float]:
+        se = noise_level / max(np.sqrt(max(sample_size, 1)), 1e-6)
+        return {
+            gene: float(effect + self.rng.normal(0, se))
+            for gene, effect in true_effects.items()
+        }
+    def sample_p_values(
+        self,
+        true_effects: Dict[str, float],
+        sample_size: int,
+        noise_level: float,
+    ) -> Dict[str, float]:
+        """Simulate approximate p-values from z-statistics."""
+        from scipy import stats  # type: ignore[import-untyped]
+        p_values: Dict[str, float] = {}
+        se = noise_level / max(np.sqrt(max(sample_size, 1)), 1e-6)
+        for gene, effect in true_effects.items():
+            z = abs(effect) / max(se, 1e-8)
+            p_values[gene] = float(2 * stats.norm.sf(z))
+        return p_values
+    # ── false discovery helpers ─────────────────────────────────────────
+    def generate_false_positives(
+        self, n_background_genes: int, fdr: float
+    ) -> List[str]:
+        n_fp = int(self.rng.binomial(n_background_genes, fdr))
+        return [f"FP_GENE_{i}" for i in range(n_fp)]
+    def generate_false_negatives(
+        self, true_genes: List[str], fnr: float
+    ) -> List[str]:
+        """Return the subset of *true_genes* that are missed."""
+        return [g for g in true_genes if self.rng.random() < fnr]
+    # ── quality helpers ─────────────────────────────────────────────────
+    def quality_degradation(
+        self, base_quality: float, factors: List[float]
+    ) -> float:
+        q = base_quality
+        for f in factors:
+            q *= f
+        return float(np.clip(q + self.rng.normal(0, 0.02), 0.0, 1.0))
+    def sample_qc_metric(
+        self, mean: float, std: float, clip_lo: float = 0.0, clip_hi: float = 1.0
+    ) -> float:
+        return float(np.clip(self.rng.normal(mean, std), clip_lo, clip_hi))
+    def sample_count(self, lam: float) -> int:
+        return int(self.rng.poisson(max(lam, 0)))
+    def coin_flip(self, p: float) -> bool:
+        return bool(self.rng.random() < p)
+    def sample_cluster_count(
+        self, n_true_populations: int, quality: float
+    ) -> int:
+        """Over- or under-clustering depending on preprocessing quality."""
+        delta = self.rng.integers(-2, 3)
+        noise_clusters = max(0, int(round((1.0 - quality) * 3)))
+        return max(1, n_true_populations + delta + noise_clusters)
+    def shuffle_ranking(
+        self, items: List[str], noise_level: float
+    ) -> List[str]:
+        """Permute a ranking with Gaussian noise on ordinals."""
+        n = len(items)
+        if n == 0:
+            return []
+        scores = np.arange(n, dtype=float) + self.rng.normal(
+            0, noise_level * n, size=n
+        )
+        order = np.argsort(scores)
+        return [items[int(i)] for i in order]

server/simulator/output_generator.py ADDED Viewed

	@@ -0,0 +1,495 @@

+"""Generate simulated intermediate outputs conditioned on latent state."""
+from __future__ import annotations
+from typing import Any, Dict, List
+from models import (
+    ActionType,
+    ExperimentAction,
+    IntermediateOutput,
+    OutputType,
+)
+from .latent_state import FullLatentState
+from .noise import NoiseModel
+class OutputGenerator:
+    """Creates structured ``IntermediateOutput`` objects conditioned on the
+    hidden latent state, the action taken, and a stochastic noise model.
+    """
+    def __init__(self, noise: NoiseModel):
+        self.noise = noise
+    def generate(
+        self,
+        action: ExperimentAction,
+        state: FullLatentState,
+        step_index: int,
+    ) -> IntermediateOutput:
+        handler = _HANDLERS.get(action.action_type, self._default)
+        return handler(self, action, state, step_index)
+    # ── wet-lab outputs ─────────────────────────────────────────────────
+    def _collect_sample(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        n_samples = action.parameters.get("n_samples", 6)
+        quality = self.noise.quality_degradation(
+            s.technical.sample_quality, [s.technical.capture_efficiency]
+        )
+        return IntermediateOutput(
+            output_type=OutputType.SAMPLE_COLLECTION_RESULT,
+            step_index=idx,
+            quality_score=quality,
+            summary=f"Collected {n_samples} samples (quality={quality:.2f})",
+            data={
+                "n_samples": n_samples,
+                "quality": quality,
+                "organism": "human",
+                "tissue": "blood",
+            },
+            artifacts_available=["raw_samples"],
+        )
+    def _select_cohort(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        criteria = action.parameters.get("criteria", {})
+        n_selected = action.parameters.get("n_selected", 4)
+        return IntermediateOutput(
+            output_type=OutputType.COHORT_RESULT,
+            step_index=idx,
+            summary=f"Selected cohort of {n_selected} samples with criteria {criteria}",
+            data={"n_selected": n_selected, "criteria": criteria},
+            artifacts_available=["cohort_manifest"],
+        )
+    def _prepare_library(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        complexity = self.noise.quality_degradation(
+            s.technical.library_complexity,
+            [s.technical.sample_quality],
+        )
+        return IntermediateOutput(
+            output_type=OutputType.LIBRARY_PREP_RESULT,
+            step_index=idx,
+            quality_score=complexity,
+            summary=f"Library prepared (complexity={complexity:.2f})",
+            data={
+                "library_complexity": complexity,
+                "method": action.method or "10x_chromium",
+            },
+            artifacts_available=["prepared_library"],
+        )
+    def _culture_cells(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        days = action.parameters.get("days", 7)
+        viability = self.noise.sample_qc_metric(0.92, 0.05, 0.5, 1.0)
+        return IntermediateOutput(
+            output_type=OutputType.CULTURE_RESULT,
+            step_index=idx,
+            quality_score=viability,
+            summary=f"Cultured for {days}d, viability={viability:.2f}",
+            data={"days": days, "viability": viability},
+            artifacts_available=["cultured_cells"],
+        )
+    def _perturb(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        target = action.parameters.get("target", "unknown")
+        efficiency = self.noise.sample_qc_metric(0.75, 0.15, 0.0, 1.0)
+        return IntermediateOutput(
+            output_type=OutputType.PERTURBATION_RESULT,
+            step_index=idx,
+            quality_score=efficiency,
+            summary=f"Perturbation of {target} (efficiency={efficiency:.2f})",
+            data={
+                "target": target,
+                "efficiency": efficiency,
+                "type": action.action_type.value,
+            },
+            artifacts_available=["perturbed_cells"],
+        )
+    def _sequence_cells(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        depth = s.technical.sequencing_depth_factor
+        n_cells = self.noise.sample_count(
+            s.biology.n_true_cells * s.technical.capture_efficiency
+        )
+        n_genes = self.noise.sample_count(18_000)
+        median_umi = self.noise.sample_count(int(3000 * depth))
+        quality = self.noise.quality_degradation(
+            s.technical.sample_quality,
+            [s.technical.library_complexity, s.technical.capture_efficiency],
+        )
+        return IntermediateOutput(
+            output_type=OutputType.SEQUENCING_RESULT,
+            step_index=idx,
+            quality_score=quality,
+            summary=(
+                f"Sequenced {n_cells} cells, {n_genes} genes detected, "
+                f"median UMI={median_umi}"
+            ),
+            data={
+                "n_cells": n_cells,
+                "n_genes": n_genes,
+                "median_umi": median_umi,
+                "sequencing_saturation": self.noise.sample_qc_metric(0.7, 0.1),
+            },
+            artifacts_available=["raw_count_matrix"],
+        )
+    # ── computational outputs ───────────────────────────────────────────
+    def _run_qc(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        doublet_frac = self.noise.sample_qc_metric(
+            s.technical.doublet_rate, 0.01, 0.0, 0.2
+        )
+        mito_frac = self.noise.sample_qc_metric(0.05, 0.02, 0.0, 0.3)
+        ambient_frac = self.noise.sample_qc_metric(
+            s.technical.ambient_rna_fraction, 0.01, 0.0, 0.2
+        )
+        warnings: List[str] = []
+        if doublet_frac > 0.08:
+            warnings.append(f"High doublet rate ({doublet_frac:.1%})")
+        if mito_frac > 0.1:
+            warnings.append(f"High mitochondrial fraction ({mito_frac:.1%})")
+        quality = 1.0 - (doublet_frac + mito_frac + ambient_frac)
+        return IntermediateOutput(
+            output_type=OutputType.QC_METRICS,
+            step_index=idx,
+            quality_score=max(0.0, quality),
+            summary="QC metrics computed",
+            data={
+                "doublet_fraction": doublet_frac,
+                "mitochondrial_fraction": mito_frac,
+                "ambient_rna_fraction": ambient_frac,
+                "median_genes_per_cell": self.noise.sample_count(2500),
+                "median_umi_per_cell": self.noise.sample_count(8000),
+            },
+            warnings=warnings,
+            artifacts_available=["qc_report"],
+        )
+    def _filter_data(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        retain_frac = self.noise.sample_qc_metric(0.85, 0.05, 0.5, 1.0)
+        n_before = s.biology.n_true_cells
+        n_after = max(100, int(n_before * retain_frac))
+        return IntermediateOutput(
+            output_type=OutputType.COUNT_MATRIX_SUMMARY,
+            step_index=idx,
+            quality_score=retain_frac,
+            summary=f"Filtered {n_before} → {n_after} cells ({retain_frac:.0%} retained)",
+            data={
+                "n_cells_before": n_before,
+                "n_cells_after": n_after,
+                "n_genes_retained": self.noise.sample_count(15_000),
+                "retain_fraction": retain_frac,
+            },
+            artifacts_available=["filtered_count_matrix"],
+        )
+    def _normalize_data(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        method = action.method or "log_normalize"
+        return IntermediateOutput(
+            output_type=OutputType.COUNT_MATRIX_SUMMARY,
+            step_index=idx,
+            summary=f"Normalized with {method}",
+            data={"method": method, "n_hvg": self.noise.sample_count(2000)},
+            artifacts_available=["normalized_matrix", "hvg_list"],
+        )
+    def _integrate_batches(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        method = action.method or "harmony"
+        residual = self.noise.sample_qc_metric(0.05, 0.03, 0.0, 0.3)
+        return IntermediateOutput(
+            output_type=OutputType.EMBEDDING_SUMMARY,
+            step_index=idx,
+            quality_score=1.0 - residual,
+            summary=f"Batch integration ({method}), residual batch effect={residual:.2f}",
+            data={
+                "method": method,
+                "residual_batch_effect": residual,
+                "n_batches": len(s.technical.batch_effects) or 1,
+            },
+            artifacts_available=["integrated_embedding"],
+        )
+    def _cluster_cells(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        n_true = len(s.biology.cell_populations) or 5
+        quality = self.noise.quality_degradation(0.8, [0.95])
+        n_clusters = self.noise.sample_cluster_count(n_true, quality)
+        cluster_names = [f"cluster_{i}" for i in range(n_clusters)]
+        sizes = self._random_partition(s.biology.n_true_cells, n_clusters)
+        return IntermediateOutput(
+            output_type=OutputType.CLUSTER_RESULT,
+            step_index=idx,
+            quality_score=quality,
+            summary=f"Found {n_clusters} clusters (ground-truth populations: {n_true})",
+            data={
+                "n_clusters": n_clusters,
+                "cluster_names": cluster_names,
+                "cluster_sizes": sizes,
+                "silhouette_score": self.noise.sample_qc_metric(0.35, 0.1, -1.0, 1.0),
+            },
+            uncertainty=abs(n_clusters - n_true) / max(n_true, 1),
+            artifacts_available=["cluster_assignments", "umap_embedding"],
+        )
+    def _differential_expression(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        comparison = action.parameters.get("comparison", "disease_vs_healthy")
+        true_effects = s.biology.true_de_genes.get(comparison, {})
+        n_cells = s.progress.n_cells_after_filter or s.biology.n_true_cells
+        noise_level = s.technical.dropout_rate + 0.1 * (1.0 - s.technical.sample_quality)
+        observed = self.noise.sample_effect_sizes(true_effects, n_cells, noise_level)
+        fp_genes = self.noise.generate_false_positives(5000, 0.002 + noise_level * 0.01)
+        for g in fp_genes:
+            observed[g] = float(self.noise.rng.normal(0, 0.3))
+        fn_genes = self.noise.generate_false_negatives(list(true_effects.keys()), 0.15)
+        for g in fn_genes:
+            observed.pop(g, None)
+        top_genes = sorted(observed.items(), key=lambda kv: abs(kv[1]), reverse=True)[:50]
+        return IntermediateOutput(
+            output_type=OutputType.DE_RESULT,
+            step_index=idx,
+            quality_score=self.noise.quality_degradation(0.8, [1.0 - noise_level]),
+            summary=f"DE analysis ({comparison}): {len(observed)} genes tested, {len(top_genes)} top hits",
+            data={
+                "comparison": comparison,
+                "n_tested": len(observed),
+                "top_genes": [
+                    {"gene": g, "log2FC": round(fc, 3)} for g, fc in top_genes
+                ],
+                "n_significant": sum(1 for _, fc in observed.items() if abs(fc) > 0.5),
+            },
+            uncertainty=noise_level,
+            artifacts_available=["de_table"],
+        )
+    def _trajectory_analysis(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        has_trajectory = s.biology.true_trajectory is not None
+        quality = self.noise.quality_degradation(0.7 if has_trajectory else 0.3, [0.9])
+        summary_data: Dict[str, Any] = {"method": action.method or "monocle3"}
+        if has_trajectory:
+            summary_data.update({
+                "n_lineages": s.biology.true_trajectory.get("n_lineages", 1),
+                "pseudotime_range": [0.0, 1.0],
+                "branching_detected": s.biology.true_trajectory.get("branching", False),
+            })
+        else:
+            summary_data["n_lineages"] = self.noise.sample_count(1) + 1
+            summary_data["pseudotime_range"] = [0.0, 1.0]
+            summary_data["branching_detected"] = self.noise.coin_flip(0.3)
+        return IntermediateOutput(
+            output_type=OutputType.TRAJECTORY_RESULT,
+            step_index=idx,
+            quality_score=quality,
+            summary="Trajectory / pseudotime analysis complete",
+            data=summary_data,
+            uncertainty=0.2 if has_trajectory else 0.6,
+            artifacts_available=["pseudotime_values", "lineage_graph"],
+        )
+    def _pathway_enrichment(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        true_pathways = s.biology.true_pathways
+        noise_level = 0.15
+        observed: Dict[str, float] = {}
+        for pw, activity in true_pathways.items():
+            observed[pw] = activity + float(self.noise.rng.normal(0, noise_level))
+        for i in range(self.noise.sample_count(2)):
+            observed[f"FP_PATHWAY_{i}"] = float(self.noise.rng.uniform(0.3, 0.6))
+        top = sorted(observed.items(), key=lambda kv: kv[1], reverse=True)[:15]
+        return IntermediateOutput(
+            output_type=OutputType.PATHWAY_RESULT,
+            step_index=idx,
+            quality_score=self.noise.quality_degradation(0.8, [0.95]),
+            summary=f"Pathway enrichment: {len(top)} significant pathways",
+            data={
+                "method": action.method or "GSEA",
+                "top_pathways": [
+                    {"pathway": p, "score": round(s, 3)} for p, s in top
+                ],
+            },
+            uncertainty=noise_level,
+            artifacts_available=["enrichment_table"],
+        )
+    def _regulatory_network(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        true_net = s.biology.true_regulatory_network
+        n_edges_true = sum(len(v) for v in true_net.values())
+        noise_edges = self.noise.sample_count(max(5, int(n_edges_true * 0.3)))
+        return IntermediateOutput(
+            output_type=OutputType.NETWORK_RESULT,
+            step_index=idx,
+            quality_score=self.noise.quality_degradation(0.6, [0.9]),
+            summary=f"Regulatory network inferred: {n_edges_true + noise_edges} edges",
+            data={
+                "method": action.method or "SCENIC",
+                "n_regulons": len(true_net) + self.noise.sample_count(3),
+                "n_edges": n_edges_true + noise_edges,
+                "top_regulators": list(true_net.keys())[:10],
+            },
+            uncertainty=0.35,
+            artifacts_available=["regulon_table", "grn_adjacency"],
+        )
+    def _marker_selection(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        true_markers = list(s.biology.true_markers)
+        noise_level = 0.2
+        observed_markers = [
+            m for m in true_markers if not self.noise.coin_flip(noise_level)
+        ]
+        fp = self.noise.generate_false_positives(200, 0.01)
+        observed_markers.extend(fp)
+        return IntermediateOutput(
+            output_type=OutputType.MARKER_RESULT,
+            step_index=idx,
+            quality_score=self.noise.quality_degradation(0.75, [0.9]),
+            summary=f"Selected {len(observed_markers)} candidate markers",
+            data={
+                "markers": observed_markers[:20],
+                "n_candidates": len(observed_markers),
+            },
+            uncertainty=noise_level,
+            artifacts_available=["marker_list"],
+        )
+    def _validate_marker(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        marker = action.parameters.get("marker", "unknown")
+        is_true = marker in s.biology.true_markers
+        validation_correct = not self.noise.coin_flip(0.1)
+        validated = is_true == validation_correct
+        return IntermediateOutput(
+            output_type=OutputType.VALIDATION_RESULT,
+            step_index=idx,
+            quality_score=0.9 if validation_correct else 0.4,
+            summary=f"Marker {marker}: {'validated' if validated else 'not validated'}",
+            data={
+                "marker": marker,
+                "validated": validated,
+                "assay": action.method or "qPCR",
+                "effect_size": self.noise.sample_qc_metric(
+                    1.5 if is_true else 0.2, 0.3, -0.5, 5.0
+                ),
+            },
+            artifacts_available=["validation_data"],
+        )
+    def _design_followup(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        return IntermediateOutput(
+            output_type=OutputType.FOLLOWUP_DESIGN,
+            step_index=idx,
+            summary="Follow-up experiment design proposed",
+            data={"proposal": action.parameters},
+            artifacts_available=["followup_proposal"],
+        )
+    def _subagent_review(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        return IntermediateOutput(
+            output_type=OutputType.SUBAGENT_REPORT,
+            step_index=idx,
+            summary=f"Subagent review ({action.invoked_subagent or 'general'})",
+            data={"subagent": action.invoked_subagent, "notes": "Review complete."},
+            artifacts_available=["subagent_report"],
+        )
+    def _synthesize_conclusion(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        return IntermediateOutput(
+            output_type=OutputType.CONCLUSION,
+            step_index=idx,
+            summary="Conclusion synthesised from pipeline evidence",
+            data={"claims": action.parameters.get("claims", [])},
+            artifacts_available=["conclusion_report"],
+        )
+    def _default(
+        self, action: ExperimentAction, s: FullLatentState, idx: int
+    ) -> IntermediateOutput:
+        return IntermediateOutput(
+            output_type=OutputType.FAILURE_REPORT,
+            step_index=idx,
+            success=False,
+            summary=f"Unhandled action type: {action.action_type}",
+            data={},
+        )
+    # ── helpers ─────────────────────────────────────────────────────────
+    def _random_partition(self, total: int, k: int) -> List[int]:
+        if k <= 0:
+            return []
+        fracs = self.noise.rng.dirichlet(alpha=[1.0] * k)
+        sizes = [max(1, int(total * f)) for f in fracs]
+        diff = total - sum(sizes)
+        sizes[0] += diff
+        return sizes
+_HANDLERS = {
+    ActionType.COLLECT_SAMPLE: OutputGenerator._collect_sample,
+    ActionType.SELECT_COHORT: OutputGenerator._select_cohort,
+    ActionType.PREPARE_LIBRARY: OutputGenerator._prepare_library,
+    ActionType.CULTURE_CELLS: OutputGenerator._culture_cells,
+    ActionType.PERTURB_GENE: OutputGenerator._perturb,
+    ActionType.PERTURB_COMPOUND: OutputGenerator._perturb,
+    ActionType.SEQUENCE_CELLS: OutputGenerator._sequence_cells,
+    ActionType.RUN_QC: OutputGenerator._run_qc,
+    ActionType.FILTER_DATA: OutputGenerator._filter_data,
+    ActionType.NORMALIZE_DATA: OutputGenerator._normalize_data,
+    ActionType.INTEGRATE_BATCHES: OutputGenerator._integrate_batches,
+    ActionType.CLUSTER_CELLS: OutputGenerator._cluster_cells,
+    ActionType.DIFFERENTIAL_EXPRESSION: OutputGenerator._differential_expression,
+    ActionType.TRAJECTORY_ANALYSIS: OutputGenerator._trajectory_analysis,
+    ActionType.PATHWAY_ENRICHMENT: OutputGenerator._pathway_enrichment,
+    ActionType.REGULATORY_NETWORK_INFERENCE: OutputGenerator._regulatory_network,
+    ActionType.MARKER_SELECTION: OutputGenerator._marker_selection,
+    ActionType.VALIDATE_MARKER: OutputGenerator._validate_marker,
+    ActionType.DESIGN_FOLLOWUP: OutputGenerator._design_followup,
+    ActionType.REQUEST_SUBAGENT_REVIEW: OutputGenerator._subagent_review,
+    ActionType.SYNTHESIZE_CONCLUSION: OutputGenerator._synthesize_conclusion,
+}

server/simulator/transition.py ADDED Viewed

	@@ -0,0 +1,216 @@

+"""Transition dynamics engine — the heart of the biological simulator.
+Orchestrates latent-state updates, output generation, resource accounting,
+and constraint propagation for every agent action.
+"""
+from __future__ import annotations
+from copy import deepcopy
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
+from models import (
+    ActionType,
+    ExperimentAction,
+    IntermediateOutput,
+    OutputType,
+)
+from .latent_state import FullLatentState
+from .noise import NoiseModel
+from .output_generator import OutputGenerator
+ACTION_COSTS: Dict[ActionType, Tuple[float, float]] = {
+    ActionType.COLLECT_SAMPLE:               (5_000,  7.0),
+    ActionType.SELECT_COHORT:                (  500,  1.0),
+    ActionType.PREPARE_LIBRARY:              (8_000,  3.0),
+    ActionType.CULTURE_CELLS:                (3_000, 14.0),
+    ActionType.PERTURB_GENE:                 (2_000,  3.0),
+    ActionType.PERTURB_COMPOUND:             (1_000,  2.0),
+    ActionType.SEQUENCE_CELLS:               (15_000, 5.0),
+    ActionType.RUN_QC:                       (  100,  0.5),
+    ActionType.FILTER_DATA:                  (   50,  0.25),
+    ActionType.NORMALIZE_DATA:               (   50,  0.25),
+    ActionType.INTEGRATE_BATCHES:            (  100,  0.5),
+    ActionType.CLUSTER_CELLS:                (  100,  0.5),
+    ActionType.DIFFERENTIAL_EXPRESSION:      (  100,  0.5),
+    ActionType.TRAJECTORY_ANALYSIS:          (  200,  1.0),
+    ActionType.PATHWAY_ENRICHMENT:           (  100,  0.5),
+    ActionType.REGULATORY_NETWORK_INFERENCE: (  300,  1.0),
+    ActionType.MARKER_SELECTION:             (  100,  0.5),
+    ActionType.VALIDATE_MARKER:              (5_000, 14.0),
+    ActionType.DESIGN_FOLLOWUP:              (    0,  0.5),
+    ActionType.REQUEST_SUBAGENT_REVIEW:      (    0,  0.25),
+    ActionType.SYNTHESIZE_CONCLUSION:        (    0,  0.5),
+}
+@dataclass
+class TransitionResult:
+    """Bundle returned by the transition engine after one step."""
+    next_state: FullLatentState
+    output: IntermediateOutput
+    reward_components: Dict[str, float] = field(default_factory=dict)
+    hard_violations: List[str] = field(default_factory=list)
+    soft_violations: List[str] = field(default_factory=list)
+    done: bool = False
+class TransitionEngine:
+    """Applies one action to the latent state, producing the next state
+    and a simulated intermediate output.
+    The engine delegates output generation to ``OutputGenerator`` and
+    constraint checking to external rule engines (injected at call time).
+    """
+    def __init__(self, noise: NoiseModel):
+        self.noise = noise
+        self.output_gen = OutputGenerator(noise)
+    def step(
+        self,
+        state: FullLatentState,
+        action: ExperimentAction,
+        *,
+        hard_violations: Optional[List[str]] = None,
+        soft_violations: Optional[List[str]] = None,
+    ) -> TransitionResult:
+        s = deepcopy(state)
+        s.step_count += 1
+        step_idx = s.step_count
+        hard_v = hard_violations or []
+        soft_v = soft_violations or []
+        if hard_v:
+            output = IntermediateOutput(
+                output_type=OutputType.FAILURE_REPORT,
+                step_index=step_idx,
+                success=False,
+                summary=f"Action blocked: {'; '.join(hard_v)}",
+            )
+            return TransitionResult(
+                next_state=s,
+                output=output,
+                hard_violations=hard_v,
+                soft_violations=soft_v,
+            )
+        self._apply_resource_cost(s, action)
+        if s.resources.budget_exhausted or s.resources.time_exhausted:
+            output = IntermediateOutput(
+                output_type=OutputType.FAILURE_REPORT,
+                step_index=step_idx,
+                success=False,
+                summary="Resources exhausted",
+            )
+            return TransitionResult(
+                next_state=s, output=output, done=True,
+                hard_violations=["resources_exhausted"],
+            )
+        self._update_progress(s, action)
+        output = self.output_gen.generate(action, s, step_idx)
+        if soft_v:
+            output.quality_score *= 0.5
+            output.warnings.extend(soft_v)
+        self._propagate_artifacts(s, action, output)
+        done = action.action_type == ActionType.SYNTHESIZE_CONCLUSION
+        return TransitionResult(
+            next_state=s,
+            output=output,
+            soft_violations=soft_v,
+            done=done,
+        )
+    # ── internals ───────────────────────────────────────────────────────
+    def _apply_resource_cost(
+        self, s: FullLatentState, action: ExperimentAction
+    ) -> None:
+        budget_cost, time_cost = ACTION_COSTS.get(
+            action.action_type, (0.0, 0.0)
+        )
+        s.resources.budget_used += budget_cost
+        s.resources.time_used_days += time_cost
+        if action.action_type in {
+            ActionType.RUN_QC, ActionType.FILTER_DATA,
+            ActionType.NORMALIZE_DATA, ActionType.INTEGRATE_BATCHES,
+            ActionType.CLUSTER_CELLS, ActionType.DIFFERENTIAL_EXPRESSION,
+            ActionType.TRAJECTORY_ANALYSIS, ActionType.PATHWAY_ENRICHMENT,
+            ActionType.REGULATORY_NETWORK_INFERENCE, ActionType.MARKER_SELECTION,
+        }:
+            s.resources.compute_hours_used += time_cost * 8
+    def _update_progress(
+        self, s: FullLatentState, action: ExperimentAction
+    ) -> None:
+        at = action.action_type
+        p = s.progress
+        _MAP = {
+            ActionType.COLLECT_SAMPLE: "samples_collected",
+            ActionType.SELECT_COHORT: "cohort_selected",
+            ActionType.PREPARE_LIBRARY: "library_prepared",
+            ActionType.CULTURE_CELLS: "cells_cultured",
+            ActionType.PERTURB_GENE: "perturbation_applied",
+            ActionType.PERTURB_COMPOUND: "perturbation_applied",
+            ActionType.SEQUENCE_CELLS: "cells_sequenced",
+            ActionType.RUN_QC: "qc_performed",
+            ActionType.FILTER_DATA: "data_filtered",
+            ActionType.NORMALIZE_DATA: "data_normalized",
+            ActionType.INTEGRATE_BATCHES: "batches_integrated",
+            ActionType.CLUSTER_CELLS: "cells_clustered",
+            ActionType.DIFFERENTIAL_EXPRESSION: "de_performed",
+            ActionType.TRAJECTORY_ANALYSIS: "trajectories_inferred",
+            ActionType.PATHWAY_ENRICHMENT: "pathways_analyzed",
+            ActionType.REGULATORY_NETWORK_INFERENCE: "networks_inferred",
+            ActionType.MARKER_SELECTION: "markers_discovered",
+            ActionType.VALIDATE_MARKER: "markers_validated",
+            ActionType.SYNTHESIZE_CONCLUSION: "conclusion_reached",
+        }
+        flag = _MAP.get(at)
+        if flag:
+            setattr(p, flag, True)
+        if at == ActionType.COLLECT_SAMPLE:
+            n = action.parameters.get("n_samples", 6)
+            s.resources.samples_available += n
+        if at == ActionType.SEQUENCE_CELLS:
+            s.resources.sequencing_lanes_used += 1
+        if at == ActionType.FILTER_DATA:
+            retain = self.noise.sample_qc_metric(0.85, 0.05, 0.5, 1.0)
+            p.n_cells_after_filter = max(
+                100, int(s.biology.n_true_cells * retain)
+            )
+        if at == ActionType.CLUSTER_CELLS:
+            n_true = len(s.biology.cell_populations) or 5
+            p.n_clusters_found = self.noise.sample_cluster_count(n_true, 0.8)
+    def _propagate_artifacts(
+        self,
+        s: FullLatentState,
+        action: ExperimentAction,
+        output: IntermediateOutput,
+    ) -> None:
+        if action.action_type == ActionType.DIFFERENTIAL_EXPRESSION:
+            top = output.data.get("top_genes", [])
+            s.discovered_de_genes = [g["gene"] for g in top[:20]]
+        if action.action_type == ActionType.CLUSTER_CELLS:
+            s.discovered_clusters = output.data.get("cluster_names", [])
+        if action.action_type == ActionType.MARKER_SELECTION:
+            s.progress.n_markers_found = output.data.get("n_candidates", 0)

server/subagents/__init__.py ADDED Viewed

File without changes

server/tasks/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .generator import TaskGenerator
+from .scenarios import SCENARIO_LIBRARY, Scenario
+__all__ = ["SCENARIO_LIBRARY", "Scenario", "TaskGenerator"]

server/tasks/generator.py ADDED Viewed

	@@ -0,0 +1,129 @@

+"""Task generator — produces (TaskSpec, FullLatentState) pairs for episodes.
+Supports three modes:
+  1. Select from the pre-defined scenario library.
+  2. Randomly perturb a scenario for domain-randomisation.
+  3. Compose a fully procedural scenario (tissue × modality × difficulty).
+"""
+from __future__ import annotations
+from typing import List, Optional, Tuple
+import numpy as np
+from models import TaskSpec
+from server.simulator.latent_state import (
+    CellPopulation,
+    ExperimentProgress,
+    FullLatentState,
+    GeneProgram,
+    LatentBiologicalState,
+    ResourceState,
+    TechnicalState,
+)
+from .scenarios import SCENARIO_LIBRARY, Scenario
+class TaskGenerator:
+    """Generates task + latent-state pairs for environment episodes."""
+    def __init__(
+        self,
+        scenarios: Optional[List[Scenario]] = None,
+        domain_randomise: bool = True,
+    ):
+        self.scenarios = scenarios or SCENARIO_LIBRARY
+        self.domain_randomise = domain_randomise
+    def generate(
+        self,
+        *,
+        seed: Optional[int] = None,
+        scenario_name: Optional[str] = None,
+    ) -> Tuple[TaskSpec, FullLatentState]:
+        rng = np.random.default_rng(seed)
+        if scenario_name:
+            scenario = self._find_scenario(scenario_name)
+        else:
+            idx = int(rng.integers(0, len(self.scenarios)))
+            scenario = self.scenarios[idx]
+        task = scenario.task.model_copy(deep=True)
+        biology = scenario.biology.model_copy(deep=True)
+        technical = scenario.technical.model_copy(deep=True)
+        if self.domain_randomise:
+            self._randomise(rng, task, biology, technical)
+        latent = FullLatentState(
+            biology=biology,
+            technical=technical,
+            progress=ExperimentProgress(),
+            resources=ResourceState(
+                budget_total=task.budget_limit,
+                time_limit_days=task.time_limit_days,
+            ),
+            hidden_failure_conditions=list(scenario.hidden_failure_conditions),
+            rng_seed=seed or 0,
+        )
+        return task, latent
+    def list_scenarios(self) -> List[str]:
+        return [s.name for s in self.scenarios]
+    # ── internals ───────────────────────────────────────────────────────
+    def _find_scenario(self, name: str) -> Scenario:
+        for s in self.scenarios:
+            if s.name == name:
+                return s
+        available = ", ".join(self.list_scenarios())
+        raise ValueError(f"Unknown scenario '{name}'. Available: {available}")
+    def _randomise(
+        self,
+        rng: np.random.Generator,
+        task: TaskSpec,
+        bio: LatentBiologicalState,
+        tech: TechnicalState,
+    ) -> None:
+        budget_scale = float(rng.uniform(0.7, 1.3))
+        task.budget_limit *= budget_scale
+        task.time_limit_days *= float(rng.uniform(0.8, 1.2))
+        tech.dropout_rate = float(np.clip(
+            tech.dropout_rate + rng.normal(0, 0.02), 0.01, 0.3
+        ))
+        tech.doublet_rate = float(np.clip(
+            tech.doublet_rate + rng.normal(0, 0.01), 0.01, 0.15
+        ))
+        tech.sample_quality = float(np.clip(
+            tech.sample_quality + rng.normal(0, 0.05), 0.5, 1.0
+        ))
+        tech.ambient_rna_fraction = float(np.clip(
+            tech.ambient_rna_fraction + rng.normal(0, 0.01), 0.01, 0.15
+        ))
+        for batch_id in list(tech.batch_effects.keys()):
+            tech.batch_effects[batch_id] = float(np.clip(
+                tech.batch_effects[batch_id] + rng.normal(0, 0.03), 0.0, 0.4
+            ))
+        for pop in bio.cell_populations:
+            pop.proportion = float(np.clip(
+                pop.proportion * rng.uniform(0.8, 1.2), 0.01, 0.8
+            ))
+        total = sum(p.proportion for p in bio.cell_populations) or 1.0
+        for pop in bio.cell_populations:
+            pop.proportion /= total
+        for comparison, effects in bio.true_de_genes.items():
+            for gene in list(effects.keys()):
+                effects[gene] *= float(rng.uniform(0.8, 1.2))
+        bio.n_true_cells = max(
+            1000,
+            int(bio.n_true_cells * rng.uniform(0.6, 1.4)),
+        )

server/tasks/scenarios.py ADDED Viewed

	@@ -0,0 +1,454 @@

+"""Pre-defined biological scenarios for task generation.
+Each ``Scenario`` bundles a task specification together with the matching
+hidden ground-truth biology so the simulator can instantiate consistent
+episodes.  The library is intentionally diverse: it covers differential
+expression, trajectory inference, perturbation response, and biomarker
+validation across tissues and modalities.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+from models import ExpectedFinding, PaperReference, TaskSpec
+from server.simulator.latent_state import (
+    CellPopulation,
+    GeneProgram,
+    LatentBiologicalState,
+    TechnicalState,
+)
+@dataclass
+class Scenario:
+    """A reproducible (task, ground-truth) pair."""
+    name: str
+    task: TaskSpec
+    biology: LatentBiologicalState
+    technical: TechnicalState = field(default_factory=TechnicalState)
+    hidden_failure_conditions: List[str] = field(default_factory=list)
+    difficulty: str = "medium"
+    tags: List[str] = field(default_factory=list)
+# ── Scenario library ────────────────────────────────────────────────────────
+SCENARIO_LIBRARY: List[Scenario] = [
+    # ── 1. Cardiac disease DE ───────────────────────────────────────────
+    Scenario(
+        name="cardiac_disease_de",
+        difficulty="easy",
+        tags=["de", "scRNA-seq", "cardiac"],
+        task=TaskSpec(
+            problem_statement=(
+                "Identify differentially expressed genes between diseased "
+                "and healthy cardiomyocytes using single-cell RNA sequencing."
+            ),
+            modality="scRNA-seq",
+            organism="human",
+            tissue="heart",
+            conditions=["healthy", "dilated_cardiomyopathy"],
+            budget_limit=80_000.0,
+            time_limit_days=120.0,
+            success_criteria=[
+                "Identify DE genes between conditions",
+                "Validate at least one candidate marker",
+            ],
+        ),
+        biology=LatentBiologicalState(
+            cell_populations=[
+                CellPopulation(
+                    name="cardiomyocyte",
+                    proportion=0.35,
+                    marker_genes=["TNNT2", "MYH7", "ACTC1"],
+                    state="contractile",
+                    condition_response={"dilated_cardiomyopathy": 0.8},
+                ),
+                CellPopulation(
+                    name="fibroblast",
+                    proportion=0.25,
+                    marker_genes=["COL1A1", "DCN", "LUM"],
+                    state="quiescent",
+                    condition_response={"dilated_cardiomyopathy": 1.3},
+                ),
+                CellPopulation(
+                    name="endothelial",
+                    proportion=0.15,
+                    marker_genes=["PECAM1", "VWF", "CDH5"],
+                    state="quiescent",
+                ),
+                CellPopulation(
+                    name="macrophage",
+                    proportion=0.10,
+                    marker_genes=["CD68", "CD163", "CSF1R"],
+                    state="activated",
+                    condition_response={"dilated_cardiomyopathy": 1.5},
+                ),
+                CellPopulation(
+                    name="smooth_muscle",
+                    proportion=0.15,
+                    marker_genes=["ACTA2", "MYH11", "TAGLN"],
+                    state="quiescent",
+                ),
+            ],
+            true_de_genes={
+                "disease_vs_healthy": {
+                    "NPPA": 2.5, "NPPB": 3.1, "MYH7": 1.8,
+                    "COL1A1": 1.6, "COL3A1": 1.4, "POSTN": 2.0,
+                    "CCL2": 1.2, "IL6": 0.9, "TGFB1": 1.1,
+                    "ANKRD1": 2.2, "XIRP2": -1.3, "MYL2": -0.8,
+                },
+            },
+            true_pathways={
+                "cardiac_muscle_contraction": 0.4,
+                "extracellular_matrix_organisation": 0.85,
+                "inflammatory_response": 0.7,
+                "TGF_beta_signalling": 0.75,
+                "apoptosis": 0.55,
+            },
+            true_markers=["NPPA", "NPPB", "POSTN", "COL1A1"],
+            causal_mechanisms=[
+                "TGF-beta-driven fibrosis",
+                "inflammatory macrophage infiltration",
+            ],
+            n_true_cells=12_000,
+        ),
+        technical=TechnicalState(
+            batch_effects={"batch_1": 0.15, "batch_2": 0.10},
+            doublet_rate=0.05,
+            dropout_rate=0.08,
+        ),
+    ),
+    # ── 2. Developmental trajectory ────────────────────────────��────────
+    Scenario(
+        name="hematopoiesis_trajectory",
+        difficulty="medium",
+        tags=["trajectory", "scRNA-seq", "hematopoiesis"],
+        task=TaskSpec(
+            problem_statement=(
+                "Infer the developmental trajectory of hematopoietic "
+                "stem cells differentiating into mature blood lineages."
+            ),
+            modality="scRNA-seq",
+            organism="human",
+            tissue="bone_marrow",
+            conditions=["steady_state"],
+            budget_limit=100_000.0,
+            time_limit_days=150.0,
+            success_criteria=[
+                "Reconstruct branching lineage structure",
+                "Identify key transcription factors driving fate decisions",
+            ],
+            paper_references=[
+                PaperReference(
+                    title=(
+                        "Single-cell RNA-sequencing uncovers transcriptional "
+                        "states and fate decisions in haematopoiesis"
+                    ),
+                    citation="Nature Communications (2018)",
+                    doi="10.1038/s41467-017-02305-6",
+                    url=(
+                        "https://www.nature.com/articles/"
+                        "s41467-017-02305-6"
+                    ),
+                ),
+            ],
+            expected_findings=[
+                ExpectedFinding(
+                    finding=(
+                        "Trajectory analysis should recover branching blood "
+                        "lineages rooted in HSCs."
+                    ),
+                    category="trajectory",
+                    keywords=["HSC", "branching", "lineage", "trajectory"],
+                ),
+                ExpectedFinding(
+                    finding=(
+                        "GATA1 should appear as a driver of erythroid fate "
+                        "commitment."
+                    ),
+                    category="regulatory_network",
+                    keywords=["GATA1", "erythroid", "commitment"],
+                ),
+                ExpectedFinding(
+                    finding=(
+                        "CEBPA and SPI1 should support myeloid branch "
+                        "decisions."
+                    ),
+                    category="regulatory_network",
+                    keywords=["CEBPA", "SPI1", "myeloid", "branch"],
+                ),
+            ],
+        ),
+        biology=LatentBiologicalState(
+            cell_populations=[
+                CellPopulation(name="HSC", proportion=0.05,
+                               marker_genes=["CD34", "KIT", "THY1"],
+                               state="stem"),
+                CellPopulation(name="CMP", proportion=0.10,
+                               marker_genes=["CD34", "FLT3"],
+                               state="progenitor"),
+                CellPopulation(name="GMP", proportion=0.12,
+                               marker_genes=["CSF3R", "CEBPA"],
+                               state="progenitor"),
+                CellPopulation(name="MEP", proportion=0.10,
+                               marker_genes=["GATA1", "KLF1"],
+                               state="progenitor"),
+                CellPopulation(name="erythrocyte", proportion=0.20,
+                               marker_genes=["HBA1", "HBB", "GYPA"],
+                               state="mature"),
+                CellPopulation(name="neutrophil", proportion=0.18,
+                               marker_genes=["ELANE", "MPO", "CTSG"],
+                               state="mature"),
+                CellPopulation(name="monocyte", proportion=0.15,
+                               marker_genes=["CD14", "CSF1R", "FCGR3A"],
+                               state="mature"),
+                CellPopulation(name="megakaryocyte", proportion=0.10,
+                               marker_genes=["ITGA2B", "GP1BA"],
+                               state="mature"),
+            ],
+            true_de_genes={},
+            true_pathways={
+                "hematopoietic_cell_lineage": 0.9,
+                "MAPK_signalling": 0.6,
+                "JAK_STAT_signalling": 0.7,
+            },
+            true_trajectory={
+                "root": "HSC",
+                "n_lineages": 3,
+                "branching": True,
+                "branches": [
+                    ["HSC", "CMP", "GMP", "neutrophil"],
+                    ["HSC", "CMP", "GMP", "monocyte"],
+                    ["HSC", "MEP", "erythrocyte"],
+                    ["HSC", "MEP", "megakaryocyte"],
+                ],
+            },
+            true_regulatory_network={
+                "GATA1": ["KLF1", "HBB", "HBA1", "GYPA"],
+                "CEBPA": ["CSF3R", "ELANE", "MPO"],
+                "SPI1": ["CSF1R", "CD14", "FCGR3A"],
+                "RUNX1": ["CD34", "KIT"],
+            },
+            true_markers=["GATA1", "CEBPA", "SPI1"],
+            causal_mechanisms=[
+                "GATA1-driven erythroid commitment",
+                "PU.1/CEBPA antagonism at myeloid branch point",
+            ],
+            n_true_cells=15_000,
+        ),
+        technical=TechnicalState(dropout_rate=0.12, doublet_rate=0.06),
+    ),
+    # ── 3. Perturbation response ────────────────────────────────────────
+    Scenario(
+        name="perturbation_immune",
+        difficulty="hard",
+        tags=["perturbation", "scRNA-seq", "immune"],
+        task=TaskSpec(
+            problem_statement=(
+                "Determine the effect of JAK inhibitor treatment on "
+                "T-cell activation states in rheumatoid arthritis."
+            ),
+            modality="scRNA-seq",
+            organism="human",
+            tissue="synovial_fluid",
+            conditions=["untreated_RA", "JAK_inhibitor_treated"],
+            budget_limit=120_000.0,
+            time_limit_days=180.0,
+            prior_observations=[
+                "Elevated JAK-STAT signalling observed in prior bulk RNA-seq",
+            ],
+            success_criteria=[
+                "Quantify shift in T-cell activation states",
+                "Identify pathways modulated by JAK inhibitor",
+                "Propose validation strategy",
+            ],
+        ),
+        biology=LatentBiologicalState(
+            cell_populations=[
+                CellPopulation(name="CD4_Th1", proportion=0.20,
+                               marker_genes=["IFNG", "TBX21", "IL2"],
+                               state="activated",
+                               condition_response={"JAK_inhibitor_treated": 0.5}),
+                CellPopulation(name="CD4_Th17", proportion=0.15,
+                               marker_genes=["IL17A", "RORC", "CCR6"],
+                               state="activated",
+                               condition_response={"JAK_inhibitor_treated": 0.6}),
+                CellPopulation(name="CD4_Treg", proportion=0.08,
+                               marker_genes=["FOXP3", "IL2RA", "CTLA4"],
+                               state="regulatory",
+                               condition_response={"JAK_inhibitor_treated": 1.2}),
+                CellPopulation(name="CD8_cytotoxic", proportion=0.18,
+                               marker_genes=["GZMB", "PRF1", "CD8A"],
+                               state="activated",
+                               condition_response={"JAK_inhibitor_treated": 0.7}),
+                CellPopulation(name="macrophage", proportion=0.15,
+                               marker_genes=["CD68", "CD163", "MARCO"],
+                               state="inflammatory"),
+                CellPopulation(name="fibroblast", proportion=0.14,
+                               marker_genes=["COL1A1", "FAP", "THY1"],
+                               state="activated"),
+                CellPopulation(name="B_cell", proportion=0.10,
+                               marker_genes=["CD19", "MS4A1", "CD79A"],
+                               state="quiescent"),
+            ],
+            true_de_genes={
+                "treated_vs_untreated": {
+                    "IFNG": -1.8, "TBX21": -1.2, "IL17A": -1.5,
+                    "RORC": -0.9, "JAK1": -0.3, "STAT1": -1.0,
+                    "STAT3": -0.8, "SOCS1": 1.5, "SOCS3": 1.3,
+                    "FOXP3": 0.6, "IL10": 0.7,
+                },
+            },
+            true_pathways={
+                "JAK_STAT_signalling": 0.3,
+                "Th1_differentiation": 0.35,
+                "Th17_differentiation": 0.4,
+                "cytokine_signalling": 0.45,
+                "regulatory_T_cell_function": 0.7,
+            },
+            perturbation_effects={
+                "JAK_inhibitor": {
+                    "STAT1": -0.8, "STAT3": -0.7, "IFNG": -1.5,
+                    "IL17A": -1.3, "SOCS1": 1.2,
+                },
+            },
+            true_markers=["STAT1", "SOCS1", "IFNG"],
+            causal_mechanisms=[
+                "JAK-STAT pathway inhibition reduces Th1/Th17 activation",
+                "Compensatory Treg expansion under JAK inhibition",
+            ],
+            n_true_cells=18_000,
+        ),
+        technical=TechnicalState(
+            batch_effects={"batch_ctrl": 0.12, "batch_treated": 0.18},
+            ambient_rna_fraction=0.07,
+            dropout_rate=0.10,
+        ),
+        hidden_failure_conditions=[
+            "High ambient RNA may confound DE in low-abundance transcripts",
+        ],
+    ),
+    # ── 4. Biomarker validation ─────────────────────────────────────────
+    Scenario(
+        name="biomarker_validation_lung",
+        difficulty="medium",
+        tags=["biomarker", "validation", "scRNA-seq", "lung"],
+        task=TaskSpec(
+            problem_statement=(
+                "Design a follow-up validation experiment for candidate "
+                "biomarker SPP1 in idiopathic pulmonary fibrosis (IPF)."
+            ),
+            modality="scRNA-seq",
+            organism="human",
+            tissue="lung",
+            conditions=["healthy", "IPF"],
+            budget_limit=90_000.0,
+            time_limit_days=150.0,
+            prior_observations=[
+                "SPP1 identified as top DE gene in prior pilot study",
+                "SPP1+ macrophages enriched in fibrotic regions",
+            ],
+            success_criteria=[
+                "Validate SPP1 as a marker for pro-fibrotic macrophages",
+                "Confirm spatial localisation in fibrotic tissue",
+            ],
+            paper_references=[
+                PaperReference(
+                    title=(
+                        "Proliferating SPP1/MERTK-expressing macrophages in "
+                        "idiopathic pulmonary fibrosis"
+                    ),
+                    citation="European Respiratory Journal (2019)",
+                    doi="10.1183/13993003.02441-2018",
+                    pmid="31221805",
+                    url="https://pubmed.ncbi.nlm.nih.gov/31221805/",
+                ),
+            ],
+            expected_findings=[
+                ExpectedFinding(
+                    finding=(
+                        "SPP1-positive macrophages should be enriched in IPF "
+                        "fibrotic regions."
+                    ),
+                    category="marker",
+                    keywords=["SPP1", "macrophage", "IPF", "fibrotic"],
+                ),
+                ExpectedFinding(
+                    finding=(
+                        "MERTK should co-occur with the profibrotic macrophage "
+                        "state."
+                    ),
+                    category="marker",
+                    keywords=["MERTK", "macrophage", "SPP1"],
+                ),
+                ExpectedFinding(
+                    finding=(
+                        "Extracellular matrix organization should emerge as a "
+                        "top fibrotic program."
+                    ),
+                    category="pathway",
+                    keywords=["extracellular_matrix", "fibrosis", "pathway"],
+                ),
+            ],
+            dataset_metadata={
+                "literature_grounding": "single_cell_ipf_macrophages",
+            },
+        ),
+        biology=LatentBiologicalState(
+            cell_populations=[
+                CellPopulation(name="alveolar_macrophage", proportion=0.18,
+                               marker_genes=["MARCO", "FABP4", "MCEMP1"],
+                               state="resident"),
+                CellPopulation(name="SPP1_macrophage", proportion=0.12,
+                               marker_genes=["SPP1", "MERTK", "MMP9", "TREM2"],
+                               state="pro-fibrotic",
+                               condition_response={"IPF": 2.0}),
+                CellPopulation(name="AT2", proportion=0.20,
+                               marker_genes=["SFTPC", "SFTPB", "ABCA3"],
+                               state="normal"),
+                CellPopulation(name="fibroblast", proportion=0.22,
+                               marker_genes=["COL1A1", "COL3A1", "POSTN"],
+                               state="activated",
+                               condition_response={"IPF": 1.5}),
+                CellPopulation(name="endothelial", proportion=0.13,
+                               marker_genes=["PECAM1", "CLDN5"],
+                               state="quiescent"),
+                CellPopulation(name="T_cell", proportion=0.15,
+                               marker_genes=["CD3D", "CD3E", "IL7R"],
+                               state="quiescent"),
+            ],
+            true_de_genes={
+                "IPF_vs_healthy": {
+                    "SPP1": 3.2, "MERTK": 1.4, "MMP9": 1.8, "TREM2": 1.5,
+                    "COL1A1": 2.1, "COL3A1": 1.9, "POSTN": 2.4,
+                    "SFTPC": -1.2, "AGER": -1.6,
+                },
+            },
+            true_pathways={
+                "extracellular_matrix_organisation": 0.9,
+                "integrin_signalling": 0.75,
+                "macrophage_activation": 0.8,
+                "Wnt_signalling": 0.6,
+            },
+            true_markers=["SPP1", "MERTK", "POSTN", "MMP9"],
+            causal_mechanisms=[
+                "SPP1+ macrophage-driven fibroblast activation",
+                "Integrin-mediated SPP1 signalling in fibrosis",
+            ],
+            n_true_cells=14_000,
+        ),
+        technical=TechnicalState(
+            batch_effects={"batch_1": 0.10},
+            dropout_rate=0.09,
+            sample_quality=0.85,
+        ),
+    ),
+]

tests/__init__.py ADDED Viewed

File without changes

tests/test_environment.py ADDED Viewed

	@@ -0,0 +1,85 @@

+"""Integration tests for the full BioExperimentEnvironment."""
+from models import ActionType, ExperimentAction
+from server.hackathon_environment import BioExperimentEnvironment
+class TestEnvironmentLifecycle:
+    def test_reset_returns_valid_observation(self):
+        env = BioExperimentEnvironment()
+        obs = env.reset()
+        assert obs.step_index == 0
+        assert obs.done is False
+        assert obs.task.problem_statement != ""
+    def test_step_increments_step_count(self):
+        env = BioExperimentEnvironment()
+        env.reset()
+        obs = env.step(ExperimentAction(action_type=ActionType.COLLECT_SAMPLE))
+        assert obs.step_index == 1
+        assert env.state.step_count == 1
+    def test_valid_pipeline_trajectory(self):
+        env = BioExperimentEnvironment()
+        env.reset()
+        actions = [
+            ExperimentAction(action_type=ActionType.COLLECT_SAMPLE,
+                             parameters={"n_samples": 6}),
+            ExperimentAction(action_type=ActionType.PREPARE_LIBRARY,
+                             method="10x_chromium"),
+            ExperimentAction(action_type=ActionType.SEQUENCE_CELLS),
+            ExperimentAction(action_type=ActionType.RUN_QC),
+            ExperimentAction(action_type=ActionType.FILTER_DATA),
+            ExperimentAction(action_type=ActionType.NORMALIZE_DATA),
+            ExperimentAction(action_type=ActionType.CLUSTER_CELLS),
+            ExperimentAction(action_type=ActionType.DIFFERENTIAL_EXPRESSION,
+                             parameters={"comparison": "disease_vs_healthy"}),
+        ]
+        for a in actions:
+            obs = env.step(a)
+            assert obs.latest_output is not None
+            assert obs.latest_output.success is True, (
+                f"Step {a.action_type} failed: {obs.rule_violations}"
+            )
+        assert obs.step_index == len(actions)
+        assert obs.resource_usage.budget_used > 0
+    def test_premature_de_blocked(self):
+        env = BioExperimentEnvironment()
+        env.reset()
+        obs = env.step(ExperimentAction(
+            action_type=ActionType.DIFFERENTIAL_EXPRESSION,
+        ))
+        assert obs.latest_output is not None
+        assert obs.latest_output.success is False
+    def test_conclusion_ends_episode(self):
+        env = BioExperimentEnvironment()
+        env.reset()
+        quick_pipeline = [
+            ExperimentAction(action_type=ActionType.COLLECT_SAMPLE),
+            ExperimentAction(action_type=ActionType.PREPARE_LIBRARY),
+            ExperimentAction(action_type=ActionType.SEQUENCE_CELLS),
+            ExperimentAction(action_type=ActionType.RUN_QC),
+            ExperimentAction(action_type=ActionType.FILTER_DATA),
+            ExperimentAction(action_type=ActionType.NORMALIZE_DATA),
+            ExperimentAction(action_type=ActionType.CLUSTER_CELLS),
+            ExperimentAction(action_type=ActionType.DIFFERENTIAL_EXPRESSION,
+                             parameters={"comparison": "disease_vs_healthy"}),
+            ExperimentAction(
+                action_type=ActionType.SYNTHESIZE_CONCLUSION,
+                parameters={"claims": [
+                    {"claim": "Test conclusion", "confidence": 0.7,
+                     "claim_type": "correlational"},
+                ]},
+            ),
+        ]
+        for a in quick_pipeline:
+            obs = env.step(a)
+        assert obs.done is True
+        assert obs.reward != 0.0

tests/test_literature_benchmark.py ADDED Viewed

	@@ -0,0 +1,36 @@

+"""Tests for literature-grounded benchmark utilities."""
+from training.literature_benchmark import (
+    run_paper_benchmark,
+    select_literature_scenario,
+)
+def test_select_literature_scenario_for_ipf_prompt():
+    scenario = select_literature_scenario(
+        "Validate SPP1-positive macrophage findings in idiopathic pulmonary fibrosis."
+    )
+    assert scenario.name == "biomarker_validation_lung"
+def test_select_literature_scenario_for_trajectory_prompt():
+    scenario = select_literature_scenario(
+        "Recover branching hematopoietic lineages and branch point transcription factors."
+    )
+    assert scenario.name == "hematopoiesis_trajectory"
+def test_run_paper_benchmark_matches_curated_findings():
+    result = run_paper_benchmark(
+        problem_statement=(
+            "Design a follow-up validation experiment for candidate biomarker "
+            "SPP1 in idiopathic pulmonary fibrosis."
+        ),
+        scenario_name="biomarker_validation_lung",
+        domain_randomise=False,
+    )
+    assert result.total_steps >= 1
+    assert result.matched_papers
+    assert result.match_ratio >= (2 / 3)
+    assert any("SPP1" in finding for finding in result.matched_findings)

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Tests for POMDP schema models."""
+import pytest
+from models import (
+    ActionType,
+    ConclusionClaim,
+    ExpectedFinding,
+    ExperimentAction,
+    ExperimentObservation,
+    IntermediateOutput,
+    OutputType,
+    PaperReference,
+    PipelineStepRecord,
+    ResourceUsage,
+    TaskSpec,
+)
+def test_experiment_action_roundtrip():
+    a = ExperimentAction(
+        action_type=ActionType.COLLECT_SAMPLE,
+        input_targets=["prior_cohort"],
+        method="10x_chromium",
+        parameters={"n_samples": 6},
+        confidence=0.8,
+    )
+    d = a.model_dump()
+    assert d["action_type"] == "collect_sample"
+    assert d["confidence"] == 0.8
+    reconstructed = ExperimentAction(**d)
+    assert reconstructed.action_type == ActionType.COLLECT_SAMPLE
+def test_experiment_observation_defaults():
+    obs = ExperimentObservation(done=False, reward=0.0)
+    assert obs.step_index == 0
+    assert obs.pipeline_history == []
+    assert obs.resource_usage.budget_remaining == 100_000.0
+def test_intermediate_output_quality_bounds():
+    with pytest.raises(Exception):
+        IntermediateOutput(
+            output_type=OutputType.QC_METRICS,
+            step_index=1,
+            quality_score=1.5,
+        )
+def test_task_spec_defaults():
+    t = TaskSpec()
+    assert "10x_chromium" in t.available_assays
+    assert t.budget_limit == 100_000.0
+    assert t.paper_references == []
+    assert t.expected_findings == []
+def test_paper_reference_and_expected_finding_roundtrip():
+    task = TaskSpec(
+        paper_references=[
+            PaperReference(
+                title="Example paper",
+                doi="10.0000/example",
+            )
+        ],
+        expected_findings=[
+            ExpectedFinding(
+                finding="Example marker is enriched",
+                category="marker",
+                keywords=["EXAMPLE"],
+            )
+        ],
+    )
+    dumped = task.model_dump()
+    assert dumped["paper_references"][0]["title"] == "Example paper"
+    assert dumped["expected_findings"][0]["category"] == "marker"
+def test_conclusion_claim_serialization():
+    c = ConclusionClaim(
+        claim="NPPA is upregulated in disease",
+        evidence_steps=[3, 5],
+        confidence=0.85,
+        claim_type="correlational",
+    )
+    d = c.model_dump()
+    assert d["claim_type"] == "correlational"
+    assert d["confidence"] == 0.85

tests/test_rewards.py ADDED Viewed

	@@ -0,0 +1,105 @@

+"""Tests for the decomposable reward function."""
+from models import ActionType, ConclusionClaim, ExperimentAction, IntermediateOutput, OutputType
+from server.rewards.reward import RewardComputer
+from server.simulator.latent_state import (
+    ExperimentProgress,
+    FullLatentState,
+    LatentBiologicalState,
+    ResourceState,
+)
+def _states(
+    prev_flags: dict | None = None,
+    next_flags: dict | None = None,
+    budget_used: float = 0.0,
+):
+    prev = FullLatentState(
+        progress=ExperimentProgress(**(prev_flags or {})),
+        resources=ResourceState(budget_total=100_000, budget_used=budget_used),
+    )
+    nf = dict(prev_flags or {})
+    nf.update(next_flags or {})
+    nxt = FullLatentState(
+        progress=ExperimentProgress(**nf),
+        resources=ResourceState(budget_total=100_000, budget_used=budget_used + 5000),
+    )
+    return prev, nxt
+class TestStepReward:
+    def test_valid_step_positive(self):
+        rc = RewardComputer()
+        prev, nxt = _states(
+            prev_flags={"samples_collected": True, "library_prepared": True},
+            next_flags={"cells_sequenced": True},
+        )
+        output = IntermediateOutput(
+            output_type=OutputType.SEQUENCING_RESULT,
+            step_index=1,
+            quality_score=0.85,
+            uncertainty=0.15,
+        )
+        rb = rc.step_reward(
+            ExperimentAction(action_type=ActionType.SEQUENCE_CELLS),
+            prev, nxt, output, [], [],
+        )
+        assert rb.total > 0
+    def test_hard_violation_negative(self):
+        rc = RewardComputer()
+        prev, nxt = _states()
+        output = IntermediateOutput(
+            output_type=OutputType.FAILURE_REPORT,
+            step_index=1,
+            success=False,
+        )
+        rb = rc.step_reward(
+            ExperimentAction(action_type=ActionType.SEQUENCE_CELLS),
+            prev, nxt, output, ["blocked"], [],
+        )
+        assert rb.total < 0
+class TestTerminalReward:
+    def test_correct_conclusion_rewarded(self):
+        rc = RewardComputer()
+        state = FullLatentState(
+            biology=LatentBiologicalState(
+                causal_mechanisms=["TGF-beta-driven fibrosis"],
+                true_markers=["NPPA"],
+            ),
+            progress=ExperimentProgress(
+                samples_collected=True, cells_sequenced=True,
+                qc_performed=True, data_filtered=True,
+                data_normalized=True, de_performed=True,
+                conclusion_reached=True,
+            ),
+            resources=ResourceState(budget_total=100_000, budget_used=40_000),
+        )
+        claims = [
+            ConclusionClaim(
+                claim="TGF-beta-driven fibrosis observed",
+                confidence=0.9,
+                claim_type="causal",
+            ),
+        ]
+        rb = rc.terminal_reward(state, claims, [])
+        assert rb.terminal > 0
+    def test_overconfident_wrong_claim_penalised(self):
+        rc = RewardComputer()
+        state = FullLatentState(
+            biology=LatentBiologicalState(causal_mechanisms=["real_mechanism"]),
+            progress=ExperimentProgress(conclusion_reached=True),
+        )
+        claims = [
+            ConclusionClaim(
+                claim="completely_wrong_mechanism",
+                confidence=0.95,
+                claim_type="causal",
+            ),
+        ]
+        rb = rc.terminal_reward(state, claims, [])
+        assert rb.components.get("overconfidence_penalty", 0) < 0

tests/test_rules.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""Tests for the biological rule engine."""
+from models import ActionType, ExperimentAction
+from server.rules.engine import RuleEngine, Severity
+from server.simulator.latent_state import (
+    ExperimentProgress,
+    FullLatentState,
+    ResourceState,
+)
+def _state(**progress_flags) -> FullLatentState:
+    return FullLatentState(
+        progress=ExperimentProgress(**progress_flags),
+        resources=ResourceState(budget_total=100_000, time_limit_days=180),
+    )
+class TestPrerequisites:
+    def test_sequence_without_library_blocked(self):
+        engine = RuleEngine()
+        violations = engine.check(
+            ExperimentAction(action_type=ActionType.SEQUENCE_CELLS),
+            _state(samples_collected=True),
+        )
+        hard = engine.hard_violations(violations)
+        assert any("library" in m.lower() for m in hard)
+    def test_sequence_with_library_allowed(self):
+        engine = RuleEngine()
+        violations = engine.check(
+            ExperimentAction(action_type=ActionType.SEQUENCE_CELLS),
+            _state(samples_collected=True, library_prepared=True),
+        )
+        hard = engine.hard_violations(violations)
+        assert not hard
+    def test_de_without_normalization_blocked(self):
+        engine = RuleEngine()
+        violations = engine.check(
+            ExperimentAction(action_type=ActionType.DIFFERENTIAL_EXPRESSION),
+            _state(cells_sequenced=True, qc_performed=True, data_filtered=True),
+        )
+        hard = engine.hard_violations(violations)
+        assert any("normalis" in m.lower() or "normaliz" in m.lower() for m in hard)
+    def test_validate_marker_without_discovery_blocked(self):
+        engine = RuleEngine()
+        violations = engine.check(
+            ExperimentAction(action_type=ActionType.VALIDATE_MARKER),
+            _state(de_performed=True),
+        )
+        hard = engine.hard_violations(violations)
+        assert any("marker" in m.lower() for m in hard)
+class TestRedundancy:
+    def test_double_qc_is_soft(self):
+        engine = RuleEngine()
+        violations = engine.check(
+            ExperimentAction(action_type=ActionType.RUN_QC),
+            _state(cells_sequenced=True, qc_performed=True),
+        )
+        hard = engine.hard_violations(violations)
+        soft = engine.soft_violations(violations)
+        assert not hard
+        assert any("redundant" in m.lower() for m in soft)
+class TestResourceConstraints:
+    def test_exhausted_budget_blocked(self):
+        s = _state()
+        s.resources.budget_used = 100_000
+        engine = RuleEngine()
+        violations = engine.check(
+            ExperimentAction(action_type=ActionType.COLLECT_SAMPLE), s,
+        )
+        hard = engine.hard_violations(violations)
+        assert any("budget" in m.lower() for m in hard)

tests/test_simulator.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Tests for the latent-state simulator modules."""
+import pytest
+from models import ActionType, ExperimentAction, OutputType
+from server.simulator.latent_state import (
+    CellPopulation,
+    ExperimentProgress,
+    FullLatentState,
+    LatentBiologicalState,
+    ResourceState,
+    TechnicalState,
+)
+from server.simulator.noise import NoiseModel
+from server.simulator.output_generator import OutputGenerator
+from server.simulator.transition import TransitionEngine
+def _make_state() -> FullLatentState:
+    return FullLatentState(
+        biology=LatentBiologicalState(
+            cell_populations=[
+                CellPopulation(name="A", proportion=0.6, marker_genes=["G1"]),
+                CellPopulation(name="B", proportion=0.4, marker_genes=["G2"]),
+            ],
+            true_de_genes={"disease_vs_healthy": {"G1": 2.0, "G2": -1.5}},
+            true_pathways={"apoptosis": 0.7},
+            true_markers=["G1"],
+            causal_mechanisms=["G1-driven apoptosis"],
+            n_true_cells=5000,
+        ),
+        technical=TechnicalState(dropout_rate=0.1, doublet_rate=0.04),
+        progress=ExperimentProgress(),
+        resources=ResourceState(budget_total=50_000, time_limit_days=90),
+    )
+class TestNoiseModel:
+    def test_deterministic_with_seed(self):
+        n1 = NoiseModel(seed=42)
+        n2 = NoiseModel(seed=42)
+        assert n1.sample_qc_metric(0.5, 0.1) == n2.sample_qc_metric(0.5, 0.1)
+    def test_false_positives(self):
+        n = NoiseModel(seed=0)
+        fps = n.generate_false_positives(1000, 0.01)
+        assert all(g.startswith("FP_GENE_") for g in fps)
+    def test_quality_degradation_bounded(self):
+        n = NoiseModel(seed=0)
+        for _ in range(100):
+            q = n.quality_degradation(0.9, [0.8, 0.7])
+            assert 0.0 <= q <= 1.0
+class TestOutputGenerator:
+    def test_collect_sample(self):
+        noise = NoiseModel(seed=1)
+        gen = OutputGenerator(noise)
+        s = _make_state()
+        action = ExperimentAction(
+            action_type=ActionType.COLLECT_SAMPLE,
+            parameters={"n_samples": 4},
+        )
+        out = gen.generate(action, s, 1)
+        assert out.output_type == OutputType.SAMPLE_COLLECTION_RESULT
+        assert out.data["n_samples"] == 4
+    def test_de_includes_true_genes(self):
+        noise = NoiseModel(seed=42)
+        gen = OutputGenerator(noise)
+        s = _make_state()
+        s.progress.data_normalized = True
+        action = ExperimentAction(
+            action_type=ActionType.DIFFERENTIAL_EXPRESSION,
+            parameters={"comparison": "disease_vs_healthy"},
+        )
+        out = gen.generate(action, s, 5)
+        assert out.output_type == OutputType.DE_RESULT
+        gene_names = [g["gene"] for g in out.data["top_genes"]]
+        assert "G1" in gene_names or "G2" in gene_names
+class TestTransitionEngine:
+    def test_progress_flags_set(self):
+        noise = NoiseModel(seed=0)
+        engine = TransitionEngine(noise)
+        s = _make_state()
+        action = ExperimentAction(action_type=ActionType.COLLECT_SAMPLE)
+        result = engine.step(s, action)
+        assert result.next_state.progress.samples_collected is True
+    def test_hard_violation_blocks(self):
+        noise = NoiseModel(seed=0)
+        engine = TransitionEngine(noise)
+        s = _make_state()
+        result = engine.step(
+            s,
+            ExperimentAction(action_type=ActionType.COLLECT_SAMPLE),
+            hard_violations=["test_block"],
+        )
+        assert result.output.success is False
+        assert result.output.output_type == OutputType.FAILURE_REPORT
+    def test_resource_deduction(self):
+        noise = NoiseModel(seed=0)
+        engine = TransitionEngine(noise)
+        s = _make_state()
+        action = ExperimentAction(action_type=ActionType.SEQUENCE_CELLS)
+        s.progress.library_prepared = True
+        result = engine.step(s, action)
+        assert result.next_state.resources.budget_used == 15_000
+    def test_conclusion_ends_episode(self):
+        noise = NoiseModel(seed=0)
+        engine = TransitionEngine(noise)
+        s = _make_state()
+        s.progress.de_performed = True
+        action = ExperimentAction(action_type=ActionType.SYNTHESIZE_CONCLUSION)
+        result = engine.step(s, action)
+        assert result.done is True

training/__init__.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from .evaluation import EvaluationSuite
+from .gym_wrapper import BioExperimentGymEnv
+from .trajectory import Trajectory, TrajectoryDataset
+__all__ = [
+    "BioExperimentGymEnv",
+    "EvaluationSuite",
+    "PaperBenchmarkResult",
+    "Trajectory",
+    "TrajectoryDataset",
+    "run_paper_benchmark",
+    "select_literature_scenario",
+]
+def __getattr__(name: str):
+    if name in {
+        "PaperBenchmarkResult",
+        "run_paper_benchmark",
+        "select_literature_scenario",
+    }:
+        from .literature_benchmark import (
+            PaperBenchmarkResult,
+            run_paper_benchmark,
+            select_literature_scenario,
+        )
+        exports = {
+            "PaperBenchmarkResult": PaperBenchmarkResult,
+            "run_paper_benchmark": run_paper_benchmark,
+            "select_literature_scenario": select_literature_scenario,
+        }
+        return exports[name]
+    raise AttributeError(f"module 'training' has no attribute {name!r}")

training/evaluation.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""Evaluation suite for the bio-experiment planning environment.
+Separates metrics into four families:
+  - online RL metrics      (collected during training rollouts)
+  - offline benchmark metrics (computed on a fixed held-out set)
+  - expert review metrics  (for human-in-the-loop evaluation)
+  - simulator fidelity metrics (how well the simulator matches reality)
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+import numpy as np
+from .trajectory import Trajectory, TrajectoryDataset
+@dataclass
+class MetricResult:
+    name: str
+    value: float
+    details: Dict[str, Any] = field(default_factory=dict)
+class EvaluationSuite:
+    """Computes and aggregates evaluation metrics over trajectory datasets."""
+    # ── online RL metrics ───────────────────────────────────────────────
+    @staticmethod
+    def online_metrics(trajectories: List[Trajectory]) -> List[MetricResult]:
+        if not trajectories:
+            return []
+        rewards = [t.total_reward for t in trajectories]
+        lengths = [len(t.steps) for t in trajectories]
+        successes = [t.success for t in trajectories]
+        return [
+            MetricResult("mean_return", float(np.mean(rewards))),
+            MetricResult("median_return", float(np.median(rewards))),
+            MetricResult("std_return", float(np.std(rewards))),
+            MetricResult("mean_episode_length", float(np.mean(lengths))),
+            MetricResult("success_rate", float(np.mean(successes))),
+        ]
+    # ── offline benchmark metrics ───────────────────────────────────────
+    @staticmethod
+    def benchmark_metrics(dataset: TrajectoryDataset) -> List[MetricResult]:
+        results: List[MetricResult] = []
+        if len(dataset) == 0:
+            return results
+        results.append(MetricResult(
+            "pipeline_validity_rate",
+            EvaluationSuite._pipeline_validity_rate(dataset),
+        ))
+        results.append(MetricResult(
+            "ordering_score",
+            EvaluationSuite._ordering_score(dataset),
+        ))
+        results.append(MetricResult(
+            "action_diversity",
+            EvaluationSuite._action_diversity(dataset),
+        ))
+        results.append(MetricResult(
+            "mean_conclusion_confidence",
+            EvaluationSuite._mean_conclusion_confidence(dataset),
+        ))
+        return results
+    # ── expert review metrics (stubs) ───────────────────────────────────
+    @staticmethod
+    def expert_review_metrics(
+        trajectories: List[Trajectory],
+        expert_scores: Optional[Dict[str, float]] = None,
+    ) -> List[MetricResult]:
+        """Placeholder for human expert review scores.
+        In practice, each trajectory would be scored by a domain expert
+        on axes such as scientific validity, creativity, and efficiency.
+        """
+        if not expert_scores:
+            return [MetricResult("expert_review", 0.0, {"note": "no scores provided"})]
+        avg = float(np.mean(list(expert_scores.values())))
+        return [MetricResult("expert_review_mean", avg, expert_scores)]
+    # ── simulator fidelity metrics (stubs) ──────────────────────────────
+    @staticmethod
+    def simulator_fidelity_metrics(
+        simulated: TrajectoryDataset,
+        real: Optional[TrajectoryDataset] = None,
+    ) -> List[MetricResult]:
+        """Compare simulated trajectories against real experimental data.
+        When ``real`` is provided, computes distributional distances
+        between simulated and real output statistics.
+        """
+        if real is None or len(real) == 0:
+            return [MetricResult("fidelity", 0.0, {"note": "no real data"})]
+        sim_rewards = [t.total_reward for t in simulated.trajectories]
+        real_rewards = [t.total_reward for t in real.trajectories]
+        reward_gap = abs(float(np.mean(sim_rewards)) - float(np.mean(real_rewards)))
+        return [MetricResult("reward_distribution_gap", reward_gap)]
+    # ── internal helpers ────────────────────────────────────────────────
+    @staticmethod
+    def _pipeline_validity_rate(ds: TrajectoryDataset) -> float:
+        valid = 0
+        for t in ds.trajectories:
+            violations = sum(
+                1 for s in t.steps
+                if not s.observation.get("rule_violations") == []
+                and s.observation.get("rule_violations") is not None
+            )
+            if violations == 0:
+                valid += 1
+        return valid / max(len(ds), 1)
+    @staticmethod
+    def _ordering_score(ds: TrajectoryDataset) -> float:
+        scores: List[float] = []
+        for t in ds.trajectories:
+            breakdown_scores = []
+            for s in t.steps:
+                bd = s.reward_breakdown
+                if "ordering" in bd:
+                    breakdown_scores.append(bd["ordering"])
+            if breakdown_scores:
+                scores.append(float(np.mean(breakdown_scores)))
+        return float(np.mean(scores)) if scores else 0.0
+    @staticmethod
+    def _action_diversity(ds: TrajectoryDataset) -> float:
+        all_types: set = set()
+        for t in ds.trajectories:
+            for s in t.steps:
+                at = s.action.get("action_type")
+                if at:
+                    all_types.add(at)
+        return len(all_types)
+    @staticmethod
+    def _mean_conclusion_confidence(ds: TrajectoryDataset) -> float:
+        confs: List[float] = []
+        for t in ds.trajectories:
+            for s in t.steps:
+                conclusions = s.observation.get("conclusions", [])
+                for c in conclusions:
+                    if isinstance(c, dict) and "confidence" in c:
+                        confs.append(c["confidence"])
+        return float(np.mean(confs)) if confs else 0.0

training/gym_wrapper.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""Gymnasium-compatible wrapper around ``BioExperimentEnvironment``.
+Provides ``BioExperimentGymEnv`` which wraps the OpenEnv environment for
+local in-process RL training (no HTTP/WebSocket overhead).
+Observation and action spaces are represented as ``gymnasium.spaces.Dict``
+so that standard RL libraries (SB3, CleanRL, etc.) can ingest them.
+"""
+from __future__ import annotations
+from typing import Any, Dict, Optional, Tuple
+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+from models import ActionType, ExperimentAction, ExperimentObservation
+from server.hackathon_environment import BioExperimentEnvironment, MAX_STEPS
+ACTION_TYPE_LIST = list(ActionType)
+_N_ACTION_TYPES = len(ACTION_TYPE_LIST)
+_MAX_OUTPUTS = MAX_STEPS
+_MAX_HISTORY = MAX_STEPS
+_VEC_DIM = 64
+class BioExperimentGymEnv(gym.Env):
+    """Gymnasium ``Env`` backed by the in-process simulator.
+    Observations are flattened into a dictionary of NumPy arrays suitable
+    for RL policy networks.  Actions are integer-indexed action types with
+    a continuous confidence scalar.
+    For LLM-based agents or planners that prefer structured
+    ``ExperimentAction`` objects, use the underlying
+    ``BioExperimentEnvironment`` directly instead.
+    """
+    metadata = {"render_modes": ["human"]}
+    def __init__(self, render_mode: Optional[str] = None):
+        super().__init__()
+        self._env = BioExperimentEnvironment()
+        self.render_mode = render_mode
+        self.action_space = spaces.Dict({
+            "action_type": spaces.Discrete(_N_ACTION_TYPES),
+            "confidence": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+        })
+        self.observation_space = spaces.Dict({
+            "step_index": spaces.Discrete(MAX_STEPS + 1),
+            "budget_remaining_frac": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+            "time_remaining_frac": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+            "progress_flags": spaces.MultiBinary(18),
+            "latest_quality": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+            "latest_uncertainty": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+            "avg_quality": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+            "avg_uncertainty": spaces.Box(0.0, 1.0, shape=(), dtype=np.float32),
+            "n_violations": spaces.Discrete(20),
+            "n_outputs": spaces.Discrete(_MAX_OUTPUTS + 1),
+            "cumulative_reward": spaces.Box(-100.0, 100.0, shape=(), dtype=np.float32),
+        })
+        self._last_obs: Optional[ExperimentObservation] = None
+    # ── Gymnasium interface ─────────────────────────────────────────────
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        super().reset(seed=seed)
+        obs = self._env.reset()
+        self._last_obs = obs
+        return self._vectorise(obs), self._info(obs)
+    def step(
+        self, action: Dict[str, Any]
+    ) -> Tuple[Dict[str, Any], float, bool, bool, Dict[str, Any]]:
+        action_idx = int(action["action_type"])
+        confidence = float(action.get("confidence", 0.5))
+        experiment_action = ExperimentAction(
+            action_type=ACTION_TYPE_LIST[action_idx],
+            confidence=confidence,
+        )
+        obs = self._env.step(experiment_action)
+        self._last_obs = obs
+        terminated = obs.done
+        truncated = obs.step_index >= MAX_STEPS and not terminated
+        reward = obs.reward
+        return (
+            self._vectorise(obs),
+            reward,
+            terminated,
+            truncated,
+            self._info(obs),
+        )
+    def render(self) -> Optional[str]:
+        if self.render_mode != "human" or self._last_obs is None:
+            return None
+        obs = self._last_obs
+        lines = [
+            f"Step {obs.step_index}",
+            f"  Task: {obs.task.problem_statement[:80]}",
+            f"  Budget: ${obs.resource_usage.budget_remaining:,.0f} remaining",
+            f"  Time: {obs.resource_usage.time_remaining_days:.0f} days remaining",
+        ]
+        if obs.latest_output:
+            lines.append(f"  Latest: {obs.latest_output.summary}")
+        if obs.rule_violations:
+            lines.append(f"  Violations: {obs.rule_violations}")
+        text = "\n".join(lines)
+        print(text)
+        return text
+    # ── helpers ─────────────────────────────────────────────────────────
+    def _vectorise(self, obs: ExperimentObservation) -> Dict[str, Any]:
+        progress = self._env._latent.progress if self._env._latent else None
+        flags = np.zeros(18, dtype=np.int8)
+        if progress:
+            flag_names = [
+                "samples_collected", "cohort_selected", "cells_cultured",
+                "library_prepared", "perturbation_applied", "cells_sequenced",
+                "qc_performed", "data_filtered", "data_normalized",
+                "batches_integrated", "cells_clustered", "de_performed",
+                "trajectories_inferred", "pathways_analyzed",
+                "networks_inferred", "markers_discovered",
+                "markers_validated", "conclusion_reached",
+            ]
+            for i, f in enumerate(flag_names):
+                flags[i] = int(getattr(progress, f, False))
+        unc = obs.uncertainty_summary
+        lo = obs.latest_output
+        return {
+            "step_index": obs.step_index,
+            "budget_remaining_frac": np.float32(
+                obs.resource_usage.budget_remaining
+                / max(obs.task.budget_limit, 1)
+            ),
+            "time_remaining_frac": np.float32(
+                obs.resource_usage.time_remaining_days
+                / max(obs.task.time_limit_days, 1)
+            ),
+            "progress_flags": flags,
+            "latest_quality": np.float32(lo.quality_score if lo else 0.0),
+            "latest_uncertainty": np.float32(lo.uncertainty if lo else 0.0),
+            "avg_quality": np.float32(unc.get("avg_quality", 0.0)),
+            "avg_uncertainty": np.float32(unc.get("avg_uncertainty", 0.0)),
+            "n_violations": min(len(obs.rule_violations), 19),
+            "n_outputs": min(len(obs.all_outputs), _MAX_OUTPUTS),
+            "cumulative_reward": np.float32(
+                obs.metadata.get("cumulative_reward", 0.0)
+                if obs.metadata else 0.0
+            ),
+        }
+    def _info(self, obs: ExperimentObservation) -> Dict[str, Any]:
+        return {
+            "structured_obs": obs,
+            "episode_id": obs.metadata.get("episode_id") if obs.metadata else None,
+        }

training/literature_benchmark.py ADDED Viewed

	@@ -0,0 +1,557 @@

+"""Literature-grounded experiment benchmark utilities.
+This module lets the environment run a paper-backed experiment plan, then
+compare the resulting simulated findings against curated expected findings
+from the literature.
+"""
+from __future__ import annotations
+import argparse
+import json
+import re
+from dataclasses import asdict, dataclass, field
+from importlib.metadata import PackageNotFoundError, version
+from typing import Any, Dict, List, Optional, Sequence
+from models import (
+    ActionType,
+    ConclusionClaim,
+    ExperimentAction,
+    ExperimentObservation,
+    OutputType,
+    TaskSpec,
+)
+from server.hackathon_environment import BioExperimentEnvironment
+from server.tasks.scenarios import SCENARIO_LIBRARY, Scenario
+TOKEN_RE = re.compile(r"[A-Za-z0-9_+\-]+")
+STOPWORDS = {
+    "a",
+    "an",
+    "and",
+    "as",
+    "by",
+    "for",
+    "from",
+    "in",
+    "into",
+    "of",
+    "on",
+    "or",
+    "the",
+    "to",
+    "using",
+    "with",
+}
+BIO_LIBRARY_DISTRIBUTIONS = {
+    "scanpy": "scanpy",
+    "gseapy": "gseapy",
+    "biopython": "biopython",
+}
+@dataclass
+class PaperBenchmarkResult:
+    scenario_name: str
+    problem_statement: str
+    matched_papers: List[str]
+    bio_library_versions: Dict[str, Optional[str]]
+    matched_findings: List[str] = field(default_factory=list)
+    missed_findings: List[str] = field(default_factory=list)
+    discovered_markers: List[str] = field(default_factory=list)
+    candidate_mechanisms: List[str] = field(default_factory=list)
+    conclusions: List[str] = field(default_factory=list)
+    final_reward: float = 0.0
+    total_steps: int = 0
+    @property
+    def match_ratio(self) -> float:
+        total = len(self.matched_findings) + len(self.missed_findings)
+        return len(self.matched_findings) / max(total, 1)
+    def to_dict(self) -> Dict[str, Any]:
+        data = asdict(self)
+        data["match_ratio"] = self.match_ratio
+        return data
+def detect_bio_library_versions() -> Dict[str, Optional[str]]:
+    versions: Dict[str, Optional[str]] = {}
+    for name, dist_name in BIO_LIBRARY_DISTRIBUTIONS.items():
+        try:
+            versions[name] = version(dist_name)
+        except PackageNotFoundError:
+            versions[name] = None
+    return versions
+def select_literature_scenario(problem_statement: str) -> Scenario:
+    """Pick the closest literature-backed scenario for a prompt."""
+    prompt_tokens = set(_tokenize(problem_statement))
+    best_score = -1
+    best_scenario: Optional[Scenario] = None
+    for scenario in SCENARIO_LIBRARY:
+        if not scenario.task.paper_references:
+            continue
+        corpus = [
+            scenario.task.problem_statement,
+            *(ref.title for ref in scenario.task.paper_references),
+            *(finding.finding for finding in scenario.task.expected_findings),
+            scenario.task.tissue,
+            scenario.task.modality,
+            *scenario.task.conditions,
+        ]
+        score = len(prompt_tokens & set(_tokenize(" ".join(corpus))))
+        if scenario.task.problem_statement.lower() in problem_statement.lower():
+            score += 4
+        if score > best_score:
+            best_score = score
+            best_scenario = scenario
+    if best_scenario is None:
+        raise ValueError("No literature-backed scenarios are available.")
+    return best_scenario
+def run_paper_benchmark(
+    *,
+    problem_statement: str,
+    scenario_name: Optional[str] = None,
+    domain_randomise: bool = False,
+) -> PaperBenchmarkResult:
+    """Run a literature-backed episode and compare outputs to paper results."""
+    scenario = _resolve_scenario(problem_statement, scenario_name)
+    env = BioExperimentEnvironment(
+        scenario_name=scenario.name,
+        domain_randomise=domain_randomise,
+    )
+    obs = env.reset()
+    for action in build_paper_aligned_actions(obs.task):
+        obs = env.step(action)
+    claims = infer_conclusion_claims(obs)
+    obs = env.step(
+        ExperimentAction(
+            action_type=ActionType.SYNTHESIZE_CONCLUSION,
+            parameters={"claims": [claim.model_dump() for claim in claims]},
+            justification=(
+                "Summarize the simulated experimental evidence and compare it "
+                "with the paper-backed expected findings."
+            ),
+            confidence=0.8,
+            tool_call_spec=_tool_context(
+                obs.task,
+                libraries=["biopython"],
+                include_expected_findings=True,
+            ),
+        )
+    )
+    matched, missed = compare_expected_findings(obs.task, obs)
+    return PaperBenchmarkResult(
+        scenario_name=scenario.name,
+        problem_statement=obs.task.problem_statement,
+        matched_papers=[ref.title for ref in obs.task.paper_references],
+        bio_library_versions=detect_bio_library_versions(),
+        matched_findings=matched,
+        missed_findings=missed,
+        discovered_markers=list(obs.discovered_markers),
+        candidate_mechanisms=list(obs.candidate_mechanisms),
+        conclusions=[c.claim for c in obs.conclusions],
+        final_reward=float(obs.metadata.get("cumulative_reward", 0.0)),
+        total_steps=obs.step_index,
+    )
+def build_paper_aligned_actions(task: TaskSpec) -> List[ExperimentAction]:
+    """Construct a pragmatic analysis plan aligned to the task modality."""
+    actions: List[ExperimentAction] = [
+        ExperimentAction(
+            action_type=ActionType.COLLECT_SAMPLE,
+            parameters={"n_samples": 8},
+            justification="Collect enough samples to support downstream analysis.",
+            confidence=0.75,
+            tool_call_spec=_tool_context(task, libraries=["biopython"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.PREPARE_LIBRARY,
+            method="10x_chromium",
+            justification="Use a standard single-cell library prep workflow.",
+            confidence=0.8,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.SEQUENCE_CELLS,
+            method="NovaSeq",
+            justification="Generate sufficient single-cell read depth.",
+            confidence=0.8,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.RUN_QC,
+            method="scanpy.pp.calculate_qc_metrics",
+            justification="Check technical quality before downstream inference.",
+            confidence=0.85,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.FILTER_DATA,
+            method="scanpy.pp.filter_cells",
+            justification="Remove low-quality cells and reduce technical noise.",
+            confidence=0.85,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.NORMALIZE_DATA,
+            method="scanpy.pp.normalize_total",
+            justification="Normalize expression to prepare comparable profiles.",
+            confidence=0.85,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.CLUSTER_CELLS,
+            method="scanpy.tl.leiden",
+            justification="Resolve cell states before focused interpretation.",
+            confidence=0.8,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+    ]
+    categories = {finding.category for finding in task.expected_findings}
+    if "trajectory" in categories:
+        actions.extend([
+            ExperimentAction(
+                action_type=ActionType.TRAJECTORY_ANALYSIS,
+                method="scanpy.tl.dpt",
+                justification="Recover pseudotime structure and lineage branches.",
+                confidence=0.8,
+                tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+            ),
+            ExperimentAction(
+                action_type=ActionType.REGULATORY_NETWORK_INFERENCE,
+                method="pySCENIC",
+                justification="Infer branch-associated regulators from the trajectory.",
+                confidence=0.75,
+                tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+            ),
+            ExperimentAction(
+                action_type=ActionType.MARKER_SELECTION,
+                method="scanpy.tl.rank_genes_groups",
+                justification="Summarize lineage markers and branch-state genes.",
+                confidence=0.75,
+                tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+            ),
+        ])
+        return actions
+    actions.extend([
+        ExperimentAction(
+            action_type=ActionType.DIFFERENTIAL_EXPRESSION,
+            method="scanpy.tl.rank_genes_groups",
+            parameters={"comparison": _default_comparison_name(task)},
+            justification="Identify genes associated with the focal phenotype.",
+            confidence=0.85,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.PATHWAY_ENRICHMENT,
+            method="gseapy.prerank",
+            justification="Translate DE hits into pathway-level interpretation.",
+            confidence=0.8,
+            tool_call_spec=_tool_context(task, libraries=["gseapy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.MARKER_SELECTION,
+            method="scanpy.tl.rank_genes_groups",
+            justification="Nominate candidate markers for follow-up validation.",
+            confidence=0.8,
+            tool_call_spec=_tool_context(task, libraries=["scanpy"]),
+        ),
+        ExperimentAction(
+            action_type=ActionType.VALIDATE_MARKER,
+            method="immunofluorescence",
+            parameters={"marker": _preferred_marker(task)},
+            justification="Check whether the leading marker reproduces in validation.",
+            confidence=0.75,
+            tool_call_spec=_tool_context(task, libraries=["biopython"]),
+        ),
+    ])
+    return actions
+def infer_conclusion_claims(obs: ExperimentObservation) -> List[ConclusionClaim]:
+    """Turn accumulated evidence into concise, paper-comparable claims."""
+    markers = set(obs.discovered_markers)
+    mechanisms = set(obs.candidate_mechanisms)
+    network_regulators = set(_extract_network_regulators(obs))
+    trajectory_output = _latest_output_data(obs, OutputType.TRAJECTORY_RESULT)
+    claims: List[ConclusionClaim] = []
+    if "SPP1" in markers:
+        claims.append(ConclusionClaim(
+            claim="SPP1-positive macrophages are enriched in IPF fibrotic tissue.",
+            confidence=0.84,
+            claim_type="marker",
+            evidence_steps=_evidence_steps(obs, {
+                OutputType.DE_RESULT,
+                OutputType.MARKER_RESULT,
+                OutputType.VALIDATION_RESULT,
+            }),
+        ))
+    if {"SPP1", "MERTK"} <= markers:
+        claims.append(ConclusionClaim(
+            claim="MERTK co-occurs with the SPP1-positive profibrotic macrophage state.",
+            confidence=0.8,
+            claim_type="marker",
+            evidence_steps=_evidence_steps(obs, {
+                OutputType.DE_RESULT,
+                OutputType.MARKER_RESULT,
+            }),
+        ))
+    if "extracellular_matrix_organisation" in mechanisms:
+        claims.append(ConclusionClaim(
+            claim=(
+                "Extracellular matrix organization is a dominant fibrotic "
+                "program in the IPF samples."
+            ),
+            confidence=0.78,
+            claim_type="pathway",
+            evidence_steps=_evidence_steps(obs, {OutputType.PATHWAY_RESULT}),
+        ))
+    if trajectory_output.get("branching_detected"):
+        claims.append(ConclusionClaim(
+            claim=(
+                "Trajectory analysis recovered branching blood lineages rooted "
+                "in HSCs."
+            ),
+            confidence=0.82,
+            claim_type="trajectory",
+            evidence_steps=_evidence_steps(obs, {OutputType.TRAJECTORY_RESULT}),
+        ))
+    if "GATA1" in network_regulators:
+        claims.append(ConclusionClaim(
+            claim="GATA1 emerges as a driver of erythroid fate commitment.",
+            confidence=0.8,
+            claim_type="regulatory_network",
+            evidence_steps=_evidence_steps(obs, {OutputType.NETWORK_RESULT}),
+        ))
+    if {"CEBPA", "SPI1"} & network_regulators:
+        claims.append(ConclusionClaim(
+            claim="CEBPA and SPI1 support myeloid branch decisions.",
+            confidence=0.78,
+            claim_type="regulatory_network",
+            evidence_steps=_evidence_steps(obs, {OutputType.NETWORK_RESULT}),
+        ))
+    if claims:
+        return claims
+    # Fallback: preserve the strongest expected findings verbatim if the
+    # heuristic extractors do not recover enough signal from the episode.
+    return [
+        ConclusionClaim(
+            claim=finding.finding,
+            confidence=0.65,
+            claim_type=finding.category,
+        )
+        for finding in obs.task.expected_findings[:3]
+    ]
+def compare_expected_findings(
+    task: TaskSpec,
+    obs: ExperimentObservation,
+) -> tuple[List[str], List[str]]:
+    """Compare the episode evidence against literature-backed findings."""
+    evidence_text = _evidence_text(obs)
+    matched: List[str] = []
+    missed: List[str] = []
+    for finding in task.expected_findings:
+        keywords = [kw.lower() for kw in finding.keywords]
+        if not keywords:
+            keywords = _tokenize(finding.finding)
+        hits = sum(1 for kw in keywords if kw in evidence_text)
+        threshold = max(1, (len(keywords) + 1) // 2)
+        if hits >= threshold:
+            matched.append(finding.finding)
+        else:
+            missed.append(finding.finding)
+    return matched, missed
+def _resolve_scenario(
+    problem_statement: str,
+    scenario_name: Optional[str],
+) -> Scenario:
+    if scenario_name:
+        for scenario in SCENARIO_LIBRARY:
+            if scenario.name == scenario_name:
+                return scenario
+        raise ValueError(f"Unknown scenario_name '{scenario_name}'.")
+    return select_literature_scenario(problem_statement)
+def _tool_context(
+    task: TaskSpec,
+    *,
+    libraries: Sequence[str],
+    include_expected_findings: bool = False,
+) -> Dict[str, Any]:
+    context: Dict[str, Any] = {
+        "literature_query": task.problem_statement,
+        "paper_references": [
+            {
+                "title": ref.title,
+                "doi": ref.doi,
+                "pmid": ref.pmid,
+                "url": ref.url,
+            }
+            for ref in task.paper_references
+        ],
+        "bioinformatics_libraries": list(libraries),
+    }
+    if include_expected_findings:
+        context["expected_findings"] = [
+            finding.finding for finding in task.expected_findings
+        ]
+    return context
+def _default_comparison_name(task: TaskSpec) -> str:
+    conditions = {condition.lower() for condition in task.conditions}
+    if {"healthy", "ipf"} <= conditions:
+        return "IPF_vs_healthy"
+    if any("treated" in condition for condition in conditions) and any(
+        "untreated" in condition for condition in conditions
+    ):
+        return "treated_vs_untreated"
+    if any("healthy" in condition for condition in conditions):
+        return "disease_vs_healthy"
+    return "disease_vs_healthy"
+def _preferred_marker(task: TaskSpec) -> str:
+    for finding in task.expected_findings:
+        for keyword in finding.keywords:
+            if keyword.isupper():
+                return keyword
+    return "SPP1"
+def _latest_output_data(
+    obs: ExperimentObservation,
+    output_type: OutputType,
+) -> Dict[str, Any]:
+    for output in reversed(obs.all_outputs):
+        if output.output_type == output_type:
+            return output.data
+    return {}
+def _extract_network_regulators(obs: ExperimentObservation) -> List[str]:
+    for output in reversed(obs.all_outputs):
+        if output.output_type == OutputType.NETWORK_RESULT:
+            return output.data.get("top_regulators", [])
+    return []
+def _evidence_steps(
+    obs: ExperimentObservation,
+    output_types: set[OutputType],
+) -> List[int]:
+    return [
+        output.step_index
+        for output in obs.all_outputs
+        if output.output_type in output_types
+    ]
+def _evidence_text(obs: ExperimentObservation) -> str:
+    parts: List[str] = []
+    parts.extend(obs.discovered_markers)
+    parts.extend(obs.candidate_mechanisms)
+    parts.extend(conclusion.claim for conclusion in obs.conclusions)
+    for output in obs.all_outputs:
+        parts.append(output.summary)
+        if output.output_type == OutputType.DE_RESULT:
+            parts.extend(
+                gene["gene"]
+                for gene in output.data.get("top_genes", [])
+                if isinstance(gene, dict) and "gene" in gene
+            )
+        elif output.output_type == OutputType.PATHWAY_RESULT:
+            parts.extend(
+                pathway["pathway"]
+                for pathway in output.data.get("top_pathways", [])
+                if isinstance(pathway, dict) and "pathway" in pathway
+            )
+        elif output.output_type == OutputType.NETWORK_RESULT:
+            parts.extend(output.data.get("top_regulators", []))
+        elif output.output_type == OutputType.TRAJECTORY_RESULT:
+            if output.data.get("branching_detected"):
+                parts.append("branching lineage HSC trajectory")
+    return " ".join(parts).lower()
+def _tokenize(text: str) -> List[str]:
+    return [
+        token.lower()
+        for token in TOKEN_RE.findall(text)
+        if token and token.lower() not in STOPWORDS
+    ]
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--problem-statement",
+        default=(
+            "Design a follow-up validation experiment for candidate biomarker "
+            "SPP1 in idiopathic pulmonary fibrosis."
+        ),
+    )
+    parser.add_argument("--scenario-name", default=None)
+    parser.add_argument("--domain-randomise", action="store_true")
+    parser.add_argument("--json", action="store_true")
+    args = parser.parse_args()
+    result = run_paper_benchmark(
+        problem_statement=args.problem_statement,
+        scenario_name=args.scenario_name,
+        domain_randomise=args.domain_randomise,
+    )
+    if args.json:
+        print(json.dumps(result.to_dict(), indent=2))
+        return
+    print(f"Scenario: {result.scenario_name}")
+    print(f"Problem: {result.problem_statement}")
+    print(f"Paper: {', '.join(result.matched_papers)}")
+    print(f"Match ratio: {result.match_ratio:.2%}")
+    print(f"Matched findings: {len(result.matched_findings)}")
+    print(f"Missed findings: {len(result.missed_findings)}")
+    print(f"Discovered markers: {', '.join(result.discovered_markers[:8])}")
+    print(f"Candidate mechanisms: {', '.join(result.candidate_mechanisms[:5])}")
+    print(f"Conclusions: {len(result.conclusions)}")
+    print(f"Final reward: {result.final_reward:+.3f}")
+    print(f"Bio libraries: {json.dumps(result.bio_library_versions, sort_keys=True)}")
+if __name__ == "__main__":
+    main()

training/trajectory.py ADDED Viewed

	@@ -0,0 +1,159 @@

+"""Trajectory serialisation and dataset utilities.
+A ``Trajectory`` stores the full history of one episode (task, actions,
+observations, rewards, latent-state snapshots) in a format that supports:
+  - offline RL training
+  - imitation learning from expert demonstrations
+  - evaluation / replay
+  - simulator calibration
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from models import (
+    ExperimentAction,
+    ExperimentObservation,
+    TaskSpec,
+)
+@dataclass
+class TrajectoryStep:
+    step_index: int
+    action: Dict[str, Any]
+    observation: Dict[str, Any]
+    reward: float
+    done: bool
+    reward_breakdown: Dict[str, float] = field(default_factory=dict)
+    latent_snapshot: Optional[Dict[str, Any]] = None
+@dataclass
+class Trajectory:
+    """Complete record of one environment episode."""
+    episode_id: str
+    task: Dict[str, Any]
+    steps: List[TrajectoryStep] = field(default_factory=list)
+    total_reward: float = 0.0
+    success: bool = False
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    # ── construction helpers ────────────────────────────────────────────
+    def add_step(
+        self,
+        action: ExperimentAction,
+        observation: ExperimentObservation,
+        reward: float,
+        done: bool,
+        reward_breakdown: Optional[Dict[str, float]] = None,
+        latent_snapshot: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        self.steps.append(TrajectoryStep(
+            step_index=len(self.steps),
+            action=action.model_dump(),
+            observation=observation.model_dump(),
+            reward=reward,
+            done=done,
+            reward_breakdown=reward_breakdown or {},
+            latent_snapshot=latent_snapshot,
+        ))
+        self.total_reward += reward
+        if done:
+            self.success = reward > 0
+    # ── serialisation ───────────────────────────────────────────────────
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "episode_id": self.episode_id,
+            "task": self.task,
+            "steps": [
+                {
+                    "step_index": s.step_index,
+                    "action": s.action,
+                    "observation": s.observation,
+                    "reward": s.reward,
+                    "done": s.done,
+                    "reward_breakdown": s.reward_breakdown,
+                    "latent_snapshot": s.latent_snapshot,
+                }
+                for s in self.steps
+            ],
+            "total_reward": self.total_reward,
+            "success": self.success,
+            "metadata": self.metadata,
+        }
+    def save(self, path: str | Path) -> None:
+        p = Path(path)
+        p.parent.mkdir(parents=True, exist_ok=True)
+        with open(p, "w") as f:
+            json.dump(self.to_dict(), f, indent=2, default=str)
+    @classmethod
+    def load(cls, path: str | Path) -> "Trajectory":
+        with open(path) as f:
+            d = json.load(f)
+        traj = cls(
+            episode_id=d["episode_id"],
+            task=d["task"],
+            total_reward=d.get("total_reward", 0.0),
+            success=d.get("success", False),
+            metadata=d.get("metadata", {}),
+        )
+        for s in d.get("steps", []):
+            traj.steps.append(TrajectoryStep(**s))
+        return traj
+class TrajectoryDataset:
+    """In-memory collection of trajectories with convenience accessors."""
+    def __init__(self, trajectories: Optional[List[Trajectory]] = None):
+        self.trajectories: List[Trajectory] = trajectories or []
+    def add(self, traj: Trajectory) -> None:
+        self.trajectories.append(traj)
+    def __len__(self) -> int:
+        return len(self.trajectories)
+    def __getitem__(self, idx: int) -> Trajectory:
+        return self.trajectories[idx]
+    def filter_successful(self) -> "TrajectoryDataset":
+        return TrajectoryDataset([t for t in self.trajectories if t.success])
+    def save_dir(self, directory: str | Path) -> None:
+        d = Path(directory)
+        d.mkdir(parents=True, exist_ok=True)
+        for t in self.trajectories:
+            t.save(d / f"{t.episode_id}.json")
+    @classmethod
+    def load_dir(cls, directory: str | Path) -> "TrajectoryDataset":
+        d = Path(directory)
+        trajs = [Trajectory.load(p) for p in sorted(d.glob("*.json"))]
+        return cls(trajs)
+    def summary(self) -> Dict[str, Any]:
+        if not self.trajectories:
+            return {"n": 0}
+        rewards = [t.total_reward for t in self.trajectories]
+        lengths = [len(t.steps) for t in self.trajectories]
+        success_rate = sum(1 for t in self.trajectories if t.success) / len(self.trajectories)
+        return {
+            "n": len(self.trajectories),
+            "success_rate": success_rate,
+            "mean_reward": sum(rewards) / len(rewards),
+            "mean_length": sum(lengths) / len(lengths),
+            "max_reward": max(rewards),
+            "min_reward": min(rewards),
+        }

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff