Spaces:
Running
Running
File size: 1,661 Bytes
80d8c84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | """Tests for Scientist and Lab Manager dataset builders."""
from __future__ import annotations
import json
from replicalab.models import LabManagerAction
from replicalab.training.datasets import (
build_lab_manager_sft_examples,
build_scientist_prompt_examples,
)
def test_scientist_prompt_examples_include_frozen_evidence_when_available() -> None:
    """Check scientist prompt rows for variant coverage and evidence attachment.

    Builds rows for one seed, two templates and one difficulty, then asserts:

    * exactly six rows are produced;
    * the ``math_reasoning`` / ``paper_understanding`` row has no evidence id;
    * the ``ml_benchmark`` / ``paper_understanding`` row has an evidence id and
      its last prompt message mentions the frozen evidence pack;
    * the goal variants across all rows are exactly the three expected ones.
    """
    examples = build_scientist_prompt_examples(
        seeds=[3],
        templates=["math_reasoning", "ml_benchmark"],
        difficulties=["easy"],
    )
    assert len(examples) == 6

    def paper_understanding_row(scenario: str):
        # First row for the scenario with the "paper_understanding" variant;
        # next() raises StopIteration when no such row exists.
        candidates = (
            example
            for example in examples
            if example.scenario == scenario
            and example.goal_variant == "paper_understanding"
        )
        return next(candidates)

    math_example = paper_understanding_row("math_reasoning")
    ml_example = paper_understanding_row("ml_benchmark")

    assert math_example.evidence_id is None
    assert ml_example.evidence_id is not None

    final_message = ml_example.prompt[-1]
    assert "Frozen evidence pack" in final_message["content"]

    observed_variants = {example.goal_variant for example in examples}
    assert observed_variants == {
        "paper_understanding",
        "constraint_grounding",
        "negotiation_quality",
    }
def test_lab_manager_sft_examples_emit_valid_lab_manager_action_json() -> None:
    """Check that Lab Manager SFT rows carry a parseable action target.

    Builds rows for one seed, the ``ml_benchmark`` template and one
    difficulty, then asserts:

    * at least two rows are produced;
    * the candidate kinds include both ``baseline`` and ``constraint_stress``;
    * the first row's ``target_json`` decodes as JSON and validates as a
      ``LabManagerAction`` with a truthy ``explanation``;
    * the first row's final message has the ``assistant`` role.
    """
    examples = build_lab_manager_sft_examples(
        seeds=[2],
        templates=["ml_benchmark"],
        difficulties=["easy"],
    )
    assert len(examples) >= 2

    candidate_kinds = {example.candidate_kind for example in examples}
    assert candidate_kinds.issuperset({"baseline", "constraint_stress"})

    # Only the first row is decoded and validated.
    first_example = examples[0]
    decoded_target = json.loads(first_example.target_json)
    validated_action = LabManagerAction.model_validate(decoded_target)
    assert validated_action.explanation

    closing_message = first_example.messages[-1]
    assert closing_message["role"] == "assistant"
|