File size: 1,661 Bytes
80d8c84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Tests for Scientist and Lab Manager dataset builders."""

from __future__ import annotations

import json

from replicalab.models import LabManagerAction
from replicalab.training.datasets import (
    build_lab_manager_sft_examples,
    build_scientist_prompt_examples,
)


def test_scientist_prompt_examples_include_frozen_evidence_when_available() -> None:
    """Check evidence handling in the Scientist prompt examples.

    Builds examples for seed 3 at "easy" difficulty over the
    "math_reasoning" and "ml_benchmark" templates, then verifies that:

    * exactly six examples are produced;
    * the "math_reasoning" / "paper_understanding" example has no evidence id;
    * the "ml_benchmark" / "paper_understanding" example has an evidence id
      and its last prompt message mentions "Frozen evidence pack";
    * the goal variants seen across all examples are exactly the three
      expected ones.
    """
    expected_variants = {
        "paper_understanding",
        "constraint_grounding",
        "negotiation_quality",
    }

    examples = build_scientist_prompt_examples(
        seeds=[3],
        templates=["math_reasoning", "ml_benchmark"],
        difficulties=["easy"],
    )
    assert len(examples) == 6

    def paper_understanding_example(scenario: str):
        # First example for the scenario with the "paper_understanding"
        # goal variant; raises StopIteration when none exists.
        return next(
            example
            for example in examples
            if example.scenario == scenario
            and example.goal_variant == "paper_understanding"
        )

    math_example = paper_understanding_example("math_reasoning")
    ml_example = paper_understanding_example("ml_benchmark")

    assert math_example.evidence_id is None
    assert ml_example.evidence_id is not None
    assert "Frozen evidence pack" in ml_example.prompt[-1]["content"]

    seen_variants = {example.goal_variant for example in examples}
    assert seen_variants == expected_variants


def test_lab_manager_sft_examples_emit_valid_lab_manager_action_json() -> None:
    """Check that Lab Manager SFT examples carry a valid action payload.

    Builds examples for seed 2 on the "ml_benchmark" template at "easy"
    difficulty, then verifies that:

    * at least two examples are produced;
    * the candidate kinds include both "baseline" and "constraint_stress";
    * the first example's ``target_json`` parses as JSON and validates as a
      ``LabManagerAction`` with a truthy ``explanation``;
    * the first example's last message has the "assistant" role.
    """
    required_kinds = {"baseline", "constraint_stress"}

    examples = build_lab_manager_sft_examples(
        seeds=[2],
        templates=["ml_benchmark"],
        difficulties=["easy"],
    )

    assert len(examples) >= 2
    candidate_kinds = {example.candidate_kind for example in examples}
    assert candidate_kinds >= required_kinds

    first_example = examples[0]
    decoded_target = json.loads(first_example.target_json)
    parsed_action = LabManagerAction.model_validate(decoded_target)

    assert parsed_action.explanation
    final_message = first_example.messages[-1]
    assert final_message["role"] == "assistant"