# replicalab/tests/fixtures/generate_api_examples.py
# Provenance (from HF Spaces page metadata, kept as a comment so the file
# remains valid Python): initial HF Spaces deployment by maxxie114, 80d8c84.
#!/usr/bin/env python3
"""Generate api_schema_examples.json from real Pydantic models.
MOD 10 — run this script to regenerate the fixture whenever the
contracts change. The output is deterministic.
Usage:
python tests/fixtures/generate_api_examples.py
"""
from __future__ import annotations
import json
from pathlib import Path
from replicalab.config import DEFAULT_DIFFICULTY, DEFAULT_SCENARIO_TEMPLATE
from replicalab.agents.judge_policy import build_judge_audit
from replicalab.models import (
ConversationEntry,
EpisodeLog,
EpisodeState,
LabManagerObservation,
Observation,
Protocol,
RewardBreakdown,
ScientistAction,
ScientistObservation,
StepInfo,
StepResult,
)
from replicalab.scenarios import available_scenario_families, generate_scenario
from replicalab.scoring.rubric import compute_total_reward
# Destination fixture file, written next to this script.
OUTPUT_PATH = Path(__file__).parent / "api_schema_examples.json"
# ---------------------------------------------------------------------------
# Build realistic payloads from real models
# ---------------------------------------------------------------------------
# Fixed inputs so regenerating the fixture is deterministic (see module
# docstring): same seed/template/difficulty -> same scenario -> same JSON.
_SEED = 42
_TEMPLATE = DEFAULT_SCENARIO_TEMPLATE
_DIFFICULTY = DEFAULT_DIFFICULTY
# Generate a real scenario to extract observation data
_pack = generate_scenario(seed=_SEED, template=_TEMPLATE, difficulty=_DIFFICULTY)
_sci_obs = _pack.scientist_observation
_lm_obs = _pack.lab_manager_observation
# Canonical "successful episode" scoring artifacts, shared by the terminal
# step example and the replay example below.
_TERMINAL_BREAKDOWN = RewardBreakdown(
    rigor=0.8,
    feasibility=0.8,
    fidelity=0.8,
    efficiency_bonus=0.2,
    communication_bonus=0.1,
    penalties={},
)
# Judge audit consistent with the breakdown above: agreement after 3 rounds.
_TERMINAL_AUDIT = build_judge_audit(
    _TERMINAL_BREAKDOWN,
    agreement_reached=True,
    rounds_used=3,
    max_rounds=_sci_obs.max_rounds,
)
_TERMINAL_REWARD = compute_total_reward(_TERMINAL_BREAKDOWN)
def _reset_request():
    """Return an example request body for ``POST /reset``."""
    body = {"seed": _SEED, "scenario": _TEMPLATE, "difficulty": _DIFFICULTY}
    # No session reuse in the example: a fresh session is created server-side.
    body["session_id"] = None
    return body
def _reset_response():
    """Return an example response body for ``POST /reset``."""
    initial_obs = Observation(scientist=_sci_obs, lab_manager=_lm_obs)
    # IDs are fixed placeholders so the fixture stays deterministic.
    return {
        "session_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
        "episode_id": "ep-deadbeef-1234-5678-9abc-def012345678",
        "observation": initial_obs.model_dump(),
    }
def _propose_action():
    """Return a realistic ``propose_protocol`` scientist action as a dict.

    Equipment/reagents come from the generated lab inventory; placeholder
    names are used only when the inventory happens to be empty.
    """
    equipment = list(_lm_obs.equipment_available[:2]) or ["tool_a"]
    reagents = list(_lm_obs.reagents_in_stock[:2]) or ["ref_a"]
    action = ScientistAction(
        action_type="propose_protocol",
        sample_size=30,
        controls=["positive_control", "negative_control"],
        technique=_sci_obs.paper_method,
        duration_days=5,
        required_equipment=equipment,
        required_reagents=reagents,
        questions=[],
        rationale="Initial proposal using available resources.",
    )
    return action.model_dump()
def _step_request():
    """Return an example request body for ``POST /step``."""
    session = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
    return {"session_id": session, "action": _propose_action()}
def _mid_episode_step_result():
    """Return a ``StepResult`` dump for a non-terminal round.

    Reward is 0.0 and ``done`` is False; the observation reflects one
    completed proposal/feasibility exchange at round 1.
    """
    equipment = list(_lm_obs.equipment_available[:2]) or ["tool_a"]
    reagents = list(_lm_obs.reagents_in_stock[:2]) or ["ref_a"]
    protocol = Protocol(
        sample_size=30,
        controls=["positive_control", "negative_control"],
        technique=_sci_obs.paper_method,
        duration_days=5,
        required_equipment=equipment,
        required_reagents=reagents,
        rationale="Initial proposal using available resources.",
    )
    # One scientist proposal followed by the lab manager's feasibility report.
    exchange = [
        ("scientist", "Initial proposal using available resources.", "propose_protocol"),
        ("lab_manager", "Budget is within range. Equipment is available.", "report_feasibility"),
    ]
    history = [
        ConversationEntry(role=who, message=text, round_number=1, action_type=kind)
        for who, text, kind in exchange
    ]
    scientist_view = ScientistObservation(
        paper_title=_sci_obs.paper_title,
        paper_hypothesis=_sci_obs.paper_hypothesis,
        paper_method=_sci_obs.paper_method,
        paper_key_finding=_sci_obs.paper_key_finding,
        experiment_goal=_sci_obs.experiment_goal,
        conversation_history=history,
        current_protocol=protocol,
        round_number=1,
        max_rounds=_sci_obs.max_rounds,
    )
    manager_view = LabManagerObservation(
        budget_total=_lm_obs.budget_total,
        budget_remaining=_lm_obs.budget_remaining,
        equipment_available=list(_lm_obs.equipment_available),
        equipment_booked=list(_lm_obs.equipment_booked),
        reagents_in_stock=list(_lm_obs.reagents_in_stock),
        reagents_out_of_stock=list(_lm_obs.reagents_out_of_stock),
        staff_count=_lm_obs.staff_count,
        time_limit_days=_lm_obs.time_limit_days,
        safety_restrictions=list(_lm_obs.safety_restrictions),
        conversation_history=history,
        current_protocol=protocol,
        round_number=1,
        max_rounds=_lm_obs.max_rounds,
    )
    result = StepResult(
        observation=Observation(scientist=scientist_view, lab_manager=manager_view),
        reward=0.0,
        done=False,
        info=StepInfo(
            agreement_reached=False,
            error=None,
            reward_breakdown=None,
            judge_notes=None,
            verdict=None,
        ),
    )
    return result.model_dump()
def _terminal_step_result():
    """Return a ``StepResult`` dump for a finished episode.

    ``observation`` is None because the episode is over; the info block
    carries the full judge audit and reward breakdown.
    """
    terminal_info = StepInfo(
        agreement_reached=True,
        error=None,
        reward_breakdown=_TERMINAL_BREAKDOWN,
        judge_notes=_TERMINAL_AUDIT.judge_notes,
        verdict=_TERMINAL_AUDIT.verdict,
        top_failure_reasons=list(_TERMINAL_AUDIT.top_failure_reasons),
    )
    result = StepResult(
        observation=None,
        reward=_TERMINAL_REWARD,
        done=True,
        info=terminal_info,
    )
    return result.model_dump()
def _scenarios_response():
    """Return an example response body for ``GET /scenarios``."""
    families = available_scenario_families()
    return {"scenarios": families}
def _replay_response():
    """Return an ``EpisodeLog`` dump for ``GET /replay/{episode_id}``.

    Depicts a successful 3-round episode whose scoring artifacts match the
    terminal step example.
    """
    # Same two-message exchange as the mid-episode observation example.
    exchange = [
        ("scientist", "Initial proposal using available resources.", "propose_protocol"),
        ("lab_manager", "Budget is within range. Equipment is available.", "report_feasibility"),
    ]
    transcript = [
        ConversationEntry(role=who, message=text, round_number=1, action_type=kind)
        for who, text, kind in exchange
    ]
    final_state = EpisodeState(
        seed=_SEED,
        scenario_template=_TEMPLATE,
        difficulty=_DIFFICULTY,
        paper_title=_sci_obs.paper_title,
        paper_hypothesis=_sci_obs.paper_hypothesis,
        paper_method=_sci_obs.paper_method,
        paper_key_finding=_sci_obs.paper_key_finding,
        experiment_goal=_sci_obs.experiment_goal,
        lab_budget_total=_lm_obs.budget_total,
        lab_budget_remaining=_lm_obs.budget_remaining,
        lab_equipment=list(_lm_obs.equipment_available),
        lab_reagents=list(_lm_obs.reagents_in_stock),
        lab_staff_count=_lm_obs.staff_count,
        lab_time_limit_days=_lm_obs.time_limit_days,
        round_number=3,
        max_rounds=_sci_obs.max_rounds,
        done=True,
        agreement_reached=True,
        reward=_TERMINAL_REWARD,
        rigor_score=_TERMINAL_BREAKDOWN.rigor,
        feasibility_score=_TERMINAL_BREAKDOWN.feasibility,
        fidelity_score=_TERMINAL_BREAKDOWN.fidelity,
        judge_notes=_TERMINAL_AUDIT.judge_notes,
        verdict=_TERMINAL_AUDIT.verdict,
        top_failure_reasons=list(_TERMINAL_AUDIT.top_failure_reasons),
    )
    log = EpisodeLog(
        episode_id="ep-deadbeef-1234-5678-9abc-def012345678",
        seed=_SEED,
        scenario_template=_TEMPLATE,
        difficulty=_DIFFICULTY,
        final_state=final_state,
        transcript=transcript,
        reward_breakdown=_TERMINAL_BREAKDOWN,
        total_reward=_TERMINAL_REWARD,
        rounds_used=3,
        agreement_reached=True,
        judge_notes=_TERMINAL_AUDIT.judge_notes,
        verdict=_TERMINAL_AUDIT.verdict,
        top_failure_reasons=list(_TERMINAL_AUDIT.top_failure_reasons),
    )
    return log.model_dump()
def _ws_reset_message():
    """Return the example client->server WebSocket ``reset`` message."""
    message = {"type": "reset"}
    message["seed"] = _SEED
    message["scenario"] = _TEMPLATE
    message["difficulty"] = _DIFFICULTY
    return message
def _ws_reset_ok_message():
    """Return the example server->client ``reset_ok`` message."""
    initial_obs = Observation(scientist=_sci_obs, lab_manager=_lm_obs)
    return {
        "type": "reset_ok",
        "episode_id": "ep-deadbeef-1234-5678-9abc-def012345678",
        "observation": initial_obs.model_dump(),
    }
def _ws_step_message():
    """Return the example client->server WebSocket ``step`` message."""
    message = {"type": "step"}
    message["action"] = _propose_action()
    return message
def _ws_step_ok_message():
    """Return the example server->client ``step_ok`` message.

    Same payload as the REST mid-episode step result, plus the ``type`` tag.
    """
    message = dict(type="step_ok")
    message.update(_mid_episode_step_result())
    return message
# ---------------------------------------------------------------------------
# Assemble and write
# ---------------------------------------------------------------------------
def main():
    """Assemble every example payload and write the JSON fixture to disk."""
    meta = {
        "generated_by": "tests/fixtures/generate_api_examples.py",
        "description": "API schema examples generated from real Pydantic models. Re-run the script to regenerate after contract changes.",
        "seed": _SEED,
        "scenario_template": _TEMPLATE,
        "difficulty": _DIFFICULTY,
    }
    rest = {
        "POST /reset": {
            "request": _reset_request(),
            "response": _reset_response(),
        },
        "POST /step": {
            "request": _step_request(),
            "response_mid_episode": _mid_episode_step_result(),
            "response_terminal": _terminal_step_result(),
        },
        "GET /scenarios": {"response": _scenarios_response()},
        "GET /replay/{episode_id}": {"response": _replay_response()},
    }
    websocket = {
        "reset": {
            "client_sends": _ws_reset_message(),
            "server_responds": _ws_reset_ok_message(),
        },
        "step": {
            "client_sends": _ws_step_message(),
            "server_responds": _ws_step_ok_message(),
        },
        "ping": {
            "client_sends": {"type": "ping"},
            "server_responds": {"type": "pong"},
        },
    }
    # Key order matters for a stable diff: _meta, then rest, then websocket.
    examples = {"_meta": meta, "rest": rest, "websocket": websocket}
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(examples, indent=2, ensure_ascii=False) + "\n"
    OUTPUT_PATH.write_text(rendered, encoding="utf-8")
    print(f"Wrote {OUTPUT_PATH}")
# Script entry point: regenerate the fixture when run directly.
if __name__ == "__main__":
    main()