{ "_meta": { "generated_by": "tests/fixtures/generate_api_examples.py", "description": "API schema examples generated from real Pydantic models. Re-run the script to regenerate after contract changes.", "seed": 42, "scenario_template": "math_reasoning", "difficulty": "easy" }, "rest": { "POST /reset": { "request": { "seed": 42, "scenario": "math_reasoning", "difficulty": "easy", "session_id": null }, "response": { "session_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "episode_id": "ep-deadbeef-1234-5678-9abc-def012345678", "observation": { "scientist": { "paper_title": "Planning a proof of the Cauchy-Schwarz inequality", "paper_hypothesis": "A square-expansion argument gives the cleanest proof path.", "paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.", "experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.", "conversation_history": [], "current_protocol": null, "round_number": 0, "max_rounds": 6 }, "lab_manager": { "budget_total": 345.0, "budget_remaining": 345.0, "equipment_available": [ "Structured proof notebook" ], "equipment_booked": [], "reagents_in_stock": [ "Reference theorem library", "Graduate reviewer" ], "reagents_out_of_stock": [], "staff_count": 1, "time_limit_days": 3, "safety_restrictions": [ "The outline should stay concise enough for seminar notes." ], "conversation_history": [], "current_protocol": null, "round_number": 0, "max_rounds": 6 } } } }, "POST /step": { "request": { "session_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "action": { "action_type": "propose_protocol", "sample_size": 30, "controls": [ "positive_control", "negative_control" ], "technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "duration_days": 5, "required_equipment": [ "Structured proof notebook" ], "required_reagents": [ "Reference theorem library", "Graduate reviewer" ], "questions": [], "rationale": "Initial proposal using available resources." } }, "response_mid_episode": { "observation": { "scientist": { "paper_title": "Planning a proof of the Cauchy-Schwarz inequality", "paper_hypothesis": "A square-expansion argument gives the cleanest proof path.", "paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.", "experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.", "conversation_history": [ { "role": "scientist", "message": "Initial proposal using available resources.", "round_number": 1, "action_type": "propose_protocol" }, { "role": "lab_manager", "message": "Budget is within range. Equipment is available.", "round_number": 1, "action_type": "report_feasibility" } ], "current_protocol": { "sample_size": 30, "controls": [ "positive_control", "negative_control" ], "technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "duration_days": 5, "required_equipment": [ "Structured proof notebook" ], "required_reagents": [ "Reference theorem library", "Graduate reviewer" ], "rationale": "Initial proposal using available resources." }, "round_number": 1, "max_rounds": 6 }, "lab_manager": { "budget_total": 345.0, "budget_remaining": 345.0, "equipment_available": [ "Structured proof notebook" ], "equipment_booked": [], "reagents_in_stock": [ "Reference theorem library", "Graduate reviewer" ], "reagents_out_of_stock": [], "staff_count": 1, "time_limit_days": 3, "safety_restrictions": [ "The outline should stay concise enough for seminar notes." ], "conversation_history": [ { "role": "scientist", "message": "Initial proposal using available resources.", "round_number": 1, "action_type": "propose_protocol" }, { "role": "lab_manager", "message": "Budget is within range. Equipment is available.", "round_number": 1, "action_type": "report_feasibility" } ], "current_protocol": { "sample_size": 30, "controls": [ "positive_control", "negative_control" ], "technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "duration_days": 5, "required_equipment": [ "Structured proof notebook" ], "required_reagents": [ "Reference theorem library", "Graduate reviewer" ], "rationale": "Initial proposal using available resources." }, "round_number": 1, "max_rounds": 6 } }, "reward": 0.0, "done": false, "info": { "agreement_reached": false, "error": null, "reward_breakdown": null, "judge_notes": null, "verdict": null, "top_failure_reasons": [] } }, "response_terminal": { "observation": null, "reward": 5.42, "done": true, "info": { "agreement_reached": true, "error": null, "reward_breakdown": { "rigor": 0.8, "feasibility": 0.8, "fidelity": 0.8, "efficiency_bonus": 0.2, "communication_bonus": 0.1, "penalties": {} }, "judge_notes": "Rigor: 0.80 (strong) — measures structural completeness, success-criteria coverage, and required-element coverage.\nFeasibility: 0.80 (strong) — measures whether the protocol respects budget, equipment, reagent, schedule, and staffing constraints.\nFidelity: 0.80 (strong) — measures alignment with the hidden reference spec, including required elements, substitutions, and target metrics.\nEfficiency bonus: +0.20 (awarded for reaching agreement in fewer rounds).\nCommunication bonus: +0.10.\nNo penalties applied.\nTotal reward: 5.42 (formula: 10 × rigor × feasibility × fidelity + bonuses − penalties).", "verdict": "accept", "top_failure_reasons": [] } } }, "GET /scenarios": { "response": { "scenarios": [ { "family": "math_reasoning", "difficulties": [ "easy", "medium", "hard" ] }, { "family": "ml_benchmark", "difficulties": [ "easy", "medium", "hard" ] }, { "family": "finance_trading", "difficulties": [ "easy", "medium", "hard" ] } ] } }, "GET /replay/{episode_id}": { "response": { "episode_id": "ep-deadbeef-1234-5678-9abc-def012345678", "seed": 42, "scenario_template": "math_reasoning", "difficulty": "easy", "final_state": { "seed": 42, "scenario_template": "math_reasoning", "difficulty": "easy", "paper_title": "Planning a proof of the Cauchy-Schwarz inequality", "paper_hypothesis": "A square-expansion argument gives the cleanest proof path.", "paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.", "experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.", "lab_budget_total": 345.0, "lab_budget_remaining": 345.0, "lab_equipment": [ "Structured proof notebook" ], "lab_reagents": [ "Reference theorem library", "Graduate reviewer" ], "lab_staff_count": 1, "lab_time_limit_days": 3, "current_protocol": null, "conversation_history": [], "round_number": 3, "max_rounds": 6, "done": true, "agreement_reached": true, "reward": 5.42, "rigor_score": 0.8, "feasibility_score": 0.8, "fidelity_score": 0.8, "judge_notes": "Rigor: 0.80 (strong) — measures structural completeness, success-criteria coverage, and required-element coverage.\nFeasibility: 0.80 (strong) — measures whether the protocol respects budget, equipment, reagent, schedule, and staffing constraints.\nFidelity: 0.80 (strong) — measures alignment with the hidden reference spec, including required elements, substitutions, and target metrics.\nEfficiency bonus: +0.20 (awarded for reaching agreement in fewer rounds).\nCommunication bonus: +0.10.\nNo penalties applied.\nTotal reward: 5.42 (formula: 10 × rigor × feasibility × fidelity + bonuses − penalties).", "verdict": "accept", "top_failure_reasons": [] }, "transcript": [ { "role": "scientist", "message": "Initial proposal using available resources.", "round_number": 1, "action_type": "propose_protocol" }, { "role": "lab_manager", "message": "Budget is within range. Equipment is available.", "round_number": 1, "action_type": "report_feasibility" } ], "reward_breakdown": { "rigor": 0.8, "feasibility": 0.8, "fidelity": 0.8, "efficiency_bonus": 0.2, "communication_bonus": 0.1, "penalties": {} }, "total_reward": 5.42, "rounds_used": 3, "agreement_reached": true, "judge_notes": "Rigor: 0.80 (strong) — measures structural completeness, success-criteria coverage, and required-element coverage.\nFeasibility: 0.80 (strong) — measures whether the protocol respects budget, equipment, reagent, schedule, and staffing constraints.\nFidelity: 0.80 (strong) — measures alignment with the hidden reference spec, including required elements, substitutions, and target metrics.\nEfficiency bonus: +0.20 (awarded for reaching agreement in fewer rounds).\nCommunication bonus: +0.10.\nNo penalties applied.\nTotal reward: 5.42 (formula: 10 × rigor × feasibility × fidelity + bonuses − penalties).", "verdict": "accept", "top_failure_reasons": [] } } }, "websocket": { "reset": { "client_sends": { "type": "reset", "seed": 42, "scenario": "math_reasoning", "difficulty": "easy" }, "server_responds": { "type": "reset_ok", "episode_id": "ep-deadbeef-1234-5678-9abc-def012345678", "observation": { "scientist": { "paper_title": "Planning a proof of the Cauchy-Schwarz inequality", "paper_hypothesis": "A square-expansion argument gives the cleanest proof path.", "paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.", "experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.", "conversation_history": [], "current_protocol": null, "round_number": 0, "max_rounds": 6 }, "lab_manager": { "budget_total": 345.0, "budget_remaining": 345.0, "equipment_available": [ "Structured proof notebook" ], "equipment_booked": [], "reagents_in_stock": [ "Reference theorem library", "Graduate reviewer" ], "reagents_out_of_stock": [], "staff_count": 1, "time_limit_days": 3, "safety_restrictions": [ "The outline should stay concise enough for seminar notes." ], "conversation_history": [], "current_protocol": null, "round_number": 0, "max_rounds": 6 } } } }, "step": { "client_sends": { "type": "step", "action": { "action_type": "propose_protocol", "sample_size": 30, "controls": [ "positive_control", "negative_control" ], "technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "duration_days": 5, "required_equipment": [ "Structured proof notebook" ], "required_reagents": [ "Reference theorem library", "Graduate reviewer" ], "questions": [], "rationale": "Initial proposal using available resources." } }, "server_responds": { "type": "step_ok", "observation": { "scientist": { "paper_title": "Planning a proof of the Cauchy-Schwarz inequality", "paper_hypothesis": "A square-expansion argument gives the cleanest proof path.", "paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.", "experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.", "conversation_history": [ { "role": "scientist", "message": "Initial proposal using available resources.", "round_number": 1, "action_type": "propose_protocol" }, { "role": "lab_manager", "message": "Budget is within range. Equipment is available.", "round_number": 1, "action_type": "report_feasibility" } ], "current_protocol": { "sample_size": 30, "controls": [ "positive_control", "negative_control" ], "technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "duration_days": 5, "required_equipment": [ "Structured proof notebook" ], "required_reagents": [ "Reference theorem library", "Graduate reviewer" ], "rationale": "Initial proposal using available resources." }, "round_number": 1, "max_rounds": 6 }, "lab_manager": { "budget_total": 345.0, "budget_remaining": 345.0, "equipment_available": [ "Structured proof notebook" ], "equipment_booked": [], "reagents_in_stock": [ "Reference theorem library", "Graduate reviewer" ], "reagents_out_of_stock": [], "staff_count": 1, "time_limit_days": 3, "safety_restrictions": [ "The outline should stay concise enough for seminar notes." ], "conversation_history": [ { "role": "scientist", "message": "Initial proposal using available resources.", "round_number": 1, "action_type": "propose_protocol" }, { "role": "lab_manager", "message": "Budget is within range. Equipment is available.", "round_number": 1, "action_type": "report_feasibility" } ], "current_protocol": { "sample_size": 30, "controls": [ "positive_control", "negative_control" ], "technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.", "duration_days": 5, "required_equipment": [ "Structured proof notebook" ], "required_reagents": [ "Reference theorem library", "Graduate reviewer" ], "rationale": "Initial proposal using available resources." }, "round_number": 1, "max_rounds": 6 } }, "reward": 0.0, "done": false, "info": { "agreement_reached": false, "error": null, "reward_breakdown": null, "judge_notes": null, "verdict": null, "top_failure_reasons": [] } } }, "ping": { "client_sends": { "type": "ping" }, "server_responds": { "type": "pong" } } } }