replicalab / tests /fixtures /api_schema_examples.json
maxxie114's picture
Initial HF Spaces deployment
80d8c84
{
"_meta": {
"generated_by": "tests/fixtures/generate_api_examples.py",
"description": "API schema examples generated from real Pydantic models. Re-run the script to regenerate after contract changes.",
"seed": 42,
"scenario_template": "math_reasoning",
"difficulty": "easy"
},
"rest": {
"POST /reset": {
"request": {
"seed": 42,
"scenario": "math_reasoning",
"difficulty": "easy",
"session_id": null
},
"response": {
"session_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"episode_id": "ep-deadbeef-1234-5678-9abc-def012345678",
"observation": {
"scientist": {
"paper_title": "Planning a proof of the Cauchy-Schwarz inequality",
"paper_hypothesis": "A square-expansion argument gives the cleanest proof path.",
"paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.",
"experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.",
"conversation_history": [],
"current_protocol": null,
"round_number": 0,
"max_rounds": 6
},
"lab_manager": {
"budget_total": 345.0,
"budget_remaining": 345.0,
"equipment_available": [
"Structured proof notebook"
],
"equipment_booked": [],
"reagents_in_stock": [
"Reference theorem library",
"Graduate reviewer"
],
"reagents_out_of_stock": [],
"staff_count": 1,
"time_limit_days": 3,
"safety_restrictions": [
"The outline should stay concise enough for seminar notes."
],
"conversation_history": [],
"current_protocol": null,
"round_number": 0,
"max_rounds": 6
}
}
}
},
"POST /step": {
"request": {
"session_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"action": {
"action_type": "propose_protocol",
"sample_size": 30,
"controls": [
"positive_control",
"negative_control"
],
"technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"duration_days": 5,
"required_equipment": [
"Structured proof notebook"
],
"required_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"questions": [],
"rationale": "Initial proposal using available resources."
}
},
"response_mid_episode": {
"observation": {
"scientist": {
"paper_title": "Planning a proof of the Cauchy-Schwarz inequality",
"paper_hypothesis": "A square-expansion argument gives the cleanest proof path.",
"paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.",
"experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.",
"conversation_history": [
{
"role": "scientist",
"message": "Initial proposal using available resources.",
"round_number": 1,
"action_type": "propose_protocol"
},
{
"role": "lab_manager",
"message": "Budget is within range. Equipment is available.",
"round_number": 1,
"action_type": "report_feasibility"
}
],
"current_protocol": {
"sample_size": 30,
"controls": [
"positive_control",
"negative_control"
],
"technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"duration_days": 5,
"required_equipment": [
"Structured proof notebook"
],
"required_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"rationale": "Initial proposal using available resources."
},
"round_number": 1,
"max_rounds": 6
},
"lab_manager": {
"budget_total": 345.0,
"budget_remaining": 345.0,
"equipment_available": [
"Structured proof notebook"
],
"equipment_booked": [],
"reagents_in_stock": [
"Reference theorem library",
"Graduate reviewer"
],
"reagents_out_of_stock": [],
"staff_count": 1,
"time_limit_days": 3,
"safety_restrictions": [
"The outline should stay concise enough for seminar notes."
],
"conversation_history": [
{
"role": "scientist",
"message": "Initial proposal using available resources.",
"round_number": 1,
"action_type": "propose_protocol"
},
{
"role": "lab_manager",
"message": "Budget is within range. Equipment is available.",
"round_number": 1,
"action_type": "report_feasibility"
}
],
"current_protocol": {
"sample_size": 30,
"controls": [
"positive_control",
"negative_control"
],
"technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"duration_days": 5,
"required_equipment": [
"Structured proof notebook"
],
"required_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"rationale": "Initial proposal using available resources."
},
"round_number": 1,
"max_rounds": 6
}
},
"reward": 0.0,
"done": false,
"info": {
"agreement_reached": false,
"error": null,
"reward_breakdown": null,
"judge_notes": null,
"verdict": null,
"top_failure_reasons": []
}
},
"response_terminal": {
"observation": null,
"reward": 5.42,
"done": true,
"info": {
"agreement_reached": true,
"error": null,
"reward_breakdown": {
"rigor": 0.8,
"feasibility": 0.8,
"fidelity": 0.8,
"efficiency_bonus": 0.2,
"communication_bonus": 0.1,
"penalties": {}
},
"judge_notes": "Rigor: 0.80 (strong) — measures structural completeness, success-criteria coverage, and required-element coverage.\nFeasibility: 0.80 (strong) — measures whether the protocol respects budget, equipment, reagent, schedule, and staffing constraints.\nFidelity: 0.80 (strong) — measures alignment with the hidden reference spec, including required elements, substitutions, and target metrics.\nEfficiency bonus: +0.20 (awarded for reaching agreement in fewer rounds).\nCommunication bonus: +0.10.\nNo penalties applied.\nTotal reward: 5.42 (formula: 10 × rigor × feasibility × fidelity + bonuses − penalties).",
"verdict": "accept",
"top_failure_reasons": []
}
}
},
"GET /scenarios": {
"response": {
"scenarios": [
{
"family": "math_reasoning",
"difficulties": [
"easy",
"medium",
"hard"
]
},
{
"family": "ml_benchmark",
"difficulties": [
"easy",
"medium",
"hard"
]
},
{
"family": "finance_trading",
"difficulties": [
"easy",
"medium",
"hard"
]
}
]
}
},
"GET /replay/{episode_id}": {
"response": {
"episode_id": "ep-deadbeef-1234-5678-9abc-def012345678",
"seed": 42,
"scenario_template": "math_reasoning",
"difficulty": "easy",
"final_state": {
"seed": 42,
"scenario_template": "math_reasoning",
"difficulty": "easy",
"paper_title": "Planning a proof of the Cauchy-Schwarz inequality",
"paper_hypothesis": "A square-expansion argument gives the cleanest proof path.",
"paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.",
"experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.",
"lab_budget_total": 345.0,
"lab_budget_remaining": 345.0,
"lab_equipment": [
"Structured proof notebook"
],
"lab_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"lab_staff_count": 1,
"lab_time_limit_days": 3,
"current_protocol": null,
"conversation_history": [],
"round_number": 3,
"max_rounds": 6,
"done": true,
"agreement_reached": true,
"reward": 5.42,
"rigor_score": 0.8,
"feasibility_score": 0.8,
"fidelity_score": 0.8,
"judge_notes": "Rigor: 0.80 (strong) — measures structural completeness, success-criteria coverage, and required-element coverage.\nFeasibility: 0.80 (strong) — measures whether the protocol respects budget, equipment, reagent, schedule, and staffing constraints.\nFidelity: 0.80 (strong) — measures alignment with the hidden reference spec, including required elements, substitutions, and target metrics.\nEfficiency bonus: +0.20 (awarded for reaching agreement in fewer rounds).\nCommunication bonus: +0.10.\nNo penalties applied.\nTotal reward: 5.42 (formula: 10 × rigor × feasibility × fidelity + bonuses − penalties).",
"verdict": "accept",
"top_failure_reasons": []
},
"transcript": [
{
"role": "scientist",
"message": "Initial proposal using available resources.",
"round_number": 1,
"action_type": "propose_protocol"
},
{
"role": "lab_manager",
"message": "Budget is within range. Equipment is available.",
"round_number": 1,
"action_type": "report_feasibility"
}
],
"reward_breakdown": {
"rigor": 0.8,
"feasibility": 0.8,
"fidelity": 0.8,
"efficiency_bonus": 0.2,
"communication_bonus": 0.1,
"penalties": {}
},
"total_reward": 5.42,
"rounds_used": 3,
"agreement_reached": true,
"judge_notes": "Rigor: 0.80 (strong) — measures structural completeness, success-criteria coverage, and required-element coverage.\nFeasibility: 0.80 (strong) — measures whether the protocol respects budget, equipment, reagent, schedule, and staffing constraints.\nFidelity: 0.80 (strong) — measures alignment with the hidden reference spec, including required elements, substitutions, and target metrics.\nEfficiency bonus: +0.20 (awarded for reaching agreement in fewer rounds).\nCommunication bonus: +0.10.\nNo penalties applied.\nTotal reward: 5.42 (formula: 10 × rigor × feasibility × fidelity + bonuses − penalties).",
"verdict": "accept",
"top_failure_reasons": []
}
}
},
"websocket": {
"reset": {
"client_sends": {
"type": "reset",
"seed": 42,
"scenario": "math_reasoning",
"difficulty": "easy"
},
"server_responds": {
"type": "reset_ok",
"episode_id": "ep-deadbeef-1234-5678-9abc-def012345678",
"observation": {
"scientist": {
"paper_title": "Planning a proof of the Cauchy-Schwarz inequality",
"paper_hypothesis": "A square-expansion argument gives the cleanest proof path.",
"paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.",
"experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.",
"conversation_history": [],
"current_protocol": null,
"round_number": 0,
"max_rounds": 6
},
"lab_manager": {
"budget_total": 345.0,
"budget_remaining": 345.0,
"equipment_available": [
"Structured proof notebook"
],
"equipment_booked": [],
"reagents_in_stock": [
"Reference theorem library",
"Graduate reviewer"
],
"reagents_out_of_stock": [],
"staff_count": 1,
"time_limit_days": 3,
"safety_restrictions": [
"The outline should stay concise enough for seminar notes."
],
"conversation_history": [],
"current_protocol": null,
"round_number": 0,
"max_rounds": 6
}
}
}
},
"step": {
"client_sends": {
"type": "step",
"action": {
"action_type": "propose_protocol",
"sample_size": 30,
"controls": [
"positive_control",
"negative_control"
],
"technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"duration_days": 5,
"required_equipment": [
"Structured proof notebook"
],
"required_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"questions": [],
"rationale": "Initial proposal using available resources."
}
},
"server_responds": {
"type": "step_ok",
"observation": {
"scientist": {
"paper_title": "Planning a proof of the Cauchy-Schwarz inequality",
"paper_hypothesis": "A square-expansion argument gives the cleanest proof path.",
"paper_method": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"paper_key_finding": "The proof is accepted only if every inequality step and equality case is justified.",
"experiment_goal": "Produce a proof-planning workflow for the Cauchy-Schwarz inequality for an undergraduate seminar handout.",
"conversation_history": [
{
"role": "scientist",
"message": "Initial proposal using available resources.",
"round_number": 1,
"action_type": "propose_protocol"
},
{
"role": "lab_manager",
"message": "Budget is within range. Equipment is available.",
"round_number": 1,
"action_type": "report_feasibility"
}
],
"current_protocol": {
"sample_size": 30,
"controls": [
"positive_control",
"negative_control"
],
"technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"duration_days": 5,
"required_equipment": [
"Structured proof notebook"
],
"required_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"rationale": "Initial proposal using available resources."
},
"round_number": 1,
"max_rounds": 6
},
"lab_manager": {
"budget_total": 345.0,
"budget_remaining": 345.0,
"equipment_available": [
"Structured proof notebook"
],
"equipment_booked": [],
"reagents_in_stock": [
"Reference theorem library",
"Graduate reviewer"
],
"reagents_out_of_stock": [],
"staff_count": 1,
"time_limit_days": 3,
"safety_restrictions": [
"The outline should stay concise enough for seminar notes."
],
"conversation_history": [
{
"role": "scientist",
"message": "Initial proposal using available resources.",
"round_number": 1,
"action_type": "propose_protocol"
},
{
"role": "lab_manager",
"message": "Budget is within range. Equipment is available.",
"round_number": 1,
"action_type": "report_feasibility"
}
],
"current_protocol": {
"sample_size": 30,
"controls": [
"positive_control",
"negative_control"
],
"technique": "Outline the proof using one algebraic identity, one equality-case check, and reviewer notes.",
"duration_days": 5,
"required_equipment": [
"Structured proof notebook"
],
"required_reagents": [
"Reference theorem library",
"Graduate reviewer"
],
"rationale": "Initial proposal using available resources."
},
"round_number": 1,
"max_rounds": 6
}
},
"reward": 0.0,
"done": false,
"info": {
"agreement_reached": false,
"error": null,
"reward_breakdown": null,
"judge_notes": null,
"verdict": null,
"top_failure_reasons": []
}
}
},
"ping": {
"client_sends": {
"type": "ping"
},
"server_responds": {
"type": "pong"
}
}
}
}