[
  "tests/test_api_reset.py::test_reset_defaults_to_cascading_timeout_when_task_missing",
  "tests/test_api_reset.py::test_reset_rejects_unknown_explicit_task",
  "tests/test_env_task_context.py::test_job_generator_rate_resolved_uses_config",
  "tests/test_env_task_context.py::test_registry_auth_matches_default_detects_corruption",
  "tests/test_env_task_context.py::test_restore_defaults_adds_job_generator_config",
  "tests/test_graders.py::test_grade_backpressure_cascade_continuous",
  "tests/test_graders.py::test_grade_byzantine_queue_fault_cases",
  "tests/test_graders.py::test_grade_cascading_timeout_boundaries",
  "tests/test_graders.py::test_grade_distributed_lock_starvation_cases",
  "tests/test_graders.py::test_grade_job_generator_runaway_thresholds",
  "tests/test_graders.py::test_grade_registry_corruption_thresholds",
  "tests/test_graders.py::test_grade_route_partition_threshold",
  "tests/test_inference_format.py::test_attempt_history_block_renders_all_attempts",
  "tests/test_inference_format.py::test_build_prompt_contains_symptoms_and_history",
  "tests/test_inference_format.py::test_end_log_line_includes_score_and_reward_list",
  "tests/test_inference_format.py::test_episode_score_clamps_terminal_reward_to_unit_interval",
  "tests/test_inference_format.py::test_extract_command_prefers_first_json_object_with_command",
  "tests/test_inference_format.py::test_extract_command_reads_fenced_json_payload",
  "tests/test_inference_format.py::test_extract_command_reads_json_after_reasoning_preamble",
  "tests/test_inference_format.py::test_extract_command_reads_json_embedded_in_text",
  "tests/test_inference_format.py::test_extract_command_reads_json_payload",
  "tests/test_inference_format.py::test_extract_command_rejects_non_json_code_fence",
  "tests/test_inference_format.py::test_extract_command_requires_command_even_with_reasoning",
  "tests/test_inference_format.py::test_extract_command_returns_none_when_empty",
  "tests/test_inference_format.py::test_extract_reasoning_when_present",
  "tests/test_inference_format.py::test_parse_tasks_default_and_override",
  "tests/test_inference_format.py::test_single_line_removes_newlines",
  "tests/test_inference_format.py::test_task_symptom_block_includes_new_tasks",
  "tests/test_inference_format.py::test_task_symptom_block_is_non_empty",
  "tests/test_models.py::test_action_rejects_empty_command",
  "tests/test_models.py::test_observation_roundtrip",
  "tests/test_models.py::test_system_metrics_rejects_success_rate_above_one"
]