File size: 133,985 Bytes
8ad54e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
{"mode": "argument_shaping", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"search query\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"query\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"integer\",\n      \"default_value\": 5,\n      \"description\": \"maximum hits\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"top_k\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 1,\n    \"sandbox_cost_estimate\": 0,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 2500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"network\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": false,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Search tool under runtime/network/validator discipline.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"retrieved evidence bundle\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"results\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"search execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"network\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"open_web\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Search public web sources and return evidence candidates.\",\n  \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by browser_search.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}}
{"mode": "harness_envelope", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=harness_envelope\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"search query\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"query\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"integer\",\n      \"default_value\": 5,\n      \"description\": \"maximum hits\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"top_k\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 1,\n    \"sandbox_cost_estimate\": 0,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 2500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"network\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": false,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Search tool under runtime/network/validator discipline.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"retrieved evidence bundle\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"results\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"search execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"network\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"open_web\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Search public web sources and return evidence candidates.\",\n  \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nWithin the full harness envelope, determine the compliant behavior of browser_search under control-plane priority.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "harness_envelope", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "harness_envelope", "focus": "harness_envelope", "split": "eval", "variant_rank": 0}}
{"mode": "tool_trigger", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"search query\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"query\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"integer\",\n      \"default_value\": 5,\n      \"description\": \"maximum hits\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"top_k\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 1,\n    \"sandbox_cost_estimate\": 0,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 2500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"network\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": false,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Search tool under runtime/network/validator discipline.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"retrieved evidence bundle\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"results\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"search execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"network\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"open_web\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Search public web sources and return evidence candidates.\",\n  \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if browser_search is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}}
{"mode": "argument_shaping", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"search query\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"query\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"integer\",\n      \"default_value\": 5,\n      \"description\": \"maximum hits\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"top_k\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 1,\n    \"sandbox_cost_estimate\": 0,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 2500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"network\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": false,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Search tool under runtime/network/validator discipline.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"retrieved evidence bundle\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"results\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"search execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"network\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"open_web\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Search public web sources and return evidence candidates.\",\n  \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nProduce arguments that fit browser_search and would fail if interpreted as repo_reverse_engineer.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 1}}
{"mode": "precondition_gating", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"search query\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"query\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"integer\",\n      \"default_value\": 5,\n      \"description\": \"maximum hits\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"top_k\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 1,\n    \"sandbox_cost_estimate\": 0,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 2500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"network\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": false,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Search tool under runtime/network/validator discipline.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"retrieved evidence bundle\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"results\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"search execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"network\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"open_web\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Search public web sources and return evidence candidates.\",\n  \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that browser_search should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}}
{"mode": "tool_trigger", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"patch artifact ref\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"artifact_ref\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"boolean\",\n      \"default_value\": false,\n      \"description\": \"dry run flag\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"dry_run\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 4000,\n    \"token_cost_estimate\": 64,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"patch\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": false,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": true,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": true,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"write\",\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": true,\n    \"supports_resume\": true\n  },\n  \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n  \"invocation_mode\": \"mutating\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\",\n      \"sandbox_rollback_bridge\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"workspace diff\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"patch\",\n      \"name\": \"workspace_diff\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"patch receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"rolled_back\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"write\",\n      \"worktree\",\n      \"patch\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": true,\n    \"requires_clean_worktree\": true,\n    \"requires_confirmation\": true,\n    \"requires_evidence_before_trigger\": true,\n    \"risk_band\": \"high\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"partial\",\n    \"supports_rollback\": true\n  },\n  \"summary\": \"Apply a validated patch bundle to the workspace.\",\n  \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if apply_patch_bundle is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}}
{"mode": "receipt_and_validation", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=receipt_validation\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"patch artifact ref\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"artifact_ref\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"boolean\",\n      \"default_value\": false,\n      \"description\": \"dry run flag\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"dry_run\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 4000,\n    \"token_cost_estimate\": 64,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"patch\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": false,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": true,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": true,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"write\",\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": true,\n    \"supports_resume\": true\n  },\n  \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n  \"invocation_mode\": \"mutating\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\",\n      \"sandbox_rollback_bridge\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"workspace diff\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"patch\",\n      \"name\": \"workspace_diff\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"patch receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"rolled_back\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"write\",\n      \"worktree\",\n      \"patch\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": true,\n    \"requires_clean_worktree\": true,\n    \"requires_confirmation\": true,\n    \"requires_evidence_before_trigger\": true,\n    \"risk_band\": \"high\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"partial\",\n    \"supports_rollback\": true\n  },\n  \"summary\": \"Apply a validated patch bundle to the workspace.\",\n  \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nEmit the bounded receipt bundle that proves apply_patch_bundle complied with validator and permission policy.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "receipt_and_validation", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "receipt_and_validation", "focus": "receipt_validation", "split": "eval", "variant_rank": 0}}
{"mode": "tool_trigger", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"patch artifact ref\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"artifact_ref\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"boolean\",\n      \"default_value\": false,\n      \"description\": \"dry run flag\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"dry_run\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 4000,\n    \"token_cost_estimate\": 64,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"patch\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": false,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": true,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": true,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"write\",\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": true,\n    \"supports_resume\": true\n  },\n  \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n  \"invocation_mode\": \"mutating\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\",\n      \"sandbox_rollback_bridge\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"workspace diff\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"patch\",\n      \"name\": \"workspace_diff\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"patch receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"rolled_back\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"write\",\n      \"worktree\",\n      \"patch\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": true,\n    \"requires_clean_worktree\": true,\n    \"requires_confirmation\": true,\n    \"requires_evidence_before_trigger\": true,\n    \"risk_band\": \"high\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"partial\",\n    \"supports_rollback\": true\n  },\n  \"summary\": \"Apply a validated patch bundle to the workspace.\",\n  \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nReject run_ci_validation and prefer apply_patch_bundle only when the contract and capability truly align with this request.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 1}}
{"mode": "argument_shaping", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"patch artifact ref\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"artifact_ref\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"boolean\",\n      \"default_value\": false,\n      \"description\": \"dry run flag\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"dry_run\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 4000,\n    \"token_cost_estimate\": 64,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"patch\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": false,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": true,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": true,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"write\",\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": true,\n    \"supports_resume\": true\n  },\n  \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n  \"invocation_mode\": \"mutating\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\",\n      \"sandbox_rollback_bridge\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"workspace diff\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"patch\",\n      \"name\": \"workspace_diff\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"patch receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"rolled_back\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"write\",\n      \"worktree\",\n      \"patch\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": true,\n    \"requires_clean_worktree\": true,\n    \"requires_confirmation\": true,\n    \"requires_evidence_before_trigger\": true,\n    \"risk_band\": \"high\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"partial\",\n    \"supports_rollback\": true\n  },\n  \"summary\": \"Apply a validated patch bundle to the workspace.\",\n  \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by apply_patch_bundle.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}}
{"mode": "precondition_gating", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"patch artifact ref\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"artifact_ref\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"boolean\",\n      \"default_value\": false,\n      \"description\": \"dry run flag\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"dry_run\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 4000,\n    \"token_cost_estimate\": 64,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"patch\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": false,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": true,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": true,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"write\",\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": true,\n    \"supports_resume\": true\n  },\n  \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n  \"invocation_mode\": \"mutating\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\",\n      \"sandbox_rollback_bridge\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"workspace diff\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"patch\",\n      \"name\": \"workspace_diff\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"patch receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": true,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"rolled_back\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"write\",\n      \"worktree\",\n      \"patch\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": true,\n    \"requires_clean_worktree\": true,\n    \"requires_confirmation\": true,\n    \"requires_evidence_before_trigger\": true,\n    \"risk_band\": \"high\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"partial\",\n    \"supports_rollback\": true\n  },\n  \"summary\": \"Apply a validated patch bundle to the workspace.\",\n  \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that apply_patch_bundle should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}}
{"mode": "precondition_gating", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": \"pytest -q\",\n      \"description\": \"test selector\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"test_selector\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 8000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Read-only validation tool under workspace / session discipline.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"execution_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"validate\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"worktree\",\n      \"validate\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n  \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that run_ci_validation should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}}
{"mode": "worktree_permission", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=worktree_permission\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": \"pytest -q\",\n      \"description\": \"test selector\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"test_selector\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 8000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Read-only validation tool under workspace / session discipline.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"execution_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"validate\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"worktree\",\n      \"validate\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n  \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nJudge run_ci_validation under strict worktree and permission discipline, including when it should defer instead of acting.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "worktree_permission", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "worktree_permission", "focus": "worktree_permission", "split": "eval", "variant_rank": 0}}
{"mode": "tool_trigger", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": \"pytest -q\",\n      \"description\": \"test selector\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"test_selector\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 8000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Read-only validation tool under workspace / session discipline.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"execution_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"validate\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"worktree\",\n      \"validate\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n  \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if run_ci_validation is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}}
{"mode": "argument_shaping", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": \"pytest -q\",\n      \"description\": \"test selector\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"test_selector\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 8000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Read-only validation tool under workspace / session discipline.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"execution_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"validate\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"worktree\",\n      \"validate\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n  \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by run_ci_validation.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}}
{"mode": "precondition_gating", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": \"pytest -q\",\n      \"description\": \"test selector\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"test_selector\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 8000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Read-only validation tool under workspace / session discipline.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"execution receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"execution_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"validate\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"worktree\",\n      \"validate\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n  \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nEven if apply_patch_bundle looks tempting, keep run_ci_validation gated until the permission and worktree envelope is valid.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 1}}
{"mode": "self_check_loop", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=self_check_loop\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to validate\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"entry_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 6000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"self_check_loop\": true,\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"bounded failure explanation\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"text\",\n      \"name\": \"failure_report\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"validate\",\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n  \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nAfter code generation, run the strict one-cycle self-check path and only escalate to the next bounded repair step if validation fails.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "self_check_loop", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "self_check_loop", "focus": "self_check_loop", "split": "eval", "variant_rank": 0}}
{"mode": "tool_trigger", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to validate\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"entry_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 6000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"self_check_loop\": true,\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"bounded failure explanation\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"text\",\n      \"name\": \"failure_report\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"validate\",\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n  \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if self_check_repair_loop is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}}
{"mode": "argument_shaping", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to validate\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"entry_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 6000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"self_check_loop\": true,\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"bounded failure explanation\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"text\",\n      \"name\": \"failure_report\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"validate\",\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n  \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by self_check_repair_loop.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}}
{"mode": "precondition_gating", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to validate\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"entry_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 6000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"self_check_loop\": true,\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"bounded failure explanation\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"text\",\n      \"name\": \"failure_report\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"validate\",\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n  \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that self_check_repair_loop should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}}
{"mode": "receipt_and_validation", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=receipt_validation\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to validate\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"entry_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 6000,\n    \"token_cost_estimate\": 96,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"validator\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": false,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": true,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"execute\",\n      \"validate\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n  \"invocation_mode\": \"executing\",\n  \"metadata\": {\n    \"self_check_loop\": true,\n    \"validator_names\": [\n      \"sandbox_result_validator\"\n    ]\n  },\n  \"outputs\": [\n    {\n      \"description\": \"validator receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"validation_receipt\",\n      \"name\": \"validator_receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    },\n    {\n      \"description\": \"bounded failure explanation\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"text\",\n      \"name\": \"failure_report\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"repair\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"execute\",\n      \"validate\",\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"medium\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n  \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nEmit the bounded receipt bundle that proves self_check_repair_loop complied with validator and permission policy.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "receipt_and_validation", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "receipt_and_validation", "focus": "receipt_validation", "split": "eval", "variant_rank": 0}}
{"mode": "reverse_engineering", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=reverse_engineering\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to inspect\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"focus_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 3500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": false,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"reverse_engineering\": true\n  },\n  \"outputs\": [\n    {\n      \"description\": \"bounded repository map\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"repo_map\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"inspection receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"low\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n  \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUse bounded repository inspection to reconstruct missing intent and architecture facts before any tool fantasy appears.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "reverse_engineering", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "reverse_engineering", "focus": "reverse_engineering", "split": "eval", "variant_rank": 0}}
{"mode": "runtime_session", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=runtime_session\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to inspect\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"focus_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 3500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": false,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"reverse_engineering\": true\n  },\n  \"outputs\": [\n    {\n      \"description\": \"bounded repository map\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"repo_map\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"inspection receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"low\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n  \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nIdentify the session continuity obligations that surround repo_reverse_engineer in a multi-step harness turn.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "runtime_session", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "runtime_session", "focus": "runtime_session", "split": "eval", "variant_rank": 0}}
{"mode": "tool_trigger", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to inspect\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"focus_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 3500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": false,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"reverse_engineering\": true\n  },\n  \"outputs\": [\n    {\n      \"description\": \"bounded repository map\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"repo_map\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"inspection receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"low\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n  \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if repo_reverse_engineer is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}}
{"mode": "argument_shaping", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to inspect\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"focus_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 3500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": false,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"reverse_engineering\": true\n  },\n  \"outputs\": [\n    {\n      \"description\": \"bounded repository map\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"repo_map\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"inspection receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"low\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n  \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by repo_reverse_engineer.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}}
{"mode": "precondition_gating", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n  \"arguments\": [\n    {\n      \"arg_type\": \"string\",\n      \"default_value\": null,\n      \"description\": \"workspace root\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"workspace_root\",\n      \"nullable\": false,\n      \"path_like\": true,\n      \"repeated\": false,\n      \"required\": true,\n      \"sensitive\": false,\n      \"workspace_scoped\": true\n    },\n    {\n      \"arg_type\": \"array\",\n      \"default_value\": [],\n      \"description\": \"paths to inspect\",\n      \"enum_values\": [],\n      \"examples\": [],\n      \"location\": \"body\",\n      \"name\": \"focus_paths\",\n      \"nullable\": false,\n      \"path_like\": false,\n      \"repeated\": false,\n      \"required\": false,\n      \"sensitive\": false,\n      \"workspace_scoped\": false\n    }\n  ],\n  \"budget_hint\": {\n    \"network_cost_estimate\": 0,\n    \"sandbox_cost_estimate\": 1,\n    \"step_cost_estimate\": 1,\n    \"time_cost_estimate_ms\": 3500,\n    \"token_cost_estimate\": 128,\n    \"tool_call_cost_estimate\": 1\n  },\n  \"capability\": \"search\",\n  \"constraints\": {\n    \"allowed_isolation_modes\": [],\n    \"allowed_path_prefixes\": [],\n    \"approval_modes\": [],\n    \"auto_mode_allowed\": true,\n    \"emits_evidence_ids\": true,\n    \"emits_patch_ref\": false,\n    \"emits_receipt\": true,\n    \"emits_validation_receipt\": false,\n    \"forbidden_path_prefixes\": [],\n    \"mutates_workspace\": false,\n    \"plan_mode_allowed\": true,\n    \"required_permissions\": [\n      \"worktree\"\n    ],\n    \"required_task_statuses\": [],\n    \"requires_worktree\": true,\n    \"supports_compact_boundary\": false,\n    \"supports_resume\": true\n  },\n  \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n  \"invocation_mode\": \"read_only\",\n  \"metadata\": {\n    \"reverse_engineering\": true\n  },\n  \"outputs\": [\n    {\n      \"description\": \"bounded repository map\",\n      \"evidence_backwrite\": true,\n      \"kind\": \"evidence\",\n      \"name\": \"repo_map\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": false\n    },\n    {\n      \"description\": \"inspection receipt\",\n      \"evidence_backwrite\": false,\n      \"kind\": \"execution_receipt\",\n      \"name\": \"receipt\",\n      \"patch_relevant\": false,\n      \"reproducible\": true,\n      \"required\": true,\n      \"task_output_relevant\": false,\n      \"validator_relevant\": true\n    }\n  ],\n  \"phase\": \"execute\",\n  \"receipt_policy\": {\n    \"include_artifact_refs\": true,\n    \"include_budget_charge\": true,\n    \"include_evidence_ids\": true,\n    \"include_patch_refs\": true,\n    \"include_permission_decision\": true,\n    \"include_raw_value\": false,\n    \"include_task_output_refs\": true,\n    \"include_validator_refs\": true,\n    \"include_worktree_ref\": true,\n    \"receipt_statuses\": [\n      \"succeeded\",\n      \"failed\",\n      \"blocked\"\n    ]\n  },\n  \"risk_profile\": {\n    \"allowed_permissions\": [\n      \"worktree\",\n      \"session\"\n    ],\n    \"blocks_on_conflict\": false,\n    \"requires_clean_worktree\": false,\n    \"requires_confirmation\": false,\n    \"requires_evidence_before_trigger\": false,\n    \"risk_band\": \"low\",\n    \"sensitive_domains\": [\n      \"workspace\"\n    ],\n    \"side_effect_level\": \"none\",\n    \"supports_rollback\": false\n  },\n  \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n  \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that repo_reverse_engineer should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}}