| {"mode": "argument_shaping", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"search query\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"query\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"integer\",\n \"default_value\": 5,\n \"description\": \"maximum hits\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"top_k\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 1,\n \"sandbox_cost_estimate\": 0,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 2500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"network\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": false,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Search tool under runtime/network/validator discipline.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"retrieved evidence bundle\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"results\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"search execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"network\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"open_web\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Search public web sources and return evidence candidates.\",\n \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by browser_search.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}} |
| {"mode": "harness_envelope", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=harness_envelope\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"search query\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"query\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"integer\",\n \"default_value\": 5,\n \"description\": \"maximum hits\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"top_k\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 1,\n \"sandbox_cost_estimate\": 0,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 2500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"network\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": false,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Search tool under runtime/network/validator discipline.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"retrieved evidence bundle\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"results\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"search execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"network\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"open_web\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Search public web sources and return evidence candidates.\",\n \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nWithin the full harness envelope, determine the compliant behavior of browser_search under control-plane priority.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "harness_envelope", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "harness_envelope", "focus": "harness_envelope", "split": "eval", "variant_rank": 0}} |
| {"mode": "tool_trigger", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"search query\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"query\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"integer\",\n \"default_value\": 5,\n \"description\": \"maximum hits\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"top_k\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 1,\n \"sandbox_cost_estimate\": 0,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 2500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"network\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": false,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Search tool under runtime/network/validator discipline.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"retrieved evidence bundle\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"results\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"search execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"network\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"open_web\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Search public web sources and return evidence candidates.\",\n \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if browser_search is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}} |
| {"mode": "argument_shaping", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"search query\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"query\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"integer\",\n \"default_value\": 5,\n \"description\": \"maximum hits\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"top_k\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 1,\n \"sandbox_cost_estimate\": 0,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 2500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"network\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": false,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Search tool under runtime/network/validator discipline.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"retrieved evidence bundle\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"results\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"search execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"network\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"open_web\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Search public web sources and return evidence candidates.\",\n \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nProduce arguments that fit browser_search and would fail if interpreted as repo_reverse_engineer.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 1}} |
| {"mode": "precondition_gating", "tool_name": "browser_search", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"search query\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"query\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"integer\",\n \"default_value\": 5,\n \"description\": \"maximum hits\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"top_k\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 1,\n \"sandbox_cost_estimate\": 0,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 2500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"network\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": false,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Search tool under runtime/network/validator discipline.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"retrieved evidence bundle\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"results\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"search execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"network\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"open_web\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Search public web sources and return evidence candidates.\",\n \"tool_name\": \"browser_search\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that browser_search should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "browser_search", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 0, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]}, "metadata": {"tool_name": "browser_search", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}} |
| {"mode": "tool_trigger", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"patch artifact ref\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"artifact_ref\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"boolean\",\n \"default_value\": false,\n \"description\": \"dry run flag\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"dry_run\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 4000,\n \"token_cost_estimate\": 64,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"patch\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": false,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": true,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": true,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"write\",\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": true,\n \"supports_resume\": true\n },\n \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n \"invocation_mode\": \"mutating\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\",\n \"sandbox_rollback_bridge\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"workspace diff\",\n \"evidence_backwrite\": false,\n \"kind\": \"patch\",\n \"name\": \"workspace_diff\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"patch receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"rolled_back\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"write\",\n \"worktree\",\n \"patch\",\n \"session\"\n ],\n \"blocks_on_conflict\": true,\n \"requires_clean_worktree\": true,\n \"requires_confirmation\": true,\n \"requires_evidence_before_trigger\": true,\n \"risk_band\": \"high\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"partial\",\n \"supports_rollback\": true\n },\n \"summary\": \"Apply a validated patch bundle to the workspace.\",\n \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if apply_patch_bundle is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}} |
| {"mode": "receipt_and_validation", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=receipt_validation\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"patch artifact ref\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"artifact_ref\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"boolean\",\n \"default_value\": false,\n \"description\": \"dry run flag\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"dry_run\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 4000,\n \"token_cost_estimate\": 64,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"patch\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": false,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": true,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": true,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"write\",\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": true,\n \"supports_resume\": true\n },\n \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n \"invocation_mode\": \"mutating\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\",\n \"sandbox_rollback_bridge\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"workspace diff\",\n \"evidence_backwrite\": false,\n \"kind\": \"patch\",\n \"name\": \"workspace_diff\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"patch receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"rolled_back\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"write\",\n \"worktree\",\n \"patch\",\n \"session\"\n ],\n \"blocks_on_conflict\": true,\n \"requires_clean_worktree\": true,\n \"requires_confirmation\": true,\n \"requires_evidence_before_trigger\": true,\n \"risk_band\": \"high\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"partial\",\n \"supports_rollback\": true\n },\n \"summary\": \"Apply a validated patch bundle to the workspace.\",\n \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nEmit the bounded receipt bundle that proves apply_patch_bundle complied with validator and permission policy.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "receipt_and_validation", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "receipt_and_validation", "focus": "receipt_validation", "split": "eval", "variant_rank": 0}} |
| {"mode": "tool_trigger", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"patch artifact ref\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"artifact_ref\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"boolean\",\n \"default_value\": false,\n \"description\": \"dry run flag\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"dry_run\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 4000,\n \"token_cost_estimate\": 64,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"patch\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": false,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": true,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": true,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"write\",\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": true,\n \"supports_resume\": true\n },\n \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n \"invocation_mode\": \"mutating\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\",\n \"sandbox_rollback_bridge\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"workspace diff\",\n \"evidence_backwrite\": false,\n \"kind\": \"patch\",\n \"name\": \"workspace_diff\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"patch receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"rolled_back\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"write\",\n \"worktree\",\n \"patch\",\n \"session\"\n ],\n \"blocks_on_conflict\": true,\n \"requires_clean_worktree\": true,\n \"requires_confirmation\": true,\n \"requires_evidence_before_trigger\": true,\n \"risk_band\": \"high\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"partial\",\n \"supports_rollback\": true\n },\n \"summary\": \"Apply a validated patch bundle to the workspace.\",\n \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nReject run_ci_validation and prefer apply_patch_bundle only when the contract and capability truly align with this request.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 1}} |
| {"mode": "argument_shaping", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"patch artifact ref\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"artifact_ref\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"boolean\",\n \"default_value\": false,\n \"description\": \"dry run flag\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"dry_run\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 4000,\n \"token_cost_estimate\": 64,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"patch\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": false,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": true,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": true,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"write\",\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": true,\n \"supports_resume\": true\n },\n \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n \"invocation_mode\": \"mutating\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\",\n \"sandbox_rollback_bridge\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"workspace diff\",\n \"evidence_backwrite\": false,\n \"kind\": \"patch\",\n \"name\": \"workspace_diff\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"patch receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"rolled_back\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"write\",\n \"worktree\",\n \"patch\",\n \"session\"\n ],\n \"blocks_on_conflict\": true,\n \"requires_clean_worktree\": true,\n \"requires_confirmation\": true,\n \"requires_evidence_before_trigger\": true,\n \"risk_band\": \"high\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"partial\",\n \"supports_rollback\": true\n },\n \"summary\": \"Apply a validated patch bundle to the workspace.\",\n \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by apply_patch_bundle.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}} |
| {"mode": "precondition_gating", "tool_name": "apply_patch_bundle", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"patch artifact ref\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"artifact_ref\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": false\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"boolean\",\n \"default_value\": false,\n \"description\": \"dry run flag\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"dry_run\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 4000,\n \"token_cost_estimate\": 64,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"patch\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": false,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": true,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": true,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"write\",\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": true,\n \"supports_resume\": true\n },\n \"description\": \"Mutating patch tool under worktree / snapshot / validator discipline.\",\n \"invocation_mode\": \"mutating\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\",\n \"sandbox_rollback_bridge\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"workspace diff\",\n \"evidence_backwrite\": false,\n \"kind\": \"patch\",\n \"name\": \"workspace_diff\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"patch receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": true,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"rolled_back\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"write\",\n \"worktree\",\n \"patch\",\n \"session\"\n ],\n \"blocks_on_conflict\": true,\n \"requires_clean_worktree\": true,\n \"requires_confirmation\": true,\n \"requires_evidence_before_trigger\": true,\n \"risk_band\": \"high\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"partial\",\n \"supports_rollback\": true\n },\n \"summary\": \"Apply a validated patch bundle to the workspace.\",\n \"tool_name\": \"apply_patch_bundle\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that apply_patch_bundle should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "apply_patch_bundle", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 1, "confirmation_required": 1, "session_required": 1, "permission_bits": [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1]}, "metadata": {"tool_name": "apply_patch_bundle", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}} |
| {"mode": "precondition_gating", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": \"pytest -q\",\n \"description\": \"test selector\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"test_selector\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 8000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Read-only validation tool under workspace / session discipline.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"execution_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"validate\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"worktree\",\n \"validate\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that run_ci_validation should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}} |
| {"mode": "worktree_permission", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=worktree_permission\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": \"pytest -q\",\n \"description\": \"test selector\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"test_selector\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 8000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Read-only validation tool under workspace / session discipline.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"execution_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"validate\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"worktree\",\n \"validate\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nJudge run_ci_validation under strict worktree and permission discipline, including when it should defer instead of acting.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "worktree_permission", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "worktree_permission", "focus": "worktree_permission", "split": "eval", "variant_rank": 0}} |
| {"mode": "tool_trigger", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": \"pytest -q\",\n \"description\": \"test selector\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"test_selector\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 8000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Read-only validation tool under workspace / session discipline.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"execution_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"validate\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"worktree\",\n \"validate\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if run_ci_validation is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}} |
| {"mode": "argument_shaping", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": \"pytest -q\",\n \"description\": \"test selector\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"test_selector\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 8000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Read-only validation tool under workspace / session discipline.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"execution_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"validate\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"worktree\",\n \"validate\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by run_ci_validation.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}} |
| {"mode": "precondition_gating", "tool_name": "run_ci_validation", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"string\",\n \"default_value\": \"pytest -q\",\n \"description\": \"test selector\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"test_selector\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 8000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Read-only validation tool under workspace / session discipline.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"execution receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"execution_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"validate\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"worktree\",\n \"validate\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded CI/test validation and emit validator-compatible receipts.\",\n \"tool_name\": \"run_ci_validation\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nEven if apply_patch_bundle looks tempting, keep run_ci_validation gated until the permission and worktree envelope is valid.\n[/USER_QUERY]", "labels": {"tool_name": "run_ci_validation", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "run_ci_validation", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 1}} |
| {"mode": "self_check_loop", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=self_check_loop\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to validate\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"entry_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 6000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"self_check_loop\": true,\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"bounded failure explanation\",\n \"evidence_backwrite\": false,\n \"kind\": \"text\",\n \"name\": \"failure_report\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"validate\",\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nAfter code generation, run the strict one-cycle self-check path and only escalate to the next bounded repair step if validation fails.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "self_check_loop", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "self_check_loop", "focus": "self_check_loop", "split": "eval", "variant_rank": 0}} |
| {"mode": "tool_trigger", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to validate\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"entry_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 6000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"self_check_loop\": true,\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"bounded failure explanation\",\n \"evidence_backwrite\": false,\n \"kind\": \"text\",\n \"name\": \"failure_report\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"validate\",\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if self_check_repair_loop is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}} |
| {"mode": "argument_shaping", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to validate\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"entry_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 6000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"self_check_loop\": true,\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"bounded failure explanation\",\n \"evidence_backwrite\": false,\n \"kind\": \"text\",\n \"name\": \"failure_report\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"validate\",\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by self_check_repair_loop.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}} |
| {"mode": "precondition_gating", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to validate\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"entry_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 6000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"self_check_loop\": true,\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"bounded failure explanation\",\n \"evidence_backwrite\": false,\n \"kind\": \"text\",\n \"name\": \"failure_report\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"validate\",\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that self_check_repair_loop should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}} |
| {"mode": "receipt_and_validation", "tool_name": "self_check_repair_loop", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=receipt_validation\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to validate\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"entry_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 6000,\n \"token_cost_estimate\": 96,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"validator\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": false,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": true,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"execute\",\n \"validate\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Self-check loop tool for code-after-generation verification and bounded repair routing.\",\n \"invocation_mode\": \"executing\",\n \"metadata\": {\n \"self_check_loop\": true,\n \"validator_names\": [\n \"sandbox_result_validator\"\n ]\n },\n \"outputs\": [\n {\n \"description\": \"validator receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"validation_receipt\",\n \"name\": \"validator_receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n },\n {\n \"description\": \"bounded failure explanation\",\n \"evidence_backwrite\": false,\n \"kind\": \"text\",\n \"name\": \"failure_report\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"repair\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"execute\",\n \"validate\",\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"medium\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Run bounded self-check, diagnose the failed constraint, and prepare the next repair step.\",\n \"tool_name\": \"self_check_repair_loop\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nEmit the bounded receipt bundle that proves self_check_repair_loop complied with validator and permission policy.\n[/USER_QUERY]", "labels": {"tool_name": "self_check_repair_loop", "mode_name": "receipt_and_validation", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 1, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0]}, "metadata": {"tool_name": "self_check_repair_loop", "mode": "receipt_and_validation", "focus": "receipt_validation", "split": "eval", "variant_rank": 0}} |
| {"mode": "reverse_engineering", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=reverse_engineering\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to inspect\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"focus_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 3500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": false,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"reverse_engineering\": true\n },\n \"outputs\": [\n {\n \"description\": \"bounded repository map\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"repo_map\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"inspection receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"low\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUse bounded repository inspection to reconstruct missing intent and architecture facts before any tool fantasy appears.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "reverse_engineering", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "reverse_engineering", "focus": "reverse_engineering", "split": "eval", "variant_rank": 0}} |
| {"mode": "runtime_session", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=runtime_session\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to inspect\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"focus_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 3500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": false,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"reverse_engineering\": true\n },\n \"outputs\": [\n {\n \"description\": \"bounded repository map\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"repo_map\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"inspection receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"low\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nIdentify the session continuity obligations that surround repo_reverse_engineer in a multi-step harness turn.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "runtime_session", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "runtime_session", "focus": "runtime_session", "split": "eval", "variant_rank": 0}} |
| {"mode": "tool_trigger", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=trigger\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to inspect\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"focus_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 3500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": false,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"reverse_engineering\": true\n },\n \"outputs\": [\n {\n \"description\": \"bounded repository map\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"repo_map\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"inspection receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"low\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nUnder the same harness rules, judge if repo_reverse_engineer is the next valid tool call rather than free-form text.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "tool_trigger", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "tool_trigger", "focus": "trigger", "split": "eval", "variant_rank": 0}} |
| {"mode": "argument_shaping", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=arguments\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to inspect\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"focus_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 3500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": false,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"reverse_engineering\": true\n },\n \"outputs\": [\n {\n \"description\": \"bounded repository map\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"repo_map\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"inspection receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"low\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nNormalize the user intent into the strict argument schema expected by repo_reverse_engineer.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "argument_shaping", "trigger": 1, "schema_ok": 1, "precondition_ok": 1, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "argument_shaping", "focus": "arguments", "split": "eval", "variant_rank": 0}} |
| {"mode": "precondition_gating", "tool_name": "repo_reverse_engineer", "user_query": "[HARNESS_ENVELOPE]\nrole=generator\nruntime_protocol=true\ntyped_tool_contracts=true\nvalidator_alignment=true\nsession_continuity=true\npermission_alignment=true\nworktree_discipline=true\nrepair_after_self_check=true\nreverse_engineering_ready=true\nrule=Do not emit free-form tool fantasies. Follow the typed tool contract exactly.\nrule=If preconditions are not satisfied, defer, repair, or ask for the next bounded step.\nrule=Validator receipts and runtime permissions outrank narration.\nrule=If code is produced, the harness may still call a bounded self-check loop before final submit.\nrule=When repository intent is unclear, reverse-engineering and repository inspection are valid bounded actions.\ncontract_focus=precondition_gating\n[/HARNESS_ENVELOPE]\n\n[TOOL_SPEC]\n{\n \"arguments\": [\n {\n \"arg_type\": \"string\",\n \"default_value\": null,\n \"description\": \"workspace root\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"workspace_root\",\n \"nullable\": false,\n \"path_like\": true,\n \"repeated\": false,\n \"required\": true,\n \"sensitive\": false,\n \"workspace_scoped\": true\n },\n {\n \"arg_type\": \"array\",\n \"default_value\": [],\n \"description\": \"paths to inspect\",\n \"enum_values\": [],\n \"examples\": [],\n \"location\": \"body\",\n \"name\": \"focus_paths\",\n \"nullable\": false,\n \"path_like\": false,\n \"repeated\": false,\n \"required\": false,\n \"sensitive\": false,\n \"workspace_scoped\": false\n }\n ],\n \"budget_hint\": {\n \"network_cost_estimate\": 0,\n \"sandbox_cost_estimate\": 1,\n \"step_cost_estimate\": 1,\n \"time_cost_estimate_ms\": 3500,\n \"token_cost_estimate\": 128,\n \"tool_call_cost_estimate\": 1\n },\n \"capability\": \"search\",\n \"constraints\": {\n \"allowed_isolation_modes\": [],\n \"allowed_path_prefixes\": [],\n \"approval_modes\": [],\n \"auto_mode_allowed\": true,\n \"emits_evidence_ids\": true,\n \"emits_patch_ref\": false,\n \"emits_receipt\": true,\n \"emits_validation_receipt\": false,\n \"forbidden_path_prefixes\": [],\n \"mutates_workspace\": false,\n \"plan_mode_allowed\": true,\n \"required_permissions\": [\n \"worktree\"\n ],\n \"required_task_statuses\": [],\n \"requires_worktree\": true,\n \"supports_compact_boundary\": false,\n \"supports_resume\": true\n },\n \"description\": \"Repository reverse-engineering tool for code understanding, dependency tracing, and intent recovery.\",\n \"invocation_mode\": \"read_only\",\n \"metadata\": {\n \"reverse_engineering\": true\n },\n \"outputs\": [\n {\n \"description\": \"bounded repository map\",\n \"evidence_backwrite\": true,\n \"kind\": \"evidence\",\n \"name\": \"repo_map\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": false\n },\n {\n \"description\": \"inspection receipt\",\n \"evidence_backwrite\": false,\n \"kind\": \"execution_receipt\",\n \"name\": \"receipt\",\n \"patch_relevant\": false,\n \"reproducible\": true,\n \"required\": true,\n \"task_output_relevant\": false,\n \"validator_relevant\": true\n }\n ],\n \"phase\": \"execute\",\n \"receipt_policy\": {\n \"include_artifact_refs\": true,\n \"include_budget_charge\": true,\n \"include_evidence_ids\": true,\n \"include_patch_refs\": true,\n \"include_permission_decision\": true,\n \"include_raw_value\": false,\n \"include_task_output_refs\": true,\n \"include_validator_refs\": true,\n \"include_worktree_ref\": true,\n \"receipt_statuses\": [\n \"succeeded\",\n \"failed\",\n \"blocked\"\n ]\n },\n \"risk_profile\": {\n \"allowed_permissions\": [\n \"worktree\",\n \"session\"\n ],\n \"blocks_on_conflict\": false,\n \"requires_clean_worktree\": false,\n \"requires_confirmation\": false,\n \"requires_evidence_before_trigger\": false,\n \"risk_band\": \"low\",\n \"sensitive_domains\": [\n \"workspace\"\n ],\n \"side_effect_level\": \"none\",\n \"supports_rollback\": false\n },\n \"summary\": \"Inspect repository structure, infer missing intent, and reconstruct bounded architecture facts.\",\n \"tool_name\": \"repo_reverse_engineer\"\n}\n[/TOOL_SPEC]\n\n[USER_QUERY]\nDetect that repo_reverse_engineer should not fire because the required runtime preconditions are still unsatisfied.\n[/USER_QUERY]", "labels": {"tool_name": "repo_reverse_engineer", "mode_name": "precondition_gating", "trigger": 0, "schema_ok": 0, "precondition_ok": 0, "validator_required": 0, "worktree_required": 1, "rollback_supported": 0, "confirmation_required": 0, "session_required": 1, "permission_bits": [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]}, "metadata": {"tool_name": "repo_reverse_engineer", "mode": "precondition_gating", "focus": "precondition_gating", "split": "eval", "variant_rank": 0}} |
|
|