File size: 12,951 Bytes
eab1ab3
128f4e8
 
 
 
 
 
d715ed0
 
 
 
eab1ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d715ed0
eab1ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08381bd
 
 
 
eab1ab3
08381bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eab1ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d715ed0
 
 
 
eab1ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d715ed0
 
 
 
 
eab1ab3
 
128f4e8
 
 
 
 
eab1ab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# ---- Changelog ----
# [2026-05-03] Claude (Sonnet 4.6) — Add read_only_paths to constraints schema (#168)
# What: Added read_only_paths as a valid constraints property
# Why: spec_executor.py already reads this field (lines 96-100) but work_block_schema.py
#      constraints had additionalProperties:False with no read_only_paths entry — any spec
#      using it failed schema validation before execution could reach the field.
# How: One new property added to constraints; no executor changes (already handled).
# [2026-04-06] Josh + Claude — Add edit_file tool + shell_allowlist constraint
# What: (1) edit_file in _TOOL_NAMES (2) shell_allowlist in constraints schema
# Why: Gap 2 — specs need to extend shell allowlist; Gap 3 — edit_file is a new tool
# How: New enum entry in _TOOL_NAMES, new array property in constraints
# [2026-04-05] Josh + Claude — Structured work block spec schema
# What: JSON Schema for QB → Codemine worker handoff
# Why: Structured specs produce +37% better agent execution vs prose (zero improvisation, 97% validation)
# How: jsonschema validation, mandatory validation blocks on every action, on_failure handlers
# -------------------

"""Work Block Spec schema and validation.

Every action step MUST have a validation block and an on_failure handler.
The format is the guardrail — not the model.
"""

import json
import logging
from typing import Tuple

from jsonschema import Draft202012Validator, ValidationError

logger = logging.getLogger("work_block_schema")

# Tools available in Codemine's TOOL_REGISTRY.
# Used as the enum for an action step's "tool" and for constraints.tool_allowlist.
_TOOL_NAMES = [
    "read_file",
    "write_file",
    "edit_file",
    "list_files",
    "search_code",
    "search_conversations",
    "search_testament",
    "ingest_workspace",
    "shell_execute",
    "push_to_github",
    "pull_from_github",
    "create_shadow_branch",
    "notebook_read",
    "notebook_add",
    "notebook_delete",
    "map_repository_structure",
    "get_stats",
]

# Operators allowed in a condition check's "operator" field.
_CONDITION_OPERATORS = [
    "contains",
    "not_contains",
    "equals",
    "not_equals",
    "matches_regex",
    "result_is_string",
    "result_is_not_error",
    "file_exists",
    "file_contains",
    "output_length_gt",
    "output_length_lt",
]

# Values allowed for "action" in the object form of an on_failure handler.
_FAILURE_ACTIONS = [
    "abort_block",
    "retry",
    "skip",
    "goto",
    "escalate_to_qb",
]

# Values allowed for a gate step's "gate_type".
_GATE_TYPES = [
    "human_review",
    "qb_checkpoint",
    "auto_approve",
]

# ---------------------------------------------------------------------------
# Sub-schemas (referenced by $defs in the main schema)
# ---------------------------------------------------------------------------

# Schema for a single check: a required "operator" (one of _CONDITION_OPERATORS)
# plus optional "target", "value" and "description"; no other keys are allowed.
# Reused as the item schema of _VALIDATION_BLOCK["checks"] and as the "check"
# of a condition step.
_CONDITION_CHECK = {
    "type": "object",
    "required": ["operator"],
    "additionalProperties": False,
    "properties": {
        "operator": {"enum": _CONDITION_OPERATORS},
        "target": {"type": "string"},
        "value": {},  # any type — depends on operator
        "description": {"type": "string"},
    },
}

# String shorthands accepted in on_failure for easier spec authoring.
# Executor normalizes these to the full object form before acting on them.
# "abort" and "continue" exist only as shorthands: they are not members of
# _FAILURE_ACTIONS, so the object form rejects them as an "action".
_FAILURE_SHORTHANDS = ["abort", "abort_block", "continue", "skip", "retry", "goto", "escalate_to_qb"]

# on_failure handler: either one shorthand string, or an object with a required
# "action" and optional "max_retries" / "goto_step" / "message".
# NOTE(review): "goto_step" is not required when action is "goto", and the bare
# "goto" shorthand cannot carry a target — confirm the executor copes with a
# missing target.
_FAILURE_HANDLER = {
    "oneOf": [
        {
            "type": "string",
            "enum": _FAILURE_SHORTHANDS,
        },
        {
            "type": "object",
            "required": ["action"],
            "additionalProperties": False,
            "properties": {
                "action": {"enum": _FAILURE_ACTIONS},
                "max_retries": {"type": "integer", "minimum": 0, "default": 0},
                "goto_step": {"type": "string"},
                "message": {"type": "string"},
            },
        },
    ]
}

# Validation block: an object whose only key, "checks", is a non-empty array of
# _CONDITION_CHECK objects. Action steps list "validation" as required.
_VALIDATION_BLOCK = {
    "type": "object",
    "required": ["checks"],
    "additionalProperties": False,
    "properties": {
        "checks": {
            "type": "array",
            "items": _CONDITION_CHECK,
            "minItems": 1,
        },
    },
}

# Step definitions — each is a separate schema, unified via oneOf in the main schema

# Action step: names one tool from _TOOL_NAMES together with its "params" object.
# "validation" and "on_failure" are mandatory; "description" and "bind_result"
# are optional. No other keys are allowed.
_ACTION_STEP = {
    "type": "object",
    "required": ["id", "type", "tool", "params", "validation", "on_failure"],
    "additionalProperties": False,
    "properties": {
        "id": {"type": "string"},
        "type": {"const": "action"},
        "description": {"type": "string"},
        "tool": {"enum": _TOOL_NAMES},
        "params": {"type": "object"},
        # Must look like "$name": a "$" followed by a lowercase identifier.
        # Presumably the variable the step's result is stored under — confirm
        # against the executor.
        "bind_result": {"type": "string", "pattern": r"^\$[a-z_][a-z0-9_]*$"},
        "validation": _VALIDATION_BLOCK,
        "on_failure": _FAILURE_HANDLER,
    },
}

# Gate step: requires "id", "type", "description" and a "gate_type" from
# _GATE_TYPES. "staged_actions", "timeout_seconds" and "on_timeout" are optional.
# The schema sets no lower bound on "timeout_seconds".
_GATE_STEP = {
    "type": "object",
    "required": ["id", "type", "description", "gate_type"],
    "additionalProperties": False,
    "properties": {
        "id": {"type": "string"},
        "type": {"const": "gate"},
        "description": {"type": "string"},
        "gate_type": {"enum": _GATE_TYPES},
        # Array of strings; presumably step ids — TODO confirm against the executor.
        "staged_actions": {"type": "array", "items": {"type": "string"}},
        "timeout_seconds": {"type": "integer", "default": 300},
        "on_timeout": {"enum": ["abort", "skip", "auto_approve"], "default": "abort"},
    },
}

# Recursive reference — condition, loop, and group steps contain nested steps.
# Each use of this dict is a JSON Schema "$ref" that resolves against
# "$defs" -> "step" in WORK_BLOCK_SCHEMA, so the Python dicts need no patching
# after construction. Because the pointer is relative to the document root, the
# step schemas that embed it are only resolvable as part of WORK_BLOCK_SCHEMA.
_STEP_REF = {"$ref": "#/$defs/step"}

# Condition step: holds one _CONDITION_CHECK plus two branches of nested steps.
# Both "if_true" and "if_false" are required, but either may be an empty array
# (no minItems is set).
_CONDITION_STEP = {
    "type": "object",
    "required": ["id", "type", "check", "if_true", "if_false"],
    "additionalProperties": False,
    "properties": {
        "id": {"type": "string"},
        "type": {"const": "condition"},
        "description": {"type": "string"},
        "check": _CONDITION_CHECK,
        "if_true": {"type": "array", "items": _STEP_REF},
        "if_false": {"type": "array", "items": _STEP_REF},
    },
}

# Loop step: a non-empty "body" of nested steps plus an "over" source.
# "bind_item" and "max_iterations" are optional; the schema puts no bounds on
# "max_iterations".
_LOOP_STEP = {
    "type": "object",
    "required": ["id", "type", "over", "body"],
    "additionalProperties": False,
    "properties": {
        "id": {"type": "string"},
        "type": {"const": "loop"},
        "description": {"type": "string"},
        # Exactly one of two shapes: a literal list of strings ("items"), or a
        # "from_result" string with an optional "split_on" separator.
        "over": {
            "oneOf": [
                {
                    "type": "object",
                    "required": ["items"],
                    "additionalProperties": False,
                    "properties": {
                        "items": {"type": "array", "items": {"type": "string"}},
                    },
                },
                {
                    "type": "object",
                    "required": ["from_result"],
                    "additionalProperties": False,
                    "properties": {
                        "from_result": {"type": "string"},
                        "split_on": {"type": "string", "default": "\n"},
                    },
                },
            ],
        },
        # Same "$name" pattern as an action step's "bind_result".
        "bind_item": {"type": "string", "pattern": r"^\$[a-z_][a-z0-9_]*$", "default": "$item"},
        "max_iterations": {"type": "integer", "default": 20},
        "body": {"type": "array", "items": _STEP_REF, "minItems": 1},
    },
}

# Group step: a non-empty array of nested steps with an optional shared
# "on_failure" handler (same schema as an action step's handler).
_GROUP_STEP = {
    "type": "object",
    "required": ["id", "type", "steps"],
    "additionalProperties": False,
    "properties": {
        "id": {"type": "string"},
        "type": {"const": "group"},
        "description": {"type": "string"},
        "steps": {"type": "array", "items": _STEP_REF, "minItems": 1},
        "on_failure": _FAILURE_HANDLER,
    },
}

# ---------------------------------------------------------------------------
# Main schema
# ---------------------------------------------------------------------------

# Root JSON Schema (draft 2020-12) for a work block spec.
# Required top-level keys: "spec_version", "block", "steps", "constraints";
# "snap_interface" is optional and no other top-level keys are allowed.
# NOTE: the "default" keywords are annotations only — the validation done in
# this module reports errors and never inserts defaults into a spec.
WORK_BLOCK_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "WorkBlockSpec",
    "description": "Structured execution spec for QB -> Codemine worker handoff.",
    "type": "object",
    "required": ["spec_version", "block", "steps", "constraints"],
    "additionalProperties": False,
    "properties": {
        # Only the exact version string "1.0.0" is accepted.
        "spec_version": {"const": "1.0.0"},
        # Identity and scope of the block; "agent", "workspace" and
        # "depends_on" are optional.
        "block": {
            "type": "object",
            "required": ["id", "name", "scope", "acceptance_criteria"],
            "additionalProperties": False,
            "properties": {
                "id": {"type": "string"},
                "name": {"type": "string", "maxLength": 120},
                "agent": {"type": "string"},
                "scope": {"type": "string"},
                "workspace": {
                    "type": "string",
                    "description": "Workspace root for PolicyEngine path checks. Defaults to Codemine repo if omitted.",
                },
                "acceptance_criteria": {
                    "type": "array",
                    "items": {"type": "string"},
                    "minItems": 1,
                },
                "depends_on": {
                    "type": "array",
                    "items": {"type": "string"},
                    "default": [],
                },
            },
        },
        # Optional declaration of the block's inputs and outputs. Inputs must
        # name a "source_block"; outputs must give a "path".
        "snap_interface": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "inputs": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["name", "type", "source_block"],
                        "additionalProperties": False,
                        "properties": {
                            "name": {"type": "string"},
                            "type": {"enum": ["file", "function", "config", "state"]},
                            "source_block": {"type": "string"},
                            "path": {"type": "string"},
                        },
                    },
                    "default": [],
                },
                "outputs": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["name", "type", "path"],
                        "additionalProperties": False,
                        "properties": {
                            "name": {"type": "string"},
                            "type": {"enum": ["file", "function", "config", "state"]},
                            "path": {"type": "string"},
                            "contract": {"type": "string"},
                        },
                    },
                    "default": [],
                },
            },
        },
        # "never" must be a non-empty list; "anti_drift" is required but may be
        # empty. All remaining constraint keys are optional.
        "constraints": {
            "type": "object",
            "required": ["never", "anti_drift"],
            "additionalProperties": False,
            "properties": {
                "never": {"type": "array", "items": {"type": "string"}, "minItems": 1},
                "anti_drift": {"type": "array", "items": {"type": "string"}},
                "tool_allowlist": {"type": "array", "items": {"enum": _TOOL_NAMES}},
                "shell_allowlist": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Additional shell command prefixes allowed for this spec, extends PolicyEngine's base allowlist.",
                },
                "max_iterations": {"type": "integer", "minimum": 1, "maximum": 100, "default": 15},
                "timeout_seconds": {"type": "integer", "minimum": 30, "maximum": 3600, "default": 300},
                "read_only_paths": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Absolute paths outside workspace that read_file may access (read-only). Typical use: sibling repos QB needs to inspect without writing.",
                },
            },
        },
        # At least one step is required; each item resolves through $defs/step.
        "steps": {
            "type": "array",
            "items": _STEP_REF,
            "minItems": 1,
        },
    },
    "$defs": {
        # A step must match exactly one of the five step schemas; each pins a
        # different "type" const, so at most one can match.
        "step": {
            "oneOf": [
                _ACTION_STEP,
                _GATE_STEP,
                _CONDITION_STEP,
                _LOOP_STEP,
                _GROUP_STEP,
            ],
        },
    },
}

# Pre-compile the validator once at import time so validate_spec can reuse it.
# NOTE(review): WORK_BLOCK_SCHEMA itself is not verified here (no check_schema
# call) — presumably a malformed schema would only surface when a spec is
# validated; confirm whether an import-time check is wanted.
_validator = Draft202012Validator(WORK_BLOCK_SCHEMA)


def validate_spec(spec: dict) -> Tuple[bool, list]:
    """Check a work block spec against WORK_BLOCK_SCHEMA.

    Args:
        spec: The parsed spec to validate.

    Returns:
        A ``(is_valid, errors)`` pair. ``errors`` holds one
        ``"<dotted.path>: <message>"`` string per schema violation, ordered by
        the location of the offending value; violations at the top level of
        the spec are reported under ``"(root)"``. The list is empty when the
        spec is valid.
    """
    ordered = sorted(
        _validator.iter_errors(spec),
        key=lambda err: list(err.path),
    )

    errors = []
    for err in ordered:
        location = ".".join(map(str, err.absolute_path))
        if not location:
            location = "(root)"
        errors.append(f"{location}: {err.message}")

    if not errors:
        return True, errors

    logger.warning("Spec validation failed with %d errors", len(errors))
    return False, errors


def validate_spec_file(path: str) -> Tuple[bool, list]:
    """Load a JSON spec file and validate it against the work block schema.

    Args:
        path: Filesystem path of the JSON spec file.

    Returns:
        A ``(is_valid, errors)`` pair. If the file cannot be opened, decoded
        or parsed, the result is ``(False, ["Failed to load spec: ..."])``;
        otherwise it is whatever ``validate_spec`` returns for the parsed
        content.
    """
    try:
        # Decode explicitly instead of relying on the platform default
        # encoding. "utf-8-sig" reads UTF-8 with or without a leading BOM,
        # which json would otherwise reject.
        with open(path, "r", encoding="utf-8-sig") as f:
            spec = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (file is not valid UTF-8), so an unreadable file
        # is reported as a load failure rather than raising to the caller.
        return False, [f"Failed to load spec: {e}"]
    return validate_spec(spec)