Spaces:

Executor-Tyrant-Framework
/

Working-in-a-Codemine

Running

App Files Files Community

Working-in-a-Codemine / spec_generator.py

Executor-Tyrant-Framework

Iterate spec generator: don't guess paths, absolute paths, valid operators only

cf8c05f about 2 months ago

raw

history blame contribute delete

13.2 kB

	# ---- Changelog ----
	# [2026-04-07] Josh + Claude — Spec generator via Strategist persona (Phase 5)
	# What: Turns natural language intent + constraints into valid WorkBlockSpec JSON
	# Why: The system needs to generate its own specs, not have humans write JSON by hand
	# How: Strategist persona + Graph context + schema reference + validation retry loop
	# -------------------

	"""Spec Generator — Strategist creates structured work block specs.

	The Strategist persona takes natural language intent and constraints,
	queries the Graph for relevant past experience, and produces a valid
	WorkBlockSpec JSON. Schema validation on output — malformed specs
	trigger a retry with the validation errors fed back in.
	"""

	import json
	import logging
	from typing import Optional

	from persona_client import call_persona
	from work_block_schema import validate_spec, WORK_BLOCK_SCHEMA, _TOOL_NAMES

	logger = logging.getLogger("spec_generator")

	# Compact schema reference embedded in every Strategist prompt — just enough to generate valid specs.
	# This is an f-string: the tool list is interpolated from _TOOL_NAMES, literal braces are
	# doubled ({{ }}), and "\\n" renders as the two characters \n in the prompt text.
	# Alternatives are separated with a plain "|"; a backslash before the pipe would be an
	# invalid escape sequence and would also leak a stray backslash into the prompt.
	_SCHEMA_REFERENCE = f"""## WorkBlockSpec Schema Reference

	Required top-level keys: spec_version, block, steps, constraints

	### spec_version
	Always "1.0.0"

	### block
	Required: id (string), name (string, max 120 chars), scope (string), acceptance_criteria (array of strings, min 1)
	Optional: agent (string), workspace (string — absolute path for cross-repo work), depends_on (array of strings)

	### snap_interface (optional)
	inputs: array of {{name, type (file|function|config|state), source_block, path}}
	outputs: array of {{name, type, path, contract}}

	### constraints
	Required: never (array of strings, min 1), anti_drift (array of strings)
	Optional: tool_allowlist (array from: {', '.join(_TOOL_NAMES)}), shell_allowlist (array of strings), max_iterations (int, 1-100, default 15), timeout_seconds (int, 30-3600, default 300)

	### steps (array, min 1)
	Five step types:

	action (required: id, type:"action", tool, params, validation, on_failure)
	- tool: one of {', '.join(_TOOL_NAMES)}
	- params: object matching tool's input schema
	- bind_result: optional "$variable_name" to store result
	- validation.checks: array of {{operator, target, value, description}}
	operators: contains, not_contains, equals, not_equals, matches_regex, result_is_string, result_is_not_error, file_exists, file_contains, output_length_gt, output_length_lt
	- on_failure: {{action: abort_block|retry|skip|goto|escalate_to_qb, message}}

	gate (required: id, type:"gate", description, gate_type)
	- gate_type: human_review | qb_checkpoint | auto_approve
	- staged_actions: array of step IDs

	condition (required: id, type:"condition", check, if_true, if_false)
	- check: {{operator, target, value, description}}
	- if_true / if_false: arrays of steps

	loop (required: id, type:"loop", over, body)
	- over: {{items: [strings]}} or {{from_result: "$var", split_on: "\\n"}}
	- bind_item: "$item" (default)
	- body: array of steps

	group (required: id, type:"group", steps)
	- steps: array of steps
	- on_failure: optional

	### CRITICAL RULES
	1. EVERY action step MUST have a validation block with at least one check
	2. EVERY action step MUST have an on_failure handler
	3. NEVER guess file paths — start with shell_execute "find" or "ls" to discover repo structure
	4. Start specs by reading the source of truth (vault docs, canonical files) before acting
	5. Verify before asserting — add read/grep steps before write steps
	6. Use shell_execute for reads outside the default workspace (grep, head, tail, find, ls, etc.)
	7. All file paths in params must be ABSOLUTE (starting with /) when workspace is set
	8. ONLY use operators from the list above — do NOT invent new operators
	"""


	def generate_spec(
	    intent: str,
	    constraints: Optional[str] = None,
	    graph_context: Optional[list] = None,
	    workspace: Optional[str] = None,
	    max_retries: int = 2,
	) -> dict:
	    """Generate a WorkBlockSpec from natural language intent.

	    Calls the "strategist" persona, extracts a JSON object from its response
	    and validates it with validate_spec(). When no JSON is found or validation
	    fails, the next attempt uses a retry prompt carrying the errors.

	    Args:
	        intent: What to build/fix/audit (natural language).
	        constraints: Project-specific constraints (natural language or structured).
	        graph_context: Graph recall results for relevant past experience;
	            passed through to call_persona() unchanged.
	        workspace: Workspace path if cross-repo work is needed.
	        max_retries: How many times to retry after a failed attempt. Values
	            below 0 are treated as 0, so at least one attempt is always made.

	    Returns:
	        A dict with the keys:
	            success: bool.
	            spec: dict | None — the validated spec on success, otherwise the
	                last extracted (invalid) spec, or None if the last response
	                contained no JSON object.
	            errors: list[str] | None — None on success, otherwise the errors
	                of the last attempt.
	            generation_attempts: int — number of persona calls made.
	            strategist_response: str — raw response of the last attempt, for
	                debugging.
	    """
	    no_json_message = "No valid JSON found in response. Output ONLY the JSON spec, no prose."
	    total_attempts = max(max_retries, 0) + 1
	    task = _build_generation_prompt(intent, constraints, workspace)

	    # State of the most recent attempt; this is what gets reported when every
	    # attempt fails, so spec / errors / raw always describe the same response.
	    spec = None
	    errors: list = []
	    raw = ""

	    for attempt in range(1, total_attempts + 1):
	        logger.info("Spec generation attempt %d/%d", attempt, total_attempts)

	        result = call_persona(
	            role="strategist",
	            task=task,
	            graph_context=graph_context,
	            temperature=0.4,  # Low temp for structured output
	            max_tokens=8192,  # Specs can be long
	        )

	        # A missing or None response is treated as empty text.
	        raw = result.get("response") or ""

	        # Extract JSON from the response
	        spec = _extract_json(raw)
	        if spec is None:
	            logger.warning("Attempt %d: No valid JSON found in response", attempt)
	            errors = [no_json_message]
	            task = _build_retry_prompt(intent, constraints, workspace, errors)
	            continue

	        # Schema validation
	        valid, errors = validate_spec(spec)
	        if valid:
	            logger.info("Spec generated and validated on attempt %d", attempt)
	            return {
	                "success": True,
	                "spec": spec,
	                "errors": None,
	                "generation_attempts": attempt,
	                "strategist_response": raw,
	            }

	        # Validation failed — retry with errors
	        logger.warning("Attempt %d: Schema validation failed with %d errors", attempt, len(errors))
	        task = _build_retry_prompt(intent, constraints, workspace, errors)

	    return {
	        "success": False,
	        "spec": spec,  # Return the last attempt even if invalid
	        "errors": errors,
	        "generation_attempts": total_attempts,
	        "strategist_response": raw,
	    }


	def generate_followup_spec(
	    original_spec: dict,
	    failed_report: dict,
	    evaluation: dict,
	    graph_context: Optional[list] = None,
	    max_retries: int = 2,
	) -> dict:
	    """Generate a follow-up spec targeting gaps from a failed execution.

	    Builds a follow-up prompt from the original spec, the execution report
	    and the evaluation, then runs the same generate / extract / validate /
	    retry loop as generate_spec().

	    Args:
	        original_spec: The spec that was executed.
	        failed_report: The execution report showing failures.
	        evaluation: The evaluate_report() output with iteration hints.
	        graph_context: Graph recall for context; passed through to
	            call_persona() unchanged.
	        max_retries: How many times to retry after a failed attempt. Values
	            below 0 are treated as 0, so at least one attempt is always made.

	    Returns:
	        Same format as generate_spec(): a dict with success, spec, errors,
	        generation_attempts and strategist_response. On failure, spec, errors
	        and strategist_response all describe the last attempt.
	    """
	    no_json_message = "No valid JSON found. Output ONLY the JSON spec."
	    total_attempts = max(max_retries, 0) + 1
	    task = _build_followup_prompt(original_spec, failed_report, evaluation)

	    # The retry intent only depends on the original spec, so build it once.
	    retry_intent = f"Follow-up for {original_spec.get('block', {}).get('name', 'unknown')}"

	    # State of the most recent attempt, reported when every attempt fails.
	    spec = None
	    errors: list = []
	    raw = ""

	    for attempt in range(1, total_attempts + 1):
	        logger.info("Follow-up spec generation attempt %d/%d", attempt, total_attempts)

	        result = call_persona(
	            role="strategist",
	            task=task,
	            graph_context=graph_context,
	            temperature=0.4,
	            max_tokens=8192,
	        )

	        # A missing or None response is treated as empty text.
	        raw = result.get("response") or ""
	        spec = _extract_json(raw)

	        if spec is None:
	            logger.warning("Attempt %d: No valid JSON found in response", attempt)
	            errors = [no_json_message]
	            task = _build_retry_prompt(retry_intent, None, None, errors)
	            continue

	        valid, errors = validate_spec(spec)
	        if valid:
	            logger.info("Follow-up spec validated on attempt %d", attempt)
	            return {
	                "success": True,
	                "spec": spec,
	                "errors": None,
	                "generation_attempts": attempt,
	                "strategist_response": raw,
	            }

	        logger.warning("Attempt %d: Schema validation failed with %d errors", attempt, len(errors))
	        task = _build_retry_prompt(retry_intent, None, None, errors)

	    return {
	        "success": False,
	        "spec": spec,  # Last extracted spec, or None if the last response had no JSON
	        "errors": errors,
	        "generation_attempts": total_attempts,
	        "strategist_response": raw,
	    }


	# ---------------------------------------------------------------------------
	# Prompt builders
	# ---------------------------------------------------------------------------

	def _build_generation_prompt(intent: str, constraints: Optional[str], workspace: Optional[str]) -> str:
	    """Assemble the first-attempt Strategist prompt for a new spec.

	    Sections, in order: output instructions, the task, the optional project
	    constraints, the optional workspace, the schema reference and a closing
	    reminder list. Sections are joined with a single newline.
	    """
	    sections = [
	        "Generate a WorkBlockSpec JSON for the following task.\n",
	        "Output ONLY valid JSON — no prose before or after. No markdown code fences.\n\n",
	        f"## Task\n{intent}\n",
	    ]

	    # Optional sections are emitted only when their value is truthy.
	    optional_sections = (
	        (constraints, "\n## Project Constraints\n{}\n"),
	        (workspace, "\n## Workspace\nSet block.workspace to: {}\n"),
	    )
	    for value, template in optional_sections:
	        if value:
	            sections.append(template.format(value))

	    reminders = "".join((
	        "\n## Remember\n",
	        "- Start by reading the source of truth before acting\n",
	        "- Verify paths exist on disk before using them\n",
	        "- Every action MUST have validation checks and on_failure\n",
	        "- Use shell_execute with head/tail/grep for cross-repo reads\n",
	        "- Output ONLY the JSON. No explanation. No wrapping.\n",
	    ))
	    sections += [f"\n{_SCHEMA_REFERENCE}\n", reminders]

	    return "\n".join(sections)


	def _build_retry_prompt(intent: str, constraints: Optional[str], workspace: Optional[str], errors: list) -> str:
	    """Build a retry prompt that restates the task and lists validation errors.

	    Args:
	        intent: The task being specified; restated so the retry prompt is
	            self-contained. Omitted from the prompt when empty.
	        constraints: Optional project constraints to restate.
	        workspace: Optional workspace path to restate.
	        errors: Errors from the previous attempt. Only the first 10 are
	            listed; a trailing line reports how many were left out. None is
	            treated as an empty list.

	    Returns:
	        The prompt text: the error list, the restated task context, the fix
	        instructions and the schema reference.
	    """
	    max_listed = 10
	    all_errors = list(errors or [])
	    error_lines = [f"- {e}" for e in all_errors[:max_listed]]
	    omitted = len(all_errors) - max_listed
	    if omitted > 0:
	        # Make the truncation visible instead of silently dropping errors.
	        error_lines.append(f"- ... and {omitted} more")
	    error_text = "\n".join(error_lines)

	    # Restate the original request: without it the retry prompt carries only
	    # the errors and the schema, not what the spec is supposed to do.
	    context_parts = []
	    if intent:
	        context_parts.append(f"## Task\n{intent}\n\n")
	    if constraints:
	        context_parts.append(f"## Project Constraints\n{constraints}\n\n")
	    if workspace:
	        context_parts.append(f"## Workspace\nSet block.workspace to: {workspace}\n\n")
	    context_text = "".join(context_parts)

	    return (
	        f"Your previous spec had validation errors:\n{error_text}\n\n"
	        f"{context_text}"
	        "Fix these errors and output a corrected WorkBlockSpec JSON.\n"
	        "Output ONLY valid JSON — no prose, no code fences.\n\n"
	        f"{_SCHEMA_REFERENCE}"
	    )


	def _build_followup_prompt(original_spec: dict, report: dict, evaluation: dict) -> str:
	    """Compose the Strategist prompt requesting a follow-up spec after a failed run.

	    The prompt names the original block, lists every step in
	    report["step_results"] whose status is "fail", lists the evaluation's
	    iteration hints, and reproduces the original acceptance criteria and
	    constraints as JSON, followed by the schema reference.
	    """
	    block_info = original_spec.get('block', {})

	    # One bullet per iteration hint, or a placeholder when there are none.
	    hints = evaluation.get("iteration_hints", [])
	    if hints:
	        hints_text = "\n".join(f"- {h}" for h in hints)
	    else:
	        hints_text = "No specific hints."

	    # One bullet per failed step, or a placeholder when no step failed.
	    failure_lines = [
	        f"- {sid}: {r.get('reason', 'unknown')}"
	        for sid, r in report.get("step_results", {}).items()
	        if r.get("status") == "fail"
	    ]
	    if failure_lines:
	        failed_text = "\n".join(failure_lines)
	    else:
	        failed_text = "No step failures (evaluation flagged other issues)."

	    criteria_json = json.dumps(block_info.get('acceptance_criteria', []), indent=2)
	    constraints_json = json.dumps(original_spec.get('constraints', {}), indent=2)

	    sections = [
	        "The previous spec execution needs a follow-up.\n\n",
	        "## Original Block\n",
	        f"Name: {block_info.get('name', '?')}\n",
	        f"ID: {block_info.get('id', '?')}\n\n",
	        f"## What Failed\n{failed_text}\n\n",
	        f"## Iteration Hints\n{hints_text}\n\n",
	        "## Acceptance Criteria Still Unmet\n",
	        f"{criteria_json}\n\n",
	        "## Constraints From Original Spec\n",
	        f"{constraints_json}\n\n",
	        "Generate a NEW WorkBlockSpec JSON that addresses the failures and unmet criteria.\n",
	        "Output ONLY valid JSON — no prose, no code fences.\n\n",
	        f"{_SCHEMA_REFERENCE}",
	    ]
	    return "".join(sections)


	# ---------------------------------------------------------------------------
	# JSON extraction
	# ---------------------------------------------------------------------------

	def _extract_json(text: str) -> Optional[dict]:
	"""Extract a JSON object from text, handling various formats.

	Tries: raw JSON, markdown code blocks, finding { } boundaries.
	"""
	text = text.strip()

	# Try direct parse
	try:
	return json.loads(text)
	except json.JSONDecodeError:
	pass

	# Try extracting from markdown code block
	if "```json" in text:
	start = text.index("```json") + 7
	end = text.index("```", start)
	try:
	return json.loads(text[start:end].strip())
	except (json.JSONDecodeError, ValueError):
	pass

	if "```" in text:
	start = text.index("```") + 3
	# Skip language identifier if present
	newline = text.index("\n", start)
	content_start = newline + 1
	end = text.index("```", content_start)
	try:
	return json.loads(text[content_start:end].strip())
	except (json.JSONDecodeError, ValueError):
	pass

	# Try finding outermost { } pair
	first_brace = text.find("{")
	last_brace = text.rfind("}")
	if first_brace >= 0 and last_brace > first_brace:
	try:
	return json.loads(text[first_brace:last_brace + 1])
	except json.JSONDecodeError:
	pass

	return None