Agentic-Service-Data-Eyond-Catalog

Sleeping

Rifqi Hafizuddin Claude Opus 4.8 committed 4 days ago

Commit

72306d0

1 Parent(s): 83ba6b1

[KM-567][AI] Planner agent: validator + service

- validator.py: PlannerValidator runs the 8 checks from §7.3 (tools in registry,
catalog refs exist, DAG valid / no cycles, parallelism consistent, within task
cap, checkable success_criteria, args valid, inline query_structured IR via the
existing IRValidator). Raises PlannerValidationError with self-correctable
messages.
- service.py: PlannerService + plan_analysis(). LLM chain mirrors
query/planner/service.py; validate-and-retry loop (max 3) mirrors QueryService.
Takes the full Catalog, derives the PII-safe CatalogSummary for the prompt.
Static plan only — no replanning.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

Files changed (2) hide show

src/agents/planner/service.py +150 -0
src/agents/planner/validator.py +249 -0

src/agents/planner/service.py ADDED Viewed

	@@ -0,0 +1,150 @@

+"""PlannerService — single LLM call: context + catalog + tools + question -> TaskList.
+Mirrors `query/planner/service.py` (chain construction) and `query/service.py`
+(validate-and-retry loop). The planner LLM emits a `TaskList` via structured
+output; the `PlannerValidator` runs the 8 checks; on failure the planner is
+re-prompted with the error context, for at most `max_retries` attempts in
+total (default 3, counting the first call). No replanning happens at execution
+time — this loop only hardens the *initial* static plan.
+The service takes the full `Catalog` (not just a `CatalogSummary`): it derives
+the PII-safe `CatalogSummary` for the prompt, but validation needs the full
+catalog so the existing `IRValidator` can check inline `query_structured` IRs.
+See AGENT_ARCHITECTURE_CONTEXT_new.md §7.3.
+"""
+from __future__ import annotations
+from pathlib import Path
+from langchain_core.messages import SystemMessage
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import Runnable
+from langchain_openai import AzureChatOpenAI
+from src.middlewares.logging import get_logger
+from ...catalog.models import Catalog
+from .contracts import BusinessContext, ToolRegistry
+from .errors import PlannerError, PlannerValidationError
+from .inputs import CatalogSummary, Constraints
+from .prompt import build_planner_prompt
+from .schemas import TaskList
+from .validator import PlannerValidator
+# Module-level logger, requested under the name "planner_agent".
+logger = get_logger("planner_agent")
+# Planner system prompt file: starting from the directory that contains this
+# module, go up two more directories, then into config/prompts/planner.md.
+_PROMPT_PATH = (
+    Path(__file__).resolve().parent.parent.parent / "config" / "prompts" / "planner.md"
+)
+def _load_prompt_text() -> str:
+    return _PROMPT_PATH.read_text(encoding="utf-8")
+def _build_default_chain() -> Runnable:
+    from src.config.settings import settings
+    llm = AzureChatOpenAI(
+        azure_deployment=settings.azureai_deployment_name_4o,
+        openai_api_version=settings.azureai_api_version_4o,
+        azure_endpoint=settings.azureai_endpoint_url_4o,
+        api_key=settings.azureai_api_key_4o,
+        temperature=0,
+    )
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            SystemMessage(content=_load_prompt_text()),
+            ("human", "{human_content}"),
+        ]
+    )
+    return prompt | llm.with_structured_output(TaskList)
+_default_chain: Runnable | None = None
+def _get_default_chain() -> Runnable:
+    global _default_chain
+    if _default_chain is None:
+        _default_chain = _build_default_chain()
+    return _default_chain
+class PlannerService:
+    """Wraps the planner LLM call + the validate-and-retry loop.
+    Inject `structured_chain` and/or `validator` for tests.
+    """
+    def __init__(
+        self,
+        structured_chain: Runnable | None = None,
+        validator: PlannerValidator | None = None,
+        max_retries: int = 3,
+    ) -> None:
+        self._chain = structured_chain
+        self._validator = validator or PlannerValidator()
+        self._max_retries = max_retries
+    def _ensure_chain(self) -> Runnable:
+        if self._chain is None:
+            self._chain = _get_default_chain()
+        return self._chain
+    async def plan(
+        self,
+        context: BusinessContext,
+        catalog: Catalog,
+        tools: ToolRegistry,
+        query: str,
+        constraints: Constraints,
+    ) -> TaskList:
+        summary = CatalogSummary.from_catalog(catalog)
+        chain = self._ensure_chain()
+        previous_error: str | None = None
+        for attempt in range(1, self._max_retries + 1):
+            human_content = build_planner_prompt(
+                context, summary, tools, query, constraints, previous_error
+            )
+            task_list: TaskList = await chain.ainvoke({"human_content": human_content})
+            try:
+                self._validator.validate(task_list, tools, catalog, constraints)
+            except PlannerValidationError as e:
+                previous_error = str(e)
+                logger.warning(
+                    "planner validation failed",
+                    project_id=context.project_id,
+                    plan_id=task_list.plan_id,
+                    attempt=attempt,
+                    error=previous_error,
+                )
+                continue
+            logger.info(
+                "analysis planned",
+                project_id=context.project_id,
+                plan_id=task_list.plan_id,
+                n_tasks=len(task_list.tasks),
+                retry=attempt > 1,
+            )
+            return task_list
+        raise PlannerError(
+            f"planner failed validation after {self._max_retries} attempts; "
+            f"last error: {previous_error}"
+        )
+async def plan_analysis(
+    context: BusinessContext,
+    catalog: Catalog,
+    tools: ToolRegistry,
+    query: str,
+    constraints: Constraints,
+) -> TaskList:
+    """Convenience entry point using the default chain + validator."""
+    return await PlannerService().plan(context, catalog, tools, query, constraints)

src/agents/planner/validator.py ADDED Viewed

	@@ -0,0 +1,249 @@

+"""PlannerValidator — checks a TaskList before it reaches the TaskRunner.
+Runs the 8 checks from AGENT_ARCHITECTURE_CONTEXT_new.md §7.3. On failure it
+raises `PlannerValidationError` with a message specific enough that the planner
+can be re-prompted to self-correct (the retry loop lives in service.py).
+Check #1 (Pydantic parse) is enforced at the structured-output boundary — by the
+time a `TaskList` reaches here it has already parsed; this validator additionally
+rejects structurally-invalid plans (duplicate ids, dangling edges, cycles).
+"""
+from __future__ import annotations
+import re
+from pydantic import ValidationError
+from ...catalog.models import Catalog
+from ...query.ir.models import QueryIR
+from ...query.ir.validator import IRValidationError, IRValidator
+from .contracts import ToolRegistry
+from .errors import PlannerValidationError
+from .inputs import Constraints
+from .schemas import TaskList
+# Heuristic: a checkable success_criteria mentions a measurable signal.
+# These are the tokens `_is_checkable` looks for, case-insensitively.
+_CHECKABLE_TOKENS = ("rate", "count", "match", "produced", "above", "below", "equal")
+# Matches `${...}` where the text inside the braces starts with "t"; group 1
+# captures that text, which the validator compares against task ids.
+_PLACEHOLDER_RE = re.compile(r"\$\{(t[^}]+)\}")
+# DFS colors for cycle detection.
+# WHITE = not visited yet, GREY = on the path currently being explored,
+# BLACK = fully explored.
+_WHITE, _GREY, _BLACK = 0, 1, 2
+class PlannerValidator:
+    def __init__(self, ir_validator: IRValidator | None = None) -> None:
+        self._ir_validator = ir_validator or IRValidator()
+    def validate(
+        self,
+        task_list: TaskList,
+        registry: ToolRegistry,
+        catalog: Catalog,
+        constraints: Constraints,
+    ) -> None:
+        tasks = task_list.tasks
+        # Check 6 — plan non-empty and within the task cap.
+        if not tasks:
+            raise PlannerValidationError("plan is empty: at least one task is required")
+        if len(tasks) > constraints.max_tasks:
+            raise PlannerValidationError(
+                f"plan has {len(tasks)} tasks, exceeds max_tasks={constraints.max_tasks}"
+            )
+        ids = [t.id for t in tasks]
+        if len(set(ids)) != len(ids):
+            dupes = sorted({i for i in ids if ids.count(i) > 1})
+            raise PlannerValidationError(f"duplicate task id(s): {dupes}")
+        id_set = set(ids)
+        tasks_by_id = {t.id: t for t in tasks}
+        known_tools = registry.names()
+        known_sources = {s.source_id for s in catalog.sources}
+        for task in tasks:
+            for call in task.tool_calls:
+                # Check 2 — every tool exists in the registry.
+                if call.tool not in known_tools:
+                    raise PlannerValidationError(
+                        f"task {task.id}: tool {call.tool!r} not in registry "
+                        f"(known: {sorted(known_tools)})"
+                    )
+                spec = registry.get(call.tool)
+                assert spec is not None  # guaranteed by the membership check above
+                # Check 8a — args carry the required keys and no unknown keys.
+                required = set(spec.input_schema.get("required", []))
+                allowed = set(spec.input_schema.get("properties", {}).keys()) | required
+                missing = required - set(call.args.keys())
+                if missing:
+                    raise PlannerValidationError(
+                        f"task {task.id}: tool {call.tool!r} missing required arg(s): "
+                        f"{sorted(missing)}"
+                    )
+                unknown = set(call.args.keys()) - allowed
+                if unknown:
+                    raise PlannerValidationError(
+                        f"task {task.id}: tool {call.tool!r} has unknown arg(s): "
+                        f"{sorted(unknown)} (allowed: {sorted(allowed)})"
+                    )
+                # Check 3 — concrete source_id args must exist in the catalog.
+                src = call.args.get("source_id")
+                if isinstance(src, str) and not _is_placeholder(src):
+                    if src not in known_sources:
+                        raise PlannerValidationError(
+                            f"task {task.id}: tool {call.tool!r} references unknown "
+                            f"source_id {src!r} (known: {sorted(known_sources)})"
+                        )
+                # Check 8b — inline query_structured IR validates against the catalog.
+                if call.tool == "query_structured":
+                    self._validate_inline_ir(task.id, call.args, catalog)
+            # Check 7 — success_criteria is checkable.
+            if not _is_checkable(task.success_criteria):
+                raise PlannerValidationError(
+                    f"task {task.id}: success_criteria is not checkable — include a "
+                    f"measurable signal (one of {list(_CHECKABLE_TOKENS)}); "
+                    f"got {task.success_criteria!r}"
+                )
+        # Check 4 — DAG: edges resolve, placeholders resolve, no cycles.
+        self._validate_dag(tasks_by_id, id_set)
+        # Check 5 — parallelizable_with is consistent with the dependency graph.
+        self._validate_parallelism(tasks_by_id, id_set)
+    def _validate_inline_ir(self, task_id: str, args: dict, catalog: Catalog) -> None:
+        raw_ir = args.get("ir")
+        if not isinstance(raw_ir, dict):
+            raise PlannerValidationError(
+                f"task {task_id}: query_structured.args.ir must be an inline QueryIR "
+                f"object, got {type(raw_ir).__name__}"
+            )
+        try:
+            ir = QueryIR.model_validate(raw_ir)
+        except ValidationError as e:
+            raise PlannerValidationError(
+                f"task {task_id}: query_structured.args.ir is not a valid QueryIR: {e}"
+            ) from e
+        try:
+            self._ir_validator.validate(ir, catalog)
+        except IRValidationError as e:
+            raise PlannerValidationError(
+                f"task {task_id}: query_structured IR failed catalog validation: {e}"
+            ) from e
+    @staticmethod
+    def _validate_dag(tasks_by_id: dict, id_set: set[str]) -> None:
+        for task in tasks_by_id.values():
+            for dep in task.depends_on:
+                if dep not in id_set:
+                    raise PlannerValidationError(
+                        f"task {task.id}: depends_on references unknown task {dep!r}"
+                    )
+                if dep == task.id:
+                    raise PlannerValidationError(
+                        f"task {task.id}: depends_on includes itself"
+                    )
+            # Placeholders must reference an existing, declared dependency.
+            for ref in _placeholder_refs(task):
+                if ref not in id_set:
+                    raise PlannerValidationError(
+                        f"task {task.id}: placeholder '${{{ref}}}' references unknown task"
+                    )
+                if ref not in task.depends_on:
+                    raise PlannerValidationError(
+                        f"task {task.id}: placeholder '${{{ref}}}' used but {ref!r} is "
+                        f"not in depends_on"
+                    )
+        cycle = _find_cycle(tasks_by_id)
+        if cycle:
+            raise PlannerValidationError(f"cycle detected in depends_on: {' -> '.join(cycle)}")
+    @staticmethod
+    def _validate_parallelism(tasks_by_id: dict, id_set: set[str]) -> None:
+        ancestors = _all_ancestors(tasks_by_id)
+        for task in tasks_by_id.values():
+            for other in task.parallelizable_with:
+                if other not in id_set:
+                    raise PlannerValidationError(
+                        f"task {task.id}: parallelizable_with references unknown task "
+                        f"{other!r}"
+                    )
+                if other == task.id:
+                    raise PlannerValidationError(
+                        f"task {task.id}: parallelizable_with includes itself"
+                    )
+                if other in ancestors[task.id] or task.id in ancestors[other]:
+                    raise PlannerValidationError(
+                        f"task {task.id}: parallelizable_with {other!r} conflicts with a "
+                        f"(transitive) depends_on relationship between them"
+                    )
+def _is_placeholder(value: str) -> bool:
+    return bool(_PLACEHOLDER_RE.fullmatch(value.strip()))
+def _placeholder_refs(task) -> set[str]:
+    refs: set[str] = set()
+    for call in task.tool_calls:
+        for value in call.args.values():
+            if isinstance(value, str):
+                refs.update(_PLACEHOLDER_RE.findall(value))
+    return refs
+def _is_checkable(text: str) -> bool:
+    low = text.lower()
+    return any(tok in low for tok in _CHECKABLE_TOKENS)
+def _find_cycle(tasks_by_id: dict) -> list[str] | None:
+    color = {tid: _WHITE for tid in tasks_by_id}
+    stack: list[str] = []
+    def dfs(node: str) -> list[str] | None:
+        color[node] = _GREY
+        stack.append(node)
+        for dep in tasks_by_id[node].depends_on:
+            if color.get(dep) == _GREY:
+                idx = stack.index(dep)
+                return stack[idx:] + [dep]
+            if color.get(dep) == _WHITE:
+                found = dfs(dep)
+                if found:
+                    return found
+        stack.pop()
+        color[node] = _BLACK
+        return None
+    for tid in tasks_by_id:
+        if color[tid] == _WHITE:
+            found = dfs(tid)
+            if found:
+                return found
+    return None
+def _all_ancestors(tasks_by_id: dict) -> dict[str, set[str]]:
+    """ancestors[id] = all tasks reachable by following depends_on edges.
+    Both direct and transitive dependencies are included; ids in `depends_on`
+    that are not keys of `tasks_by_id` are left out.
+    NOTE(review): results are memoized per task while the `seen` guard depends
+    on the path taken, so this looks like it assumes an acyclic graph — confirm
+    callers only use it after cycle detection.
+    """
+    # Memo of finished results, keyed by task id.
+    cache: dict[str, set[str]] = {}
+    def visit(node: str, seen: set[str]) -> set[str]:
+        # `seen` holds the ids on the path walked so far, start node included.
+        if node in cache:
+            return cache[node]
+        acc: set[str] = set()
+        for dep in tasks_by_id[node].depends_on:
+            # Skip ids already on the current path and ids that are not tasks.
+            if dep in seen or dep not in tasks_by_id:
+                continue
+            acc.add(dep)
+            acc |= visit(dep, seen | {dep})
+        cache[node] = acc
+        return acc
+    return {tid: visit(tid, {tid}) for tid in tasks_by_id}