| | """compliance_planner node — dual-plan generation (crops + code queries)."""
|
| | from __future__ import annotations
|
| |
|
| | import json
|
| | import re
|
| | from datetime import datetime
|
| |
|
| | from google import genai
|
| | from google.genai import types
|
| |
|
| | from config import GOOGLE_API_KEY, PLANNER_MODEL
|
| | from prompts.compliance_planner import COMPLIANCE_PLANNER_SYSTEM_PROMPT
|
| | from state import AgentMessage, CodeQuery, ComplianceState, CropTask
|
| |
|
| |
|
def _coerce_int(value, default):
    """Return ``int(value)``, or *default* when *value* cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: JSON null / nested containers; ValueError: non-numeric
        # strings or NaN; OverflowError: Infinity (json.loads accepts both).
        return default


def _as_list(value) -> list:
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _extract_plan(response_text: str) -> dict:
    """Parse the first-``{`` to last-``}`` span of *response_text* as a JSON object.

    Returns an empty dict when no such span exists, the span is not valid
    JSON, or the decoded value is not a JSON object.
    """
    json_match = re.search(r"\{.*\}", response_text, re.DOTALL)
    if json_match is None:
        return {}
    try:
        parsed = json.loads(json_match.group())
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _zero_indexed_pages(raw_pages, num_pages: int) -> list[int]:
    """Convert 1-indexed page numbers to 0-indexed, dropping invalid entries."""
    pages: list[int] = []
    for raw in _as_list(raw_pages):
        page = _coerce_int(raw, None)
        if page is not None and 1 <= page <= num_pages:
            pages.append(page - 1)
    return pages


def _build_crop_tasks(raw_tasks, num_pages: int) -> list[CropTask]:
    """Build crop tasks from the plan, skipping malformed or out-of-range ones."""
    crop_tasks: list[CropTask] = []
    for t in _as_list(raw_tasks):
        if not isinstance(t, dict):
            continue
        raw_page = _coerce_int(t.get("page_num", 1), None)
        # Same range check as target/legend pages: page 0 would otherwise
        # become index -1 and silently wrap around to the last page.
        if raw_page is None or not 1 <= raw_page <= num_pages:
            continue
        crop_tasks.append(
            CropTask(
                page_num=raw_page - 1,
                crop_instruction=t.get("crop_instruction") or "",
                annotate=bool(t.get("annotate", False)),
                annotation_prompt=t.get("annotation_prompt") or "",
                label=t.get("label") or f"Page {raw_page} crop",
                priority=_coerce_int(t.get("priority", 1), 1),
            )
        )
    return crop_tasks


def _build_code_queries(raw_queries) -> list[CodeQuery]:
    """Build code queries from the plan, skipping entries that are not objects."""
    code_queries: list[CodeQuery] = []
    for q in _as_list(raw_queries):
        if not isinstance(q, dict):
            continue
        code_queries.append(
            CodeQuery(
                query=q.get("query") or "",
                # Coerced to str because the summary below joins these values.
                focus_area=str(q.get("focus_area") or ""),
                context=q.get("context") or "",
                priority=_coerce_int(q.get("priority", 0), 0),
            )
        )
    return code_queries


def compliance_planner(state: ComplianceState) -> dict:
    """Ask the planner model for a dual plan: image crops AND code lookups.

    Builds a prompt from the user question, the page count, the investigation
    round and (when present) the page metadata, sends it to ``PLANNER_MODEL``
    and parses the JSON object found in the reply.

    Page numbers in the model reply are treated as 1-indexed and converted to
    0-indexed; pages outside ``[1, num_pages]`` are dropped. Malformed plan
    entries are skipped individually so one bad item does not discard the
    rest of the plan. If the reply yields neither target pages nor crop
    tasks, the first ``min(num_pages, 5)`` pages are selected as a fallback.

    Args:
        state: Graph state. Reads ``question`` (required) and the optional
            ``num_pages``, ``page_metadata_json`` and ``investigation_round``.

    Returns:
        A state update with ``target_pages``, ``legend_pages``, ``crop_tasks``
        (sorted by ascending priority), ``code_queries``, a one-element
        ``discussion_log`` and a one-element ``status_message``.
    """
    question = state["question"]
    num_pages = state.get("num_pages", 0)
    page_metadata_json = state.get("page_metadata_json", "")
    investigation_round = state.get("investigation_round", 0)

    client = genai.Client(api_key=GOOGLE_API_KEY)

    question_text = (
        f"USER COMPLIANCE QUESTION: {question}\n\n"
        f"The PDF has {num_pages} pages (1-indexed, from page 1 to page {num_pages}).\n"
        f"This is investigation round {investigation_round + 1}.\n\n"
    )

    if page_metadata_json:
        question_text += f"PAGE METADATA:\n{page_metadata_json}"
    else:
        question_text += (
            "No page metadata available. Based on the question alone, "
            "plan what code lookups are needed. Crop tasks will use default pages."
        )

    response = client.models.generate_content(
        model=PLANNER_MODEL,
        contents=[types.Content(role="user", parts=[types.Part.from_text(text=question_text)])],
        config=types.GenerateContentConfig(
            system_instruction=COMPLIANCE_PLANNER_SYSTEM_PROMPT,
        ),
    )

    # ``response.text`` can be None when the reply carries no text parts;
    # treat that as an empty reply so the fallback plan below still applies.
    response_text = (response.text or "").strip()

    parsed = _extract_plan(response_text)
    target_pages = _zero_indexed_pages(parsed.get("target_pages"), num_pages)
    legend_pages = _zero_indexed_pages(parsed.get("legend_pages"), num_pages)
    crop_tasks = _build_crop_tasks(parsed.get("crop_tasks"), num_pages)
    code_queries = _build_code_queries(parsed.get("code_queries"))

    crop_tasks.sort(key=lambda t: t["priority"])

    # Fallback: with no usable plan, look at the first few pages.
    if not target_pages and not crop_tasks:
        target_pages = list(range(min(num_pages, 5)))

    crop_summary = f"{len(crop_tasks)} crop tasks on pages {', '.join(str(p + 1) for p in target_pages[:5])}"
    code_summary = f"{len(code_queries)} code queries"
    if code_queries:
        code_summary += f" ({', '.join(q['focus_area'] for q in code_queries[:3])})"

    discussion_msg = AgentMessage(
        timestamp=datetime.now().strftime("%H:%M:%S"),
        agent="planner",
        action="plan",
        summary=f"Planned {crop_summary} and {code_summary}.",
        detail=response_text,
        evidence_refs=[],
    )

    return {
        "target_pages": target_pages,
        "legend_pages": legend_pages,
        "crop_tasks": crop_tasks,
        "code_queries": code_queries,
        "discussion_log": [discussion_msg],
        "status_message": [
            f"Selected {len(target_pages)} pages ({len(legend_pages)} legends), "
            f"planned {len(crop_tasks)} crop tasks, {len(code_queries)} code queries."
        ],
    }
|
| |
|