"""Deterministic chart-intent parsing + validation (F005, serve-only).

The reliability lever for charts on a small (4B) model: the model never emits
raw Vega-Lite. Instead its ANSWER value carries a TINY, validated chart request
inside a single fenced ``` ```chart {…json…} ``` ``` block. This module owns:

- ``ChartIntent`` — the Pydantic value model the model emits
  (``chart_type ∈ bar|line|scatter``, ``x``, ``y``, optional ``color``/``agg``).
- ``parse_chart_intent`` — extract+strip that block from the ANSWER text
  (fail-open on display, fail-closed on chart; never raises).
- ``validate_intent`` — confirm ``x``/``y`` are in the result columns and drop
  invalid optional fields, returning a column-validated ``ChartSpec`` or ``None``.

This module is **gradio-free** and dependency-light (stdlib + pydantic only — no
gradio, pandas, torch, transformers, or trl on import), so it is fully
unit-testable headless. All gradio ``gr.*Plot`` wiring lives in ``app_ui.py``.

It NEVER touches the trained ``tooling.parse_action`` / ``answer(value=...)``
contract (F002 train/serve parity): the chart block is parsed app-side from the
ANSWER value text AFTER ``tooling`` produced it. This is the ONLY chart parser.

Known limitation / F006 dependency: today only the demo-script ANSWERs carry a
``` ```chart {…}``` ``` block, so the chart only lights up for the 3 example
questions. Making the REAL model emit the block is a prompt/training change
deferred to F006 — and it is GATED ON the scoring path stripping the block first
(``strip_chart_block`` is called in ``sql_environment._handle_answer`` before
``verify_answer`` so a prose+block answer still matches its gold value).
"""

from __future__ import annotations

import json
import re
from typing import Literal

from pydantic import BaseModel, Field, ValidationError

ChartType = Literal["bar", "line", "scatter"]

# Recognised advisory aggregation hints (case-insensitive). An ``agg`` outside
# this set is dropped to None during validation (drop-don't-reject, like color).
# Advisory only — the SQL already aggregated, so this is NEVER applied here.
_AGG_HINTS = frozenset({"sum", "avg", "mean", "count", "min", "max"})

# Match a fenced block whose info-string is ``chart``, capturing the JSON object
# body non-greedily in the named group ``json`` (read back by
# ``parse_chart_intent`` via ``match.group("json")`` — the name must stay in
# sync). DOTALL lets the body span newlines; IGNORECASE so a capitalized fence
# (```` ```Chart ````) is also matched/stripped (R1).
# ``parse_chart_intent`` uses the FIRST match for the intent; ``strip_chart_block``
# removes ALL matches (a second block must never leak raw JSON into the displayed
# answer — C3).
_CHART_BLOCK_RE = re.compile(
    r"```chart\s*(?P<json>\{.*?\})\s*```",
    re.DOTALL | re.IGNORECASE,
)

# After removing well-formed chart blocks, scrub orphaned/partial fence remnants a
# truncated or malformed block can leave behind (e.g. a bare ``` ``` ``` opener
# with no matching closer, or a lone ``` ```chart ``` line) so the displayed
# answer never shows a stray fence marker (C4). Matches a fence line whose
# info-string is empty or ``chart`` only — NOT ``sql``/other code the prose may
# legitimately contain.
_ORPHAN_FENCE_RE = re.compile(
    r"^[ \t]*```(?:chart)?[ \t]*$", re.MULTILINE | re.IGNORECASE
)


class ChartIntent(BaseModel):
    """The tiny, validated chart request the model emits inside the ANSWER value.

    Parsed from a single fenced ``` ```chart {…json…} ``` ``` block in the ANSWER
    text. Pydantic rejects an unknown chart_type or a missing required field, so a
    malformed intent never produces a ChartIntent (parse returns None instead).
    """

    chart_type: ChartType = Field(..., description="One of bar | line | scatter.")
    x: str = Field(
        ...,
        description="Column name for the x axis (validated against result columns).",
    )
    y: str = Field(
        ...,
        description="Column name for the y axis (validated against result columns).",
    )
    color: str | None = Field(
        default=None, description="Optional column for series/colour grouping."
    )
    agg: str | None = Field(
        default=None,
        description="Optional advisory agg hint (e.g. 'sum'); carried, not applied.",
    )


class ChartSpec(BaseModel):
    """App-ready, column-validated chart description. The value app_ui maps to a plot.

    Produced ONLY by validate_intent after x/y were confirmed present in the result
    columns. color/agg are present only if they were valid; an invalid optional
    field is dropped (not a reason to reject the whole chart).
    """

    chart_type: ChartType = Field(..., description="Which typed plot to reveal.")
    x: str = Field(..., description="Validated x column (∈ result columns).")
    y: str = Field(..., description="Validated y column (∈ result columns).")
    color: str | None = Field(
        default=None, description="Validated colour column, or None if absent/dropped."
    )
    agg: str | None = Field(
        default=None, description="Advisory agg hint if recognised, else None."
    )


def strip_chart_block(text: str) -> str:
    """Remove ALL ``` ```chart {…}``` ``` blocks and tidy the leftover whitespace.

    The single strip site for BOTH display (``parse_chart_intent``'s clean_text)
    and scoring (``sql_environment._handle_answer`` calls this before
    ``verify_answer``, so a prose+block answer is compared on its clean prose).

    Three passes, in order:

    1. Remove every well-formed chart block (C3: a second block must not leak
       raw JSON).
    2. Remove orphaned fence lines — a line holding only a bare fence or a
       ``chart`` fence (C4). Fences with any other info-string (e.g. ``sql``)
       are left alone.
    3. Collapse runs of 3+ newlines to a paragraph break and ``strip()`` the
       result.

    Passes 2 and 3 run on every input, not only on text that had a chart block.
    Text with no chart block, no bare/``chart`` fence line and no run of 3+
    newlines is therefore returned unchanged apart from the final ``strip()``;
    an already-trimmed plain answer comes back identical, which keeps the gold
    scoring path behaviour-identical for such answers.

    Args:
        text: The ANSWER value text, with or without chart blocks.

    Returns:
        The text with chart blocks and orphan fence lines removed and
        whitespace tidied.
    """
    without_blocks = _CHART_BLOCK_RE.sub("", text)
    without_fences = _ORPHAN_FENCE_RE.sub("", without_blocks)
    # Collapse 3+ consecutive newlines (left where a block sat) to a paragraph
    # break, then trim surrounding whitespace.
    collapsed = re.sub(r"\n{3,}", "\n\n", without_fences)
    return collapsed.strip()


def parse_chart_intent(answer_text: str) -> tuple[str, ChartIntent | None]:
    """Extract the optional chart-intent from an ANSWER value, stripping its block.

    Looks for fenced ``` ```chart {…json…} ``` ``` blocks anywhere in answer_text.
    Only the FIRST matched block is parsed into the ``ChartIntent``; when that
    succeeds, ``clean_text`` is answer_text with ALL chart blocks removed and
    surrounding whitespace tidied (via ``strip_chart_block``) — so a second block
    can never leak raw JSON into the displayed answer (C3).

    On absent block OR malformed JSON OR a JSON object that fails ChartIntent
    validation, returns ``(answer_text, None)`` with the ORIGINAL text unchanged
    (fail-open on display, fail-closed on chart). This also applies when the
    first block is malformed and a later one is valid: later blocks are never
    tried as the intent source. Never touches tooling.parse_action — this is the
    ONLY chart parser.

    Args:
        answer_text: The ANSWER step's value text (Step.argument for the ANSWER step).

    Returns:
        (clean_text, ChartIntent | None). clean_text == answer_text when intent is None.
    """
    match = _CHART_BLOCK_RE.search(answer_text)
    if match is None:
        return answer_text, None

    try:
        data = json.loads(match.group("json"))
        intent = ChartIntent.model_validate(data)
    except (json.JSONDecodeError, ValidationError, RecursionError):
        # Malformed → keep original text, no chart. RecursionError covers
        # pathologically nested JSON so the fail-closed contract holds for
        # degenerate model output as well.
        return answer_text, None

    # Strip ALL chart blocks for display (C3), but the intent is the FIRST block.
    clean_text = strip_chart_block(answer_text)
    return clean_text, intent


def validate_intent(intent: ChartIntent, columns: list[str]) -> ChartSpec | None:
    """Fail-closed: confirm x/y ∈ columns and drop invalid optional fields.

    Returns a ChartSpec when BOTH intent.x and intent.y are in columns. color is
    kept only if intent.color ∈ columns (else dropped to None). agg is kept only
    if its lower-cased form is a recognised hint (else dropped to None); a kept
    agg is passed through with its original casing. Returns None (→ table only)
    when x or y is not in columns, which includes the case where columns is
    empty.

    Args:
        intent: A parsed ChartIntent (already enum/required-field valid).
        columns: The last successful QUERY's result column names (the allow-set).

    Returns:
        A ChartSpec ready for app wiring, or None when no valid chart can render.
    """
    # Column matching is CASE-SENSITIVE BY DESIGN: SQLite preserves identifier
    # case in result-set column names, so ``x``/``y`` must match the column the
    # query actually returned (an ``intent.x`` of "city" does NOT match a "City"
    # column). dtype is INTENTIONALLY UNENFORCED here — membership-only validation
    # is advisory per the spec's Out-of-Scope (numeric-dtype enforcement of ``y``
    # is deferred); the app_ui render guard hides a non-numeric ``y`` at draw time.
    column_set = set(columns)
    if intent.x not in column_set or intent.y not in column_set:
        return None

    # A None color is never a member of column_set, so it stays None.
    color = intent.color if intent.color in column_set else None

    agg = None
    if intent.agg is not None and intent.agg.lower() in _AGG_HINTS:
        agg = intent.agg

    return ChartSpec(
        chart_type=intent.chart_type,
        x=intent.x,
        y=intent.y,
        color=color,
        agg=agg,
    )