Spaces:
Running on Zero
Running on Zero
| """Deterministic chart-intent parsing + validation (F005, serve-only). | |
| The reliability lever for charts on a small (4B) model: the model never emits | |
| raw Vega-Lite. Instead its ANSWER value carries a TINY, validated chart request | |
| inside a single fenced ``` ```chart {…json…} ``` ``` block. This module owns: | |
| - ``ChartIntent`` — the Pydantic value model the model emits | |
| (``chart_type ∈ bar|line|scatter``, ``x``, ``y``, optional ``color``/``agg``). | |
| - ``parse_chart_intent`` — extract+strip that block from the ANSWER text | |
| (fail-open on display, fail-closed on chart; never raises). | |
| - ``validate_intent`` — confirm ``x``/``y`` are in the result columns and drop | |
| invalid optional fields, returning a column-validated ``ChartSpec`` or ``None``. | |
| This module is **gradio-free** and dependency-light (stdlib + pydantic only — no | |
| gradio, pandas, torch, transformers, or trl on import), so it is fully | |
| unit-testable headless. All gradio ``gr.*Plot`` wiring lives in ``app_ui.py``. | |
| It NEVER touches the trained ``tooling.parse_action`` / ``answer(value=...)`` | |
| contract (F002 train/serve parity): the chart block is parsed app-side from the | |
| ANSWER value text AFTER ``tooling`` produced it. This is the ONLY chart parser. | |
| Known limitation / F006 dependency: today only the demo-script ANSWERs carry a | |
| `````chart {…}````` block, so the chart only lights up for the 3 example | |
| questions. Making the REAL model emit the block is a prompt/training change | |
| deferred to F006 — and it is GATED ON the scoring path stripping the block first | |
| (``strip_chart_block`` is called in ``sql_environment._handle_answer`` before | |
| ``verify_answer`` so a prose+block answer still matches its gold value). | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import re | |
| from typing import Literal | |
| from pydantic import BaseModel, Field, ValidationError | |
# The closed set of plot kinds a chart request may name.
ChartType = Literal["bar", "line", "scatter"]

# Aggregation hints accepted in ``agg``; ``validate_intent`` compares the
# lower-cased hint against this set and drops anything else to None instead of
# rejecting the chart (same drop-don't-reject policy as ``color``). The hint is
# advisory only: the SQL already aggregated, so nothing here ever applies it.
_AGG_HINTS = frozenset(("sum", "avg", "mean", "count", "min", "max"))
# A fenced block whose info-string is ``chart`` followed by a JSON object body.
# The body is captured lazily as the named group ``json``; DOTALL lets it span
# several lines and IGNORECASE also accepts a capitalised ``Chart`` fence (R1).
# ``parse_chart_intent`` reads only the FIRST match, whereas
# ``strip_chart_block`` removes EVERY match so that a second block can never
# leak raw JSON into the displayed answer (C3).
_CHART_BLOCK_RE = re.compile(
    r"```chart\s*(?P<json>\{.*?\})\s*```",
    flags=re.IGNORECASE | re.DOTALL,
)

# Fence remnants that a truncated or malformed chart block can leave behind
# once the well-formed blocks are gone (C4): a whole line that is nothing but a
# triple-backtick fence whose info-string is empty or exactly ``chart``
# (surrounding spaces/tabs allowed). Opening fences that carry any other
# info-string (e.g. ``sql``) never match. NOTE: a fence line with an empty
# info-string is also what closes ANY fenced block, so this pattern alone
# cannot tell a chart remnant from the closer of an ordinary code block.
_ORPHAN_FENCE_RE = re.compile(
    r"^[ \t]*```(?:chart)?[ \t]*$",
    flags=re.IGNORECASE | re.MULTILINE,
)
class ChartIntent(BaseModel):
    """Small chart request carried inside the ANSWER value.

    Built from the JSON object found in a fenced ``chart`` block of the ANSWER
    text. ``chart_type``, ``x`` and ``y`` are required and ``chart_type`` is
    restricted to the ``ChartType`` literals, so a payload with an unknown
    chart type or a missing required field fails validation and never becomes
    a ``ChartIntent`` (``parse_chart_intent`` returns None for it instead).
    Column names are NOT checked here; that is ``validate_intent``'s job.
    """

    # Required fields.
    chart_type: ChartType = Field(
        default=..., description="One of bar | line | scatter."
    )
    x: str = Field(
        default=...,
        description="Column name for the x axis (validated against result columns).",
    )
    y: str = Field(
        default=...,
        description="Column name for the y axis (validated against result columns).",
    )

    # Optional fields; both default to None when the payload omits them.
    color: str | None = Field(
        None, description="Optional column for series/colour grouping."
    )
    agg: str | None = Field(
        None,
        description="Optional advisory agg hint (e.g. 'sum'); carried, not applied.",
    )
class ChartSpec(BaseModel):
    """Column-validated chart description that app_ui turns into a plot.

    ``validate_intent`` is the intended producer: it builds a ``ChartSpec``
    only after confirming that ``x`` and ``y`` are result columns. ``color``
    and ``agg`` survive only when they passed their own checks; an invalid
    optional field is dropped to None rather than rejecting the whole chart.
    """

    # Required: plot kind plus the two validated axis columns.
    chart_type: ChartType = Field(
        default=..., description="Which typed plot to reveal."
    )
    x: str = Field(
        default=..., description="Validated x column (∈ result columns)."
    )
    y: str = Field(
        default=..., description="Validated y column (∈ result columns)."
    )

    # Optional: None means the field was absent or was dropped as invalid.
    color: str | None = Field(
        None, description="Validated colour column, or None if absent/dropped."
    )
    agg: str | None = Field(
        None, description="Advisory agg hint if recognised, else None."
    )
# Opening fence of a NON-chart code block (e.g. a ``sql`` fence): three
# backticks, a non-empty info-string whose first word is not ``chart``, and no
# further backtick on the line (so one-line inline code is not mistaken for it).
_FOREIGN_FENCE_OPEN_RE = re.compile(
    r"^[ \t]*```[ \t]*(?!chart\b)[^`\s][^`]*$", re.IGNORECASE
)
# A fence line with an empty info-string: the line that closes an open fence.
_BARE_FENCE_RE = re.compile(r"^[ \t]*```[ \t]*$")


def strip_chart_block(text: str) -> str:
    """Remove every fenced ``chart`` block from ``text`` and tidy what is left.

    This is the single strip site for both display (``parse_chart_intent``
    uses it for its clean text) and scoring (the answer is stripped before it
    is compared with the gold value, so prose+block is judged on its prose).

    Steps:
      1. Text containing no triple-backtick fence at all is returned with only
         ``str.strip`` applied, so a plain answer keeps its inner whitespace
         untouched.
      2. Every well-formed chart block is removed (C3: a second block must not
         leak raw JSON).
      3. Orphaned fence remnants of a truncated/malformed chart block (a lone
         bare fence line or a lone chart-fence line) are blanked (C4). A fenced
         block opened with any OTHER info-string (e.g. sql) is left intact,
         including its bare closing fence; blindly scrubbing every bare fence
         line would delete that closer and leave an unclosed code block.
      4. Runs of 3+ newlines collapse to one blank line and the result is
         stripped of surrounding whitespace.

    Known limitation: a code block opened with a bare fence (no info-string)
    cannot be told apart from a chart remnant, so its fence lines are still
    removed.

    Args:
        text: The ANSWER value text, with or without chart blocks.

    Returns:
        The text without chart blocks or chart fence remnants.
    """
    # Fast path: nothing fence-like to remove, so do not rewrite inner
    # whitespace of a plain answer.
    if "```" not in text:
        return text.strip()

    without_blocks = _CHART_BLOCK_RE.sub("", text)

    # Line-wise scrub. Splitting on "\n" mirrors the MULTILINE anchors of
    # _ORPHAN_FENCE_RE: a matched line becomes an empty line, exactly as a
    # regex substitution with "" would leave it.
    kept_lines: list[str] = []
    in_foreign_fence = False
    for line in without_blocks.split("\n"):
        if in_foreign_fence:
            # Inside a non-chart code block everything is content; the first
            # bare fence closes the block and must be kept.
            if _BARE_FENCE_RE.match(line):
                in_foreign_fence = False
            kept_lines.append(line)
        elif _FOREIGN_FENCE_OPEN_RE.match(line):
            in_foreign_fence = True
            kept_lines.append(line)
        elif _ORPHAN_FENCE_RE.match(line):
            kept_lines.append("")
        else:
            kept_lines.append(line)
    without_fences = "\n".join(kept_lines)

    # Collapse the blank-line runs left where a block or fence sat, then trim.
    collapsed = re.sub(r"\n{3,}", "\n\n", without_fences)
    return collapsed.strip()
def parse_chart_intent(answer_text: str) -> tuple[str, ChartIntent | None]:
    """Extract the optional chart intent from an ANSWER value and strip its block.

    The FIRST fenced ``chart`` block found by ``_CHART_BLOCK_RE`` supplies the
    intent. When that block parses and validates, the returned text is
    ``answer_text`` with ALL chart blocks removed and whitespace tidied (via
    ``strip_chart_block``), so neither the first nor any later block leaks raw
    JSON into the displayed answer (C3).

    Fail-open on display, fail-closed on chart: when there is no block, or the
    first block's body is not valid JSON, is nested too deeply for the JSON
    decoder, or does not validate as a ``ChartIntent``, the ORIGINAL text is
    returned unchanged together with None. There is no fallback to a later
    block when the first one is invalid.

    This function does not touch ``tooling.parse_action``; it only inspects
    the ANSWER text it is given.

    Args:
        answer_text: The ANSWER step's value text.

    Returns:
        ``(clean_text, intent)``. ``clean_text`` is ``answer_text`` itself
        whenever ``intent`` is None.
    """
    match = _CHART_BLOCK_RE.search(answer_text)
    if match is None:
        return answer_text, None

    try:
        payload = json.loads(match.group("json"))
        intent = ChartIntent.model_validate(payload)
    except (json.JSONDecodeError, ValidationError, RecursionError):
        # RecursionError: json.loads raises it for a pathologically nested
        # body; treat that like any other malformed block so the "never
        # raises" contract holds. Keep the original text, show no chart.
        return answer_text, None

    # The intent comes from the first block, but every block is stripped.
    return strip_chart_block(answer_text), intent
def validate_intent(intent: ChartIntent, columns: list[str]) -> ChartSpec | None:
    """Fail-closed column validation of a parsed chart intent.

    A ``ChartSpec`` is produced only when BOTH ``intent.x`` and ``intent.y``
    are members of ``columns``. Optional fields never cause a rejection:
    ``color`` is kept only when it is itself a member of ``columns`` and
    ``agg`` only when its lower-cased form is in ``_AGG_HINTS`` (the original
    spelling is carried through); otherwise each is dropped to None.

    Args:
        intent: A parsed ChartIntent (already enum/required-field valid).
        columns: The result column names that form the allow-set. ``None`` is
            tolerated and treated like an empty list.

    Returns:
        A ChartSpec ready for app wiring, or None (table only) when ``x`` or
        ``y`` is not in ``columns``, or when ``columns`` is empty or None.
    """
    # No column list at all cannot validate anything. Guard explicitly so a
    # missing result set yields "no chart" instead of a TypeError from set().
    if columns is None:
        return None

    # Column matching is CASE-SENSITIVE BY DESIGN: SQLite preserves identifier
    # case in result-set column names, so ``x``/``y`` must match the column the
    # query actually returned (an ``intent.x`` of "city" does NOT match a "City"
    # column). dtype is INTENTIONALLY UNENFORCED here — membership-only
    # validation is advisory per the spec's Out-of-Scope (numeric-dtype
    # enforcement of ``y`` is deferred); the app_ui render guard hides a
    # non-numeric ``y`` at draw time.
    allowed = set(columns)
    if intent.x not in allowed or intent.y not in allowed:
        return None

    # Drop-don't-reject for the optional fields. A None colour is simply not a
    # member of the allow-set, so it falls through to None as well.
    color = intent.color if intent.color in allowed else None
    agg = None
    if intent.agg is not None and intent.agg.lower() in _AGG_HINTS:
        agg = intent.agg

    return ChartSpec(
        chart_type=intent.chart_type,
        x=intent.x,
        y=intent.y,
        color=color,
        agg=agg,
    )