Spaces:
Running on Zero
Running on Zero
| """Capture user feedback on answers — the self-improving flywheel. | |
| The MVP's core idea: a small shop owner can't audit SQL, but they CAN say | |
| "that's right" or "no, that's wrong — it should be X". Because the app already | |
| shows the exact query it ran, each thumbs-down + correction is a high-signal | |
| *candidate training example*: the question, the SQL the model actually produced, | |
| and what the human says is correct. These accumulate into exactly the data the | |
| next training round needs — notably the multi-hop-join / error-recovery cases | |
| the model is weakest on (see docs/guides/training_playbook.md). | |
| Dep-light, stdlib only: one JSON object per line (JSONL), append-only, so it is | |
| trivially greppable and feeds straight into scripts/generate_sft_data.py-style | |
| curation later. No model or heavy deps imported here. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import time | |
| from pathlib import Path | |
| from typing import Any | |
# Default location of the feedback log. It is a runtime artifact, not source:
# the file is only ever appended to, one JSON record per feedback click.
DEFAULT_FEEDBACK_PATH: Path = Path("data/feedback/feedback.jsonl")

# The only values accepted for a record's ``verdict`` field.
_VERDICTS: tuple[str, ...] = ("up", "down")
def record_feedback(
    *,
    question: str,
    dataset: str,
    shown_sql: str,
    result: str,
    verdict: str,
    correction: str = "",
    path: str | Path | None = None,
) -> dict[str, Any]:
    """Store a single piece of user feedback and hand the stored record back.

    The record is serialised as one JSON object on its own line and appended
    to the feedback file; missing parent directories are created first.

    Args:
        question: The natural-language question the user asked.
        dataset: The dataset/DB the question was asked against.
        shown_sql: The SQL that was shown to the user for this answer.
        result: The answer/result the user is reacting to.
        verdict: Either "up" (correct) or "down" (wrong).
        correction: Optional free text describing what the answer should
            have been. Surrounding whitespace is stripped; a falsy value is
            stored as "".
        path: Alternative JSONL location; ``DEFAULT_FEEDBACK_PATH`` is used
            when omitted.

    Returns:
        The record dict exactly as it was written to disk.

    Raises:
        ValueError: if ``verdict`` is not "up"/"down".
    """
    # Reject unknown verdicts before touching the filesystem.
    if verdict not in _VERDICTS:
        raise ValueError(f"verdict must be one of {_VERDICTS}, got {verdict!r}")

    note = correction.strip() if correction else ""

    entry = {
        "ts": time.time(),
        "question": question,
        "dataset": dataset,
        "shown_sql": shown_sql,
        "result": result,
        "verdict": verdict,
        "correction": note,
        # Only a down-vote that also carries a correction is a usable
        # "model said X, truth is Y" pair for a later training round.
        "is_training_candidate": bool(note) and verdict == "down",
    }

    target = DEFAULT_FEEDBACK_PATH if path is None else Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("a", encoding="utf-8") as sink:
        sink.write(f"{json.dumps(entry, ensure_ascii=False)}\n")
    return entry
| def load_feedback(path: str | Path | None = None) -> list[dict[str, Any]]: | |
| """Read all feedback records (for inspection / the demo's flywheel counter).""" | |
| src = Path(path) if path is not None else DEFAULT_FEEDBACK_PATH | |
| if not src.exists(): | |
| return [] | |
| records: list[dict[str, Any]] = [] | |
| for line in src.read_text(encoding="utf-8").splitlines(): | |
| line = line.strip() | |
| if not line: | |
| continue | |
| # Skip a malformed/truncated line (e.g. an interrupted write) rather than | |
| # let one bad record break feedback_summary() inside a UI click handler. | |
| try: | |
| records.append(json.loads(line)) | |
| except json.JSONDecodeError: | |
| continue | |
| return records | |
def feedback_summary(path: str | Path | None = None) -> dict[str, int]:
    """Counts for a "flywheel" status line.

    Args:
        path: JSONL file to summarise; passed straight to ``load_feedback``.

    Returns:
        A dict with four integer counts: ``total`` (records counted), ``up``
        (thumbs-up verdicts), ``down`` (thumbs-down verdicts) and
        ``training_candidates`` (records whose ``is_training_candidate``
        field is truthy).
    """
    up = down = candidates = total = 0
    for record in load_feedback(path):
        # Ignore anything that is not a JSON object: calling ``.get`` on a
        # stray list/number would raise and take the status line down with it.
        if not isinstance(record, dict):
            continue
        total += 1
        verdict = record.get("verdict")
        if verdict == "up":
            up += 1
        elif verdict == "down":
            down += 1
        if record.get("is_training_candidate"):
            candidates += 1
    return {
        "total": total,
        "up": up,
        "down": down,
        "training_candidates": candidates,
    }