"""Auto-file GitHub issues for validator-internal bugs. Mirror of `tools/hf_watch/validate.py::_ensure_validator_internal_issues` moved to the HF Space side so the Space can self-report tooling failures without depending on the GH Actions wrapper to do it. Policy reminder (from CLAUDE.md / project conventions): - GitHub Issues track NVIDIA-internal *tooling* problems only. - Customer-asset findings (real spec violations) stay on the dashboard; they do NOT become issues. - Distinguishing the two is `is_validator_internal_issue`'s job. Token: GH_VALIDATOR_TOKEN (or GITHUB_TOKEN) — set as a Space secret. A fine-grained PAT with `issues: read+write` on `NVIDIA-dev/simready-oem-library-pm` is enough; no other scope needed. """ from __future__ import annotations import json import os import re import urllib.parse import urllib.request from typing import Any GH_REPO = "NVIDIA-dev/simready-oem-library-pm" # Redact token-shaped strings before they land in a (public-ish) issue. # Covers GitHub PATs (gh[ps]_…), HF tokens (hf_…), and long base64-ish # secrets. Validator log text passes through here on its way into bodies. _SECRET_RE = re.compile(r"gh[ps]_\w+|hf_\w+|[A-Za-z0-9+/]{40,}={0,2}") def scrub_secrets(text: str) -> str: return _SECRET_RE.sub("[REDACTED]", text or "") def _gh_token() -> str | None: return os.environ.get("GH_VALIDATOR_TOKEN") or os.environ.get("GITHUB_TOKEN") def is_validator_internal_issue(iss: dict) -> bool: """Heuristic: distinguish validator-package crashes from real asset findings.""" code = iss.get("code") or "" msg = iss.get("msg") or "" if code == "UNKNOWN" or code.startswith("SDK."): return True if "Uncaught error" in msg: return True if "is not registered to requirement" in msg: return True return False def _gh_request(method: str, path: str, payload: dict | None = None) -> dict | list: token = _gh_token() if not token: raise RuntimeError("no GitHub token in env (GH_VALIDATOR_TOKEN or GITHUB_TOKEN)") url = f"https://api.github.com/repos/{GH_REPO}{path}" headers = { "Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}", "X-GitHub-Api-Version": "2022-11-28", "User-Agent": "simready-validator-space/0.1", } body = None if payload is not None: body = json.dumps(payload).encode("utf-8") headers["Content-Type"] = "application/json" req = urllib.request.Request(url, data=body, headers=headers, method=method) with urllib.request.urlopen(req, timeout=30) as r: return json.loads(r.read() or "null") def _find_issue(title: str) -> dict | None: # Strip `"` — title is untrusted and a quote would break the # quoted-phrase search query (and could inject extra qualifiers). safe_title = title.replace('"', "") q = urllib.parse.quote(f'repo:{GH_REPO} in:title "{safe_title}" is:issue') result = _gh_request("GET", f"/../../search/issues?q={q}") items = (result or {}).get("items") or [] for it in items: if it.get("title") == title: return it return None def _create_issue(title: str, body: str, labels: list[str]) -> int: result = _gh_request("POST", "/issues", {"title": title, "body": body, "labels": labels}) return result.get("number", 0) def _add_comment(issue_num: int, body: str) -> None: _gh_request("POST", f"/issues/{issue_num}/comments", {"body": body}) def _build_dataset_issue_body(by_pair: dict, dataset: str, profile: str, total: int) -> str: rows = "\n".join( f"| `{rule}` | `{code}` | {g['severity'] or '?'} | {g['count']} | `{g['sample_msg']}` |" for (rule, code), g in sorted(by_pair.items(), key=lambda kv: -kv[1]["count"]) ) return ( f"**Validator-internal bugs on a single dataset** — surfaced during " f"automatic SimReady validation. NOT a customer-asset finding; the " f"validator's own rule registration / spec loading is misbehaving " f"on this dataset and emitting errors that don't map to any real " f"spec violation.\n\n" f"| Field | Value |\n|---|---|\n" f"| Dataset | `{dataset}` |\n" f"| Profile (first run) | `{profile}` |\n" f"| Total internal occurrences (first run) | {total} |\n" f"| Distinct (rule, code) pairs (first run) | {len(by_pair)} |\n\n" f"**Breakdown** (sorted by occurrence count, descending):\n\n" f"| Rule | Code | Severity | Count | Sample message |\n" f"|---|---|---|---|---|\n{rows}\n\n" f"---\n" f"_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). " f"One issue per dataset — re-validating the same dataset comments " f"here with the new counts instead of opening a duplicate._" ) def _build_dataset_recurrence_comment(by_pair: dict, dataset: str, profile: str, total: int) -> str: rows = "\n".join( f"| `{rule}` | `{code}` | {g['count']} |" for (rule, code), g in sorted(by_pair.items(), key=lambda kv: -kv[1]["count"]) ) return ( f"Re-hit during validation of `{dataset}` (profile `{profile}`).\n" f"This run: **{total}** internal occurrences across **{len(by_pair)}** " f"distinct (rule, code) pairs.\n\n" f"| Rule | Code | Count this run |\n|---|---|---|\n{rows}" ) def _list_existing_internal_issues() -> list[dict]: """Pull every validator-internal issue (open + recently closed) so the agent can dedupe semantically against them. Closed issues are included because the same bug can come back after a fix is reverted or after a regression.""" q = urllib.parse.quote( f'repo:{GH_REPO} label:"validator-internal" is:issue') try: result = _gh_request("GET", f"/../../search/issues?q={q}&per_page=100") except Exception: return [] return (result or {}).get("items") or [] def _execute_decisions(decisions: list[dict], by_pair: dict, dataset: str, profile: str, total: int, log_fn) -> dict: """Carry out the agent's decisions. Returns counters.""" out = log_fn created = updated = skipped = 0 aborted = False # Map negative placeholder numbers → real issue numbers as `create` # decisions resolve. The agent uses negatives to cross-reference # decisions that share a parent issue when several groups roll up # into one new bug. placeholder_to_real: dict[int, int] = {} # Two passes: creates first (so their numbers are known), then # comments (so cross-references resolve). Skips are free. creates = [d for d in decisions if d.get("action") == "create"] comments = [d for d in decisions if d.get("action") == "comment"] skips = [d for d in decisions if d.get("action") == "skip"] for d in creates: if aborted: break title = d.get("title") or f"[validator-internal] {dataset} :: {d.get('rule')} / {d.get('code')}" body = d.get("body") or "(agent did not provide a body)" try: existing = _find_issue(title) if existing: _add_comment(existing["number"], f"_Re-hit during validation of `{dataset}` " f"(profile `{profile}`)._\n\n{body}") updated += 1 out(f" internal-issue #{existing['number']}: comment added " f"(agent: create→existing match by title)") placeholder_to_real.setdefault(-(creates.index(d) + 1), existing["number"]) else: num = _create_issue(title, body, ["validator-internal", "process", "agent-reviewed"]) created += 1 out(f" internal-issue #{num}: opened ({title!r}) — agent reasoning: " f"{d.get('reasoning', '')[:160]}") placeholder_to_real[-(creates.index(d) + 1)] = num except Exception as e: msg = f"{type(e).__name__}: {e}" if "404" in msg: out(f" ! internal-issue tracking aborted (404 — token lacks issues:write on {GH_REPO})") aborted = True else: out(f" ! create failed for {title!r}: {msg}") for d in comments: if aborted: break target = d.get("target_issue") if target is None: out(f" ! comment decision has no target_issue; skipping ({d.get('reasoning', '')[:100]})") continue if target < 0: target = placeholder_to_real.get(target) if target is None: out(f" ! comment decision cross-references an unresolved placeholder; skipping") continue body = d.get("body") or ( f"Re-hit during validation of `{dataset}` (profile `{profile}`). " f"Same underlying bug as this issue — see agent reasoning: " f"{d.get('reasoning', '')}" ) try: _add_comment(target, body) updated += 1 out(f" internal-issue #{target}: comment added (agent: comment) — {d.get('reasoning', '')[:120]}") except Exception as e: msg = f"{type(e).__name__}: {e}" if "404" in msg: out(f" ! comment tracking aborted (404)") aborted = True else: out(f" ! comment failed for #{target}: {msg}") for d in skips: skipped += 1 out(f" internal-issue {d.get('rule')}/{d.get('code')}: skipped — {d.get('reasoning', '')[:160]}") return {"created": created, "updated": updated, "skipped": skipped, "aborted_404": aborted} def _ensure_internal_issues_simple(by_pair: dict, dataset: str, profile: str, total: int, log_fn) -> dict: """Fallback (no agentic review): one issue per dataset, dedup by exact title match. This is what we used before the agent was wired up; kept as a backstop for when ANTHROPIC_API_KEY is unset, the SDK is missing, or the Claude call fails.""" out = log_fn title = f"[validator-internal] {dataset}" try: existing = _find_issue(title) if existing: _add_comment(existing["number"], _build_dataset_recurrence_comment(by_pair, dataset, profile, total)) out(f" internal-issue #{existing['number']}: comment added for dataset " f"{dataset} ({total} occurrences, {len(by_pair)} pairs) — fallback policy") return {"created": 0, "updated": 1, "pairs": len(by_pair), "total": total, "fallback": True} num = _create_issue(title, _build_dataset_issue_body(by_pair, dataset, profile, total), ["validator-internal", "process"]) out(f" internal-issue #{num}: opened for dataset {dataset} " f"({total} occurrences, {len(by_pair)} pairs) — fallback policy") return {"created": 1, "updated": 0, "pairs": len(by_pair), "total": total, "fallback": True} except Exception as e: msg = f"{type(e).__name__}: {e}" if "404" in msg: out(f" ! internal-issue tracking aborted (404 — token lacks issues:write on {GH_REPO})") return {"created": 0, "updated": 0, "aborted_404": True, "fallback": True} out(f" ! internal-issue tracking for dataset {dataset} failed: {msg}") return {"created": 0, "updated": 0, "error": msg, "fallback": True} def ensure_internal_issues(results_json: dict, dataset: str, profile: str, log_fn=None) -> dict: """Scan results.json for validator-internal bugs and route them to GitHub issues via an agentic review pass (Claude) that classifies, dedupes against existing issues, and writes plain-language explanations. Falls back to a simple one-issue-per-dataset policy if the agent is unavailable. Best-effort — swallowed exceptions return {"error": ...} so the validator's verdict is never blocked on GitHub being flaky.""" out = log_fn or (lambda s: print(s, flush=True)) if not _gh_token(): out(" (skipping internal-issue tracking: no GH token)") return {"skipped": True, "reason": "no_token"} # Group across the whole dataset: (rule, code) → {count, sample, severity} by_pair: dict[tuple[str, str], dict[str, Any]] = {} total = 0 for asset in results_json.get("results", []): for iss in (asset.get("issues") or []): if not is_validator_internal_issue(iss): continue rule = iss.get("rule") or "?" code = iss.get("code") or "UNKNOWN" key = (rule, code) g = by_pair.setdefault(key, { "count": 0, "sample_msg": scrub_secrets((iss.get("msg") or "")[:200]), "severity": (iss.get("severity") or "").lower(), }) g["count"] += 1 total += 1 if not by_pair: return {"created": 0, "updated": 0} # Agentic path: Claude classifies + writes the issue body. try: from agentic_issues import is_available as _agent_available, review_and_decide except Exception as e: out(f" (agentic_issues import failed: {type(e).__name__}: {str(e)[:120]}); " f"using fallback policy") return _ensure_internal_issues_simple(by_pair, dataset, profile, total, out) if not _agent_available(): out(" (agentic review unavailable; using fallback policy)") return _ensure_internal_issues_simple(by_pair, dataset, profile, total, out) existing = _list_existing_internal_issues() out(f" agentic review: {len(by_pair)} group(s) vs {len(existing)} existing " f"validator-internal issue(s)") review = review_and_decide(by_pair, dataset, profile, total, existing, log_fn=out) if review is None or not review.get("decisions"): return _ensure_internal_issues_simple(by_pair, dataset, profile, total, out) if review.get("summary"): out(f" agent summary: {review['summary'][:300]}") result = _execute_decisions(review["decisions"], by_pair, dataset, profile, total, out) result.update({"pairs": len(by_pair), "total": total, "agentic": True}) return result