Spaces:

nvidia
/

simready-validator

Sleeping

App Files Files Community

simready-validator / tools /hf_space /github_issues.py

loginowskid

Sync from simready-oem-library-pm@c858e9dd

cd53438 verified 3 days ago

raw

history blame contribute delete

14.5 kB

	"""Auto-file GitHub issues for validator-internal bugs.

	Mirror of `tools/hf_watch/validate.py::_ensure_validator_internal_issues`
	moved to the HF Space side so the Space can self-report tooling
	failures without depending on the GH Actions wrapper to do it.

	Policy reminder (from CLAUDE.md / project conventions):
	- GitHub Issues track NVIDIA-internal tooling problems only.
	- Customer-asset findings (real spec violations) stay on the dashboard;
	they do NOT become issues.
	- Distinguishing the two is `is_validator_internal_issue`'s job.

	Token: GH_VALIDATOR_TOKEN (or GITHUB_TOKEN) — set as a Space secret.
	A fine-grained PAT with `issues: read+write` on
	`NVIDIA-dev/simready-oem-library-pm` is enough; no other scope needed.
	"""
	from __future__ import annotations

	import json
	import os
	import re
	import urllib.parse
	import urllib.request
	from typing import Any


	# Repository (owner/name) that validator-internal issues are filed against.
	GH_REPO = "NVIDIA-dev/simready-oem-library-pm"

	# Redact token-shaped strings before they land in a (public-ish) issue.
	# Validator log text passes through here on its way into bodies.
	# Alternatives, tried in this order at each position:
	#   - fine-grained GitHub PATs (github_pat_…), matched as a whole so the
	#     short segment before the token's inner underscore is not left behind;
	#   - prefixed GitHub tokens (ghp_/gho_/ghu_/ghs_/ghr_…);
	#   - HF tokens (hf_…);
	#   - long base64-ish secrets (40+ chars, optional `=` padding).
	_SECRET_RE = re.compile(
	    r"github_pat_\w+"
	    r"|gh[pousr]_\w+"
	    r"|hf_\w+"
	    r"|[A-Za-z0-9+/]{40,}={0,2}"
	)


	def scrub_secrets(text: str) -> str:
	    """Return *text* with every match of `_SECRET_RE` replaced by ``[REDACTED]``.

	    Falsy input (``None`` or ``""``) is treated as the empty string, so the
	    result is always a ``str``.
	    """
	    source = text if text else ""
	    return re.sub(_SECRET_RE, "[REDACTED]", source)


	def _gh_token() -> str | None:
	    """Read the GitHub token from the environment.

	    ``GH_VALIDATOR_TOKEN`` wins when it is set to a non-empty value;
	    otherwise whatever ``GITHUB_TOKEN`` holds is returned (``None`` when
	    that variable is unset).
	    """
	    preferred = os.environ.get("GH_VALIDATOR_TOKEN")
	    if preferred:
	        return preferred
	    return os.environ.get("GITHUB_TOKEN")


	def is_validator_internal_issue(iss: dict) -> bool:
	    """Heuristic: tell validator-package crashes apart from real asset findings.

	    An issue counts as validator-internal when its ``code`` is ``UNKNOWN``
	    or starts with ``SDK.``, or when its ``msg`` contains one of the known
	    crash / registration-failure phrases. Missing or ``None`` fields are
	    treated as empty strings.
	    """
	    code = iss.get("code") or ""
	    message = iss.get("msg") or ""
	    crashed_code = code == "UNKNOWN" or code.startswith("SDK.")
	    internal_markers = ("Uncaught error", "is not registered to requirement")
	    return crashed_code or any(marker in message for marker in internal_markers)


	def _gh_request(method: str, path: str, payload: dict | None = None) -> dict | list:
	    """Send one authenticated GitHub REST API request and decode the JSON reply.

	    Args:
	        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
	        path: Normally a repository-relative path (``/issues``,
	            ``/issues/12/comments``) that is appended to
	            ``/repos/{GH_REPO}``. Callers that need an endpoint outside
	            the repository (the search API) pass leading ``/..`` segments,
	            e.g. ``/../../search/issues?q=...``; those segments are
	            stripped here and the remainder is resolved against the API
	            root.
	        payload: Optional JSON-serialisable body; when given it is sent as
	            ``application/json``.

	    Returns:
	        The decoded JSON response (``None`` when the response body is empty).

	    Raises:
	        RuntimeError: if neither token environment variable is set.
	        Any error raised by ``urllib.request.urlopen`` propagates unchanged.
	    """
	    token = _gh_token()
	    if not token:
	        raise RuntimeError("no GitHub token in env (GH_VALIDATOR_TOKEN or GITHUB_TOKEN)")

	    api_root = "https://api.github.com"
	    if path.startswith("/../"):
	        # urllib sends dot segments verbatim, and even a normalising server
	        # would turn `/repos/<owner>/<repo>/../../search/issues` into
	        # `/repos/search/issues`, not `/search/issues`. Resolve the escape
	        # out of the repository prefix on the client instead.
	        while path.startswith("/../"):
	            path = path[len("/.."):]
	        url = f"{api_root}{path}"
	    else:
	        url = f"{api_root}/repos/{GH_REPO}{path}"

	    headers = {
	        "Accept": "application/vnd.github+json",
	        "Authorization": f"Bearer {token}",
	        "X-GitHub-Api-Version": "2022-11-28",
	        "User-Agent": "simready-validator-space/0.1",
	    }
	    body = None
	    if payload is not None:
	        body = json.dumps(payload).encode("utf-8")
	        headers["Content-Type"] = "application/json"
	    req = urllib.request.Request(url, data=body, headers=headers, method=method)
	    with urllib.request.urlopen(req, timeout=30) as r:
	        # An empty body decodes to None rather than raising.
	        return json.loads(r.read() or "null")


	def _find_issue(title: str) -> dict | None:
	    """Return the issue in `GH_REPO` whose title equals *title* exactly, else ``None``.

	    The search API is queried with *title* as a quoted phrase; the hits are
	    then filtered client-side for an exact title match.
	    """
	    # The title is untrusted: a `"` would terminate the quoted phrase early
	    # and could smuggle extra search qualifiers into the query, so drop it.
	    phrase = title.replace('"', "")
	    query = f'repo:{GH_REPO} in:title "{phrase}" is:issue'
	    response = _gh_request(
	        "GET", "/../../search/issues?q=" + urllib.parse.quote(query))
	    candidates = (response or {}).get("items") or []
	    return next((hit for hit in candidates if hit.get("title") == title), None)


	def _create_issue(title: str, body: str, labels: list[str]) -> int:
	    """Open a new issue in `GH_REPO` and return its number.

	    Returns ``0`` when the API reply carries no ``number`` field.
	    """
	    payload = {"title": title, "body": body, "labels": labels}
	    reply = _gh_request("POST", "/issues", payload)
	    return reply.get("number", 0)


	def _add_comment(issue_num: int, body: str) -> None:
	    """Post *body* as a new comment on issue *issue_num* in `GH_REPO`."""
	    endpoint = f"/issues/{issue_num}/comments"
	    comment = {"body": body}
	    _gh_request("POST", endpoint, comment)


	def _build_dataset_issue_body(by_pair: dict, dataset: str, profile: str,
	                              total: int) -> str:
	    """Render the Markdown body of a new per-dataset validator-internal issue.

	    Args:
	        by_pair: ``(rule, code) -> {"count", "severity", "sample_msg"}``
	            groups; one breakdown row is emitted per entry, highest
	            count first.
	        dataset: Dataset identifier shown in the summary table.
	        profile: Validation profile shown in the summary table.
	        total: Total number of internal occurrences in this run.

	    Returns:
	        The issue body: an explanatory paragraph, a summary table, a
	        per-pair breakdown table, and an auto-filed footer.
	    """
	    def cell(value) -> str:
	        # Validator messages are free text. A line break ends the table
	        # row, a backtick ends the code span, and an unescaped pipe starts
	        # a new column, so neutralise all three before interpolating.
	        flat = re.sub(r"[\r\n]+", " ", str(value))
	        return flat.replace("`", "'").replace("|", "\\|")

	    ordered = sorted(by_pair.items(), key=lambda kv: -kv[1]["count"])
	    rows = "\n".join(
	        f"| `{cell(rule)}` | `{cell(code)}` | {g['severity'] or '?'} "
	        f"| {g['count']} | `{cell(g['sample_msg'])}` |"
	        for (rule, code), g in ordered
	    )
	    return (
	        "Validator-internal bugs on a single dataset — surfaced during "
	        "automatic SimReady validation. NOT a customer-asset finding; the "
	        "validator's own rule registration / spec loading is misbehaving "
	        "on this dataset and emitting errors that don't map to any real "
	        "spec violation.\n\n"
	        "| Field | Value |\n|---|---|\n"
	        f"| Dataset | `{dataset}` |\n"
	        f"| Profile (first run) | `{profile}` |\n"
	        f"| Total internal occurrences (first run) | {total} |\n"
	        f"| Distinct (rule, code) pairs (first run) | {len(by_pair)} |\n\n"
	        "Breakdown (sorted by occurrence count, descending):\n\n"
	        "| Rule | Code | Severity | Count | Sample message |\n"
	        f"|---|---|---|---|---|\n{rows}\n\n"
	        "---\n"
	        "_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). "
	        "One issue per dataset — re-validating the same dataset comments "
	        "here with the new counts instead of opening a duplicate._"
	    )


	def _build_dataset_recurrence_comment(by_pair: dict, dataset: str, profile: str,
	                                      total: int) -> str:
	    """Render the Markdown comment posted when a dataset re-hits internal bugs.

	    The comment names the dataset and profile, gives this run's totals, and
	    lists one table row per ``(rule, code)`` pair, highest count first.
	    """
	    # A stable descending sort: pairs with equal counts keep dict order.
	    ranked = sorted(by_pair.items(), key=lambda item: item[1]["count"], reverse=True)
	    table_rows = [
	        f"| `{pair[0]}` | `{pair[1]}` | {group['count']} |"
	        for pair, group in ranked
	    ]
	    intro = f"Re-hit during validation of `{dataset}` (profile `{profile}`).\n"
	    totals = (
	        f"This run: {total} internal occurrences across {len(by_pair)} "
	        "distinct (rule, code) pairs.\n\n"
	    )
	    table_head = "| Rule | Code | Count this run |\n|---|---|---|\n"
	    return intro + totals + table_head + "\n".join(table_rows)


	def _list_existing_internal_issues() -> list[dict]:
	    """Fetch issues labelled ``validator-internal`` for semantic dedup by the agent.

	    The search query carries no state filter, so closed issues come back
	    alongside open ones; that is intentional, because the same bug can
	    return after a fix is reverted or after a regression. Only the first
	    page (up to 100 hits) is requested.

	    Returns:
	        The search hits, or ``[]`` when the request fails or the reply is
	        not the expected JSON object. This is best-effort, so a failure is
	        reported on stdout rather than raised; otherwise the agent would
	        silently dedupe against nothing and could open duplicates.
	    """
	    q = urllib.parse.quote(
	        f'repo:{GH_REPO} label:"validator-internal" is:issue')
	    try:
	        result = _gh_request("GET", f"/../../search/issues?q={q}&per_page=100")
	    except Exception as e:
	        print(" ! could not list existing validator-internal issues "
	              f"({type(e).__name__}: {scrub_secrets(str(e))[:200]}); "
	              "dedup will run against an empty list", flush=True)
	        return []
	    if not isinstance(result, dict):
	        return []
	    return result.get("items") or []


	def _execute_decisions(decisions: list[dict], by_pair: dict, dataset: str,
	                       profile: str, total: int, log_fn) -> dict:
	    """Carry out the agent's decisions and return counters.

	    Args:
	        decisions: Agent output; each dict has an ``action`` of
	            ``"create"``, ``"comment"`` or ``"skip"`` plus action-specific
	            fields (``title``/``body``/``rule``/``code``/``reasoning``/
	            ``target_issue``). Entries with any other action are logged
	            and ignored.
	        by_pair: Unused here; kept for signature compatibility.
	        dataset: Dataset identifier, used in default titles and bodies.
	        profile: Validation profile, used in default bodies.
	        total: Unused here; kept for signature compatibility.
	        log_fn: Callable taking one log line.

	    Returns:
	        ``{"created", "updated", "skipped", "aborted_404"}``.
	    """
	    out = log_fn

	    def why(decision: dict, limit=None) -> str:
	        # The agent may emit `"reasoning": null`; never slice None.
	        text = str(decision.get("reasoning") or "")
	        return text if limit is None else text[:limit]

	    created = updated = skipped = 0
	    aborted = False
	    # Map negative placeholder numbers → real issue numbers as `create`
	    # decisions resolve. The agent uses negatives to cross-reference
	    # decisions that share a parent issue when several groups roll up
	    # into one new bug. The n-th create decision (1-based) owns -n.
	    placeholder_to_real: dict[int, int] = {}
	    # Two passes: creates first (so their numbers are known), then
	    # comments (so cross-references resolve). Skips are free.
	    creates = [d for d in decisions if d.get("action") == "create"]
	    comments = [d for d in decisions if d.get("action") == "comment"]
	    skips = [d for d in decisions if d.get("action") == "skip"]
	    for d in decisions:
	        if d.get("action") not in ("create", "comment", "skip"):
	            out(f" ! ignoring decision with unknown action {d.get('action')!r}")

	    # enumerate() gives each decision its own position; looking it up by
	    # equality would hand two identical dicts the same placeholder.
	    for position, d in enumerate(creates, start=1):
	        if aborted:
	            break
	        placeholder = -position
	        title = d.get("title") or f"[validator-internal] {dataset} :: {d.get('rule')} / {d.get('code')}"
	        body = d.get("body") or "(agent did not provide a body)"
	        try:
	            existing = _find_issue(title)
	            if existing:
	                _add_comment(existing["number"],
	                             f"_Re-hit during validation of `{dataset}` "
	                             f"(profile `{profile}`)._\n\n{body}")
	                updated += 1
	                out(f" internal-issue #{existing['number']}: comment added "
	                    f"(agent: create→existing match by title)")
	                placeholder_to_real[placeholder] = existing["number"]
	            else:
	                num = _create_issue(title, body, ["validator-internal", "process", "agent-reviewed"])
	                created += 1
	                out(f" internal-issue #{num}: opened ({title!r}) — agent reasoning: "
	                    f"{why(d, 160)}")
	                placeholder_to_real[placeholder] = num
	        except Exception as e:
	            msg = f"{type(e).__name__}: {e}"
	            if "404" in msg:
	                out(f" ! internal-issue tracking aborted (404 — token lacks issues:write on {GH_REPO})")
	                aborted = True
	            else:
	                out(f" ! create failed for {title!r}: {msg}")

	    for d in comments:
	        if aborted:
	            break
	        raw_target = d.get("target_issue")
	        if raw_target is None:
	            out(f" ! comment decision has no target_issue; skipping ({why(d, 100)})")
	            continue
	        try:
	            # Agent output is untrusted JSON: the number may arrive as a string.
	            target = int(raw_target)
	        except (TypeError, ValueError):
	            out(f" ! comment decision has a non-integer target_issue {raw_target!r}; skipping")
	            continue
	        if target < 0:
	            target = placeholder_to_real.get(target)
	            if target is None:
	                out(" ! comment decision cross-references an unresolved placeholder; skipping")
	                continue
	        body = d.get("body") or (
	            f"Re-hit during validation of `{dataset}` (profile `{profile}`). "
	            f"Same underlying bug as this issue — see agent reasoning: "
	            f"{why(d)}"
	        )
	        try:
	            _add_comment(target, body)
	            updated += 1
	            out(f" internal-issue #{target}: comment added (agent: comment) — {why(d, 120)}")
	        except Exception as e:
	            msg = f"{type(e).__name__}: {e}"
	            if "404" in msg:
	                out(" ! comment tracking aborted (404)")
	                aborted = True
	            else:
	                out(f" ! comment failed for #{target}: {msg}")

	    # Skips involve no API call, so they are reported even after an abort.
	    for d in skips:
	        skipped += 1
	        out(f" internal-issue {d.get('rule')}/{d.get('code')}: skipped — {why(d, 160)}")
	    return {"created": created, "updated": updated, "skipped": skipped,
	            "aborted_404": aborted}


	def _ensure_internal_issues_simple(by_pair: dict, dataset: str, profile: str,
	                                   total: int, log_fn) -> dict:
	    """Fallback policy without agentic review: one issue per dataset.

	    Looks for an issue titled ``[validator-internal] <dataset>``. If one
	    exists, a recurrence comment is added to it; otherwise a new issue is
	    opened. Any exception is caught and reported in the returned dict (a
	    message containing ``404`` is reported as ``aborted_404``). Every
	    result carries ``"fallback": True``.
	    """
	    out = log_fn
	    title = f"[validator-internal] {dataset}"
	    pair_count = len(by_pair)
	    try:
	        match = _find_issue(title)
	        if not match:
	            num = _create_issue(
	                title,
	                _build_dataset_issue_body(by_pair, dataset, profile, total),
	                ["validator-internal", "process"],
	            )
	            out(f" internal-issue #{num}: opened for dataset {dataset} "
	                f"({total} occurrences, {pair_count} pairs) — fallback policy")
	            return {"created": 1, "updated": 0, "pairs": pair_count,
	                    "total": total, "fallback": True}

	        issue_no = match["number"]
	        comment = _build_dataset_recurrence_comment(by_pair, dataset, profile, total)
	        _add_comment(issue_no, comment)
	        out(f" internal-issue #{issue_no}: comment added for dataset "
	            f"{dataset} ({total} occurrences, {pair_count} pairs) — fallback policy")
	        return {"created": 0, "updated": 1, "pairs": pair_count,
	                "total": total, "fallback": True}
	    except Exception as e:
	        msg = f"{type(e).__name__}: {e}"
	        if "404" not in msg:
	            out(f" ! internal-issue tracking for dataset {dataset} failed: {msg}")
	            return {"created": 0, "updated": 0, "error": msg, "fallback": True}
	        out(f" ! internal-issue tracking aborted (404 — token lacks issues:write on {GH_REPO})")
	        return {"created": 0, "updated": 0, "aborted_404": True, "fallback": True}


	def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
	                           log_fn=None) -> dict:
	    """Scan results.json for validator-internal bugs and route them to GitHub.

	    Issues flagged by `is_validator_internal_issue` are grouped by
	    ``(rule, code)`` across the whole dataset, then handed to an agentic
	    review pass (``agentic_issues.review_and_decide``) whose decisions are
	    executed. Falls back to the simple one-issue-per-dataset policy when
	    the agent module cannot be imported, reports itself unavailable,
	    raises, or returns no decisions.

	    Args:
	        results_json: Parsed results; ``results[*].issues[*]`` are scanned.
	        dataset: Dataset identifier used in titles and bodies.
	        profile: Validation profile used in bodies.
	        log_fn: Callable taking one log line; defaults to a flushing print.

	    Returns:
	        A counters dict. Best-effort: failures of the agent or of decision
	        execution are logged and reported in the result instead of raised,
	        so the validator's verdict is never blocked on GitHub or the agent
	        being flaky.
	    """
	    out = log_fn or (lambda s: print(s, flush=True))
	    if not _gh_token():
	        out(" (skipping internal-issue tracking: no GH token)")
	        return {"skipped": True, "reason": "no_token"}

	    # Group across the whole dataset: (rule, code) → {count, sample, severity}
	    by_pair: dict[tuple[str, str], dict[str, Any]] = {}
	    total = 0
	    # `or []` also covers an explicit `"results": null`.
	    for asset in results_json.get("results") or []:
	        for iss in (asset.get("issues") or []):
	            if not is_validator_internal_issue(iss):
	                continue
	            rule = iss.get("rule") or "?"
	            code = iss.get("code") or "UNKNOWN"
	            # Sample message and severity come from the first occurrence.
	            g = by_pair.setdefault((rule, code), {
	                "count": 0,
	                "sample_msg": scrub_secrets((iss.get("msg") or "")[:200]),
	                "severity": (iss.get("severity") or "").lower(),
	            })
	            g["count"] += 1
	            total += 1

	    if not by_pair:
	        return {"created": 0, "updated": 0}

	    def fallback() -> dict:
	        return _ensure_internal_issues_simple(by_pair, dataset, profile, total, out)

	    # Agentic path: Claude classifies + writes the issue body.
	    try:
	        from agentic_issues import is_available as _agent_available, review_and_decide
	    except Exception as e:
	        out(f" (agentic_issues import failed: {type(e).__name__}: {str(e)[:120]}); "
	            f"using fallback policy")
	        return fallback()

	    try:
	        available = _agent_available()
	    except Exception as e:
	        out(f" (agentic availability check failed: {type(e).__name__}: "
	            f"{scrub_secrets(str(e))[:120]}); using fallback policy")
	        return fallback()
	    if not available:
	        out(" (agentic review unavailable; using fallback policy)")
	        return fallback()

	    existing = _list_existing_internal_issues()
	    out(f" agentic review: {len(by_pair)} group(s) vs {len(existing)} existing "
	        f"validator-internal issue(s)")
	    try:
	        review = review_and_decide(by_pair, dataset, profile, total, existing, log_fn=out)
	    except Exception as e:
	        # A failed agent call must degrade to the fallback, not propagate.
	        out(f" (agentic review failed: {type(e).__name__}: "
	            f"{scrub_secrets(str(e))[:120]}); using fallback policy")
	        return fallback()

	    decisions = review.get("decisions") if isinstance(review, dict) else None
	    if not decisions:
	        out(" (agentic review returned no decisions; using fallback policy)")
	        return fallback()

	    if review.get("summary"):
	        out(f" agent summary: {str(review['summary'])[:300]}")
	    try:
	        result = _execute_decisions(decisions, by_pair, dataset, profile, total, out)
	    except Exception as e:
	        # Malformed agent output must not take the validator run down.
	        msg = f"{type(e).__name__}: {scrub_secrets(str(e))}"
	        out(f" ! executing agent decisions failed: {msg}")
	        return {"created": 0, "updated": 0, "error": msg,
	                "pairs": len(by_pair), "total": total, "agentic": True}
	    result.update({"pairs": len(by_pair), "total": total, "agentic": True})
	    return result