Spaces:
Sleeping
Sleeping
Sync from simready-oem-library-pm@9cf6fb4c
Browse files- tools/hf_space/github_issues.py +64 -56
tools/hf_space/github_issues.py
CHANGED
|
@@ -84,48 +84,64 @@ def _add_comment(issue_num: int, body: str) -> None:
|
|
| 84 |
_gh_request("POST", f"/issues/{issue_num}/comments", {"body": body})
|
| 85 |
|
| 86 |
|
| 87 |
-
def
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
return (
|
| 90 |
-
f"**Validator-internal
|
| 91 |
-
f"
|
| 92 |
-
f"registration / spec loading is misbehaving
|
| 93 |
-
f"don't map to any real
|
|
|
|
| 94 |
f"| Field | Value |\n|---|---|\n"
|
| 95 |
-
f"| Rule | `{rule}` |\n"
|
| 96 |
-
f"| Code (as reported) | `{code}` |\n"
|
| 97 |
-
f"| Severity | {g.get('severity') or '?'} |\n"
|
| 98 |
-
f"| Occurrence count (first run) | {g['count']} |\n"
|
| 99 |
f"| Dataset | `{dataset}` |\n"
|
| 100 |
-
f"| Profile | `{profile}` |\n
|
| 101 |
-
f"
|
| 102 |
-
f"
|
|
|
|
|
|
|
|
|
|
| 103 |
f"---\n"
|
| 104 |
f"_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). "
|
| 105 |
-
f"
|
| 106 |
-
f"here
|
| 107 |
)
|
| 108 |
|
| 109 |
|
| 110 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
return (
|
| 112 |
f"Re-hit during validation of `{dataset}` (profile `{profile}`).\n"
|
| 113 |
-
f"
|
|
|
|
|
|
|
| 114 |
)
|
| 115 |
|
| 116 |
|
| 117 |
def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
|
| 118 |
log_fn=None) -> dict:
|
| 119 |
-
"""Scan results.json for validator-internal bugs and ensure
|
| 120 |
-
GitHub issue exists
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
| 123 |
out = log_fn or (lambda s: print(s, flush=True))
|
| 124 |
if not _gh_token():
|
| 125 |
out(" (skipping internal-issue tracking: no GH token)")
|
| 126 |
return {"skipped": True, "reason": "no_token"}
|
| 127 |
|
| 128 |
-
|
|
|
|
|
|
|
| 129 |
for asset in results_json.get("results", []):
|
| 130 |
for iss in (asset.get("issues") or []):
|
| 131 |
if not is_validator_internal_issue(iss):
|
|
@@ -133,45 +149,37 @@ def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
|
|
| 133 |
rule = iss.get("rule") or "?"
|
| 134 |
code = iss.get("code") or "UNKNOWN"
|
| 135 |
key = (rule, code)
|
| 136 |
-
g =
|
| 137 |
"count": 0,
|
| 138 |
"sample_msg": (iss.get("msg") or "")[:200],
|
| 139 |
"severity": (iss.get("severity") or "").lower(),
|
| 140 |
})
|
| 141 |
g["count"] += 1
|
|
|
|
| 142 |
|
| 143 |
-
if not
|
| 144 |
return {"created": 0, "updated": 0}
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
msg = f"{type(e).__name__}: {e}"
|
| 170 |
-
if "404" in msg:
|
| 171 |
-
out(f" ! internal-issue tracking aborted (404 — token lacks issues:write "
|
| 172 |
-
f"on {GH_REPO}); skipping {len(groups) - created - updated} remaining group(s)")
|
| 173 |
-
aborted = True
|
| 174 |
-
else:
|
| 175 |
-
out(f" ! internal-issue {rule}/{code} tracking failed: {msg}")
|
| 176 |
-
return {"created": created, "updated": updated, "groups": len(groups),
|
| 177 |
-
"aborted_404": aborted}
|
|
|
|
| 84 |
_gh_request("POST", f"/issues/{issue_num}/comments", {"body": body})
|
| 85 |
|
| 86 |
|
| 87 |
+
def _build_dataset_issue_body(by_pair: dict, dataset: str, profile: str,
|
| 88 |
+
total: int) -> str:
|
| 89 |
+
rows = "\n".join(
|
| 90 |
+
f"| `{rule}` | `{code}` | {g['severity'] or '?'} | {g['count']} | `{g['sample_msg']}` |"
|
| 91 |
+
for (rule, code), g in sorted(by_pair.items(), key=lambda kv: -kv[1]["count"])
|
| 92 |
+
)
|
| 93 |
return (
|
| 94 |
+
f"**Validator-internal bugs on a single dataset** β surfaced during "
|
| 95 |
+
f"automatic SimReady validation. NOT a customer-asset finding; the "
|
| 96 |
+
f"validator's own rule registration / spec loading is misbehaving "
|
| 97 |
+
f"on this dataset and emitting errors that don't map to any real "
|
| 98 |
+
f"spec violation.\n\n"
|
| 99 |
f"| Field | Value |\n|---|---|\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
f"| Dataset | `{dataset}` |\n"
|
| 101 |
+
f"| Profile (first run) | `{profile}` |\n"
|
| 102 |
+
f"| Total internal occurrences (first run) | {total} |\n"
|
| 103 |
+
f"| Distinct (rule, code) pairs (first run) | {len(by_pair)} |\n\n"
|
| 104 |
+
f"**Breakdown** (sorted by occurrence count, descending):\n\n"
|
| 105 |
+
f"| Rule | Code | Severity | Count | Sample message |\n"
|
| 106 |
+
f"|---|---|---|---|---|\n{rows}\n\n"
|
| 107 |
f"---\n"
|
| 108 |
f"_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). "
|
| 109 |
+
f"One issue per dataset β re-validating the same dataset comments "
|
| 110 |
+
f"here with the new counts instead of opening a duplicate._"
|
| 111 |
)
|
| 112 |
|
| 113 |
|
| 114 |
+
def _build_dataset_recurrence_comment(by_pair: dict, dataset: str, profile: str,
|
| 115 |
+
total: int) -> str:
|
| 116 |
+
rows = "\n".join(
|
| 117 |
+
f"| `{rule}` | `{code}` | {g['count']} |"
|
| 118 |
+
for (rule, code), g in sorted(by_pair.items(), key=lambda kv: -kv[1]["count"])
|
| 119 |
+
)
|
| 120 |
return (
|
| 121 |
f"Re-hit during validation of `{dataset}` (profile `{profile}`).\n"
|
| 122 |
+
f"This run: **{total}** internal occurrences across **{len(by_pair)}** "
|
| 123 |
+
f"distinct (rule, code) pairs.\n\n"
|
| 124 |
+
f"| Rule | Code | Count this run |\n|---|---|---|\n{rows}"
|
| 125 |
)
|
| 126 |
|
| 127 |
|
| 128 |
def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
|
| 129 |
log_fn=None) -> dict:
|
| 130 |
+
"""Scan results.json for validator-internal bugs and ensure exactly ONE
|
| 131 |
+
tracking GitHub issue exists per dataset. The issue lists every
|
| 132 |
+
distinct (rule, code) pair found across the whole dataset; re-runs
|
| 133 |
+
add a comment with the new counts instead of opening duplicates.
|
| 134 |
+
|
| 135 |
+
Best-effort — swallowed exceptions return {"error": ...} so the
|
| 136 |
+
validator's verdict is never blocked on GitHub being flaky."""
|
| 137 |
out = log_fn or (lambda s: print(s, flush=True))
|
| 138 |
if not _gh_token():
|
| 139 |
out(" (skipping internal-issue tracking: no GH token)")
|
| 140 |
return {"skipped": True, "reason": "no_token"}
|
| 141 |
|
| 142 |
+
# Group across the whole dataset: (rule, code) → {count, sample, severity}
|
| 143 |
+
by_pair: dict[tuple[str, str], dict[str, Any]] = {}
|
| 144 |
+
total = 0
|
| 145 |
for asset in results_json.get("results", []):
|
| 146 |
for iss in (asset.get("issues") or []):
|
| 147 |
if not is_validator_internal_issue(iss):
|
|
|
|
| 149 |
rule = iss.get("rule") or "?"
|
| 150 |
code = iss.get("code") or "UNKNOWN"
|
| 151 |
key = (rule, code)
|
| 152 |
+
g = by_pair.setdefault(key, {
|
| 153 |
"count": 0,
|
| 154 |
"sample_msg": (iss.get("msg") or "")[:200],
|
| 155 |
"severity": (iss.get("severity") or "").lower(),
|
| 156 |
})
|
| 157 |
g["count"] += 1
|
| 158 |
+
total += 1
|
| 159 |
|
| 160 |
+
if not by_pair:
|
| 161 |
return {"created": 0, "updated": 0}
|
| 162 |
|
| 163 |
+
title = f"[validator-internal] {dataset}"
|
| 164 |
+
try:
|
| 165 |
+
existing = _find_issue(title)
|
| 166 |
+
if existing:
|
| 167 |
+
_add_comment(existing["number"],
|
| 168 |
+
_build_dataset_recurrence_comment(by_pair, dataset, profile, total))
|
| 169 |
+
out(f" internal-issue #{existing['number']}: comment added for dataset "
|
| 170 |
+
f"{dataset} ({total} occurrences, {len(by_pair)} pairs)")
|
| 171 |
+
return {"created": 0, "updated": 1, "pairs": len(by_pair), "total": total}
|
| 172 |
+
num = _create_issue(title,
|
| 173 |
+
_build_dataset_issue_body(by_pair, dataset, profile, total),
|
| 174 |
+
["validator-internal", "process"])
|
| 175 |
+
out(f" internal-issue #{num}: opened for dataset {dataset} "
|
| 176 |
+
f"({total} occurrences, {len(by_pair)} pairs)")
|
| 177 |
+
return {"created": 1, "updated": 0, "pairs": len(by_pair), "total": total}
|
| 178 |
+
except Exception as e:
|
| 179 |
+
msg = f"{type(e).__name__}: {e}"
|
| 180 |
+
if "404" in msg:
|
| 181 |
+
out(f" ! internal-issue tracking aborted (404 — token lacks issues:write "
|
| 182 |
+
f"on {GH_REPO})")
|
| 183 |
+
return {"created": 0, "updated": 0, "aborted_404": True}
|
| 184 |
+
out(f" ! internal-issue tracking for dataset {dataset} failed: {msg}")
|
| 185 |
+
return {"created": 0, "updated": 0, "error": msg}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|