loginowskid committed on
Commit
d126183
·
verified ·
1 Parent(s): c551136

Sync from simready-oem-library-pm@9cf6fb4c

Browse files
Files changed (1) hide show
  1. tools/hf_space/github_issues.py +64 -56
tools/hf_space/github_issues.py CHANGED
@@ -84,48 +84,64 @@ def _add_comment(issue_num: int, body: str) -> None:
84
  _gh_request("POST", f"/issues/{issue_num}/comments", {"body": body})
85
 
86
 
87
- def _build_internal_issue_body(rule: str, code: str, g: dict[str, Any],
88
- dataset: str, profile: str) -> str:
 
 
 
 
89
  return (
90
- f"**Validator-internal bug** β€” surfaced during automatic SimReady validation.\n\n"
91
- f"This is NOT a customer-asset finding. The validator's own rule "
92
- f"registration / spec loading is misbehaving and emitting errors that "
93
- f"don't map to any real spec violation. Track + fix here.\n\n"
 
94
  f"| Field | Value |\n|---|---|\n"
95
- f"| Rule | `{rule}` |\n"
96
- f"| Code (as reported) | `{code}` |\n"
97
- f"| Severity | {g.get('severity') or '?'} |\n"
98
- f"| Occurrence count (first run) | {g['count']} |\n"
99
  f"| Dataset | `{dataset}` |\n"
100
- f"| Profile | `{profile}` |\n\n"
101
- f"**Sample message** (truncated to 200 chars):\n\n"
102
- f"```\n{g['sample_msg']}\n```\n\n"
 
 
 
103
  f"---\n"
104
  f"_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). "
105
- f"Subsequent runs that re-hit the same (rule, code) pair will comment "
106
- f"here rather than open new issues._"
107
  )
108
 
109
 
110
- def _build_recurrence_comment(g: dict[str, Any], dataset: str, profile: str) -> str:
 
 
 
 
 
111
  return (
112
  f"Re-hit during validation of `{dataset}` (profile `{profile}`).\n"
113
- f"Occurrences this run: {g['count']}, severity: {g.get('severity') or '?'}."
 
 
114
  )
115
 
116
 
117
def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
                           log_fn=None) -> dict:
    """Scan results.json for validator-internal bugs and ensure a tracking
    GitHub issue exists for each distinct (rule, code) pair.

    Best-effort: exceptions raised while talking to GitHub are logged and
    swallowed so the validator's verdict is never blocked on GitHub being
    flaky.

    Args:
        results_json: Parsed results document; reads ``results`` (a list of
            assets) and each asset's ``issues`` list.
        dataset: Dataset identifier, embedded in issue bodies and comments.
        profile: Profile name, embedded in issue bodies and comments.
        log_fn: Optional callable taking one string; defaults to a flushing
            ``print``.

    Returns:
        ``{"skipped": True, "reason": "no_token"}`` when no token is
        available, ``{"created": 0, "updated": 0}`` when nothing internal
        was found, otherwise a summary with ``created``, ``updated``,
        ``groups`` and ``aborted_404``.
    """
    out = log_fn or (lambda s: print(s, flush=True))
    if not _gh_token():
        out(" (skipping internal-issue tracking: no GH token)")
        return {"skipped": True, "reason": "no_token"}

    groups: dict[tuple[str, str], dict[str, Any]] = {}
    # ``or []`` also covers an explicit ``"results": null``, mirroring the
    # handling of ``issues`` below.
    for asset in results_json.get("results") or []:
        for iss in (asset.get("issues") or []):
            if not is_validator_internal_issue(iss):
                continue
            rule = iss.get("rule") or "?"
            code = iss.get("code") or "UNKNOWN"
            key = (rule, code)
            # The first occurrence of a pair supplies the sample message
            # and severity; later occurrences only bump the counter.
            g = groups.setdefault(key, {
                "count": 0,
                "sample_msg": (iss.get("msg") or "")[:200],
                "severity": (iss.get("severity") or "").lower(),
            })
            g["count"] += 1

    if not groups:
        return {"created": 0, "updated": 0}

    created = updated = 0
    aborted = False
    for (rule, code), g in groups.items():
        title = f"[validator-internal] {rule} → {code}"
        try:
            existing = _find_issue(title)
            if existing:
                _add_comment(existing["number"],
                             _build_recurrence_comment(g, dataset, profile))
                updated += 1
                out(f" internal-issue #{existing['number']}: comment added for {rule}/{code}")
            else:
                num = _create_issue(title,
                                    _build_internal_issue_body(rule, code, g, dataset, profile),
                                    ["validator-internal", "process"])
                created += 1
                out(f" internal-issue #{num}: opened for {rule}/{code}")
        except Exception as e:
            msg = f"{type(e).__name__}: {e}"
            if "404" in msg:
                # First 404 → token doesn't have access to issues on this
                # repo. Bail out instead of trying all N groups, which would
                # 404 each and spam the log with identical errors.
                out(f" ! internal-issue tracking aborted (404 — token lacks issues:write "
                    f"on {GH_REPO}); skipping {len(groups) - created - updated} remaining group(s)")
                aborted = True
                break
            out(f" ! internal-issue {rule}/{code} tracking failed: {msg}")
    return {"created": created, "updated": updated, "groups": len(groups),
            "aborted_404": aborted}
 
84
  _gh_request("POST", f"/issues/{issue_num}/comments", {"body": body})
85
 
86
 
87
+ def _build_dataset_issue_body(by_pair: dict, dataset: str, profile: str,
88
+ total: int) -> str:
89
+ rows = "\n".join(
90
+ f"| `{rule}` | `{code}` | {g['severity'] or '?'} | {g['count']} | `{g['sample_msg']}` |"
91
+ for (rule, code), g in sorted(by_pair.items(), key=lambda kv: -kv[1]["count"])
92
+ )
93
  return (
94
+ f"**Validator-internal bugs on a single dataset** β€” surfaced during "
95
+ f"automatic SimReady validation. NOT a customer-asset finding; the "
96
+ f"validator's own rule registration / spec loading is misbehaving "
97
+ f"on this dataset and emitting errors that don't map to any real "
98
+ f"spec violation.\n\n"
99
  f"| Field | Value |\n|---|---|\n"
 
 
 
 
100
  f"| Dataset | `{dataset}` |\n"
101
+ f"| Profile (first run) | `{profile}` |\n"
102
+ f"| Total internal occurrences (first run) | {total} |\n"
103
+ f"| Distinct (rule, code) pairs (first run) | {len(by_pair)} |\n\n"
104
+ f"**Breakdown** (sorted by occurrence count, descending):\n\n"
105
+ f"| Rule | Code | Severity | Count | Sample message |\n"
106
+ f"|---|---|---|---|---|\n{rows}\n\n"
107
  f"---\n"
108
  f"_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). "
109
+ f"One issue per dataset β€” re-validating the same dataset comments "
110
+ f"here with the new counts instead of opening a duplicate._"
111
  )
112
 
113
 
114
+ def _build_dataset_recurrence_comment(by_pair: dict, dataset: str, profile: str,
115
+ total: int) -> str:
116
+ rows = "\n".join(
117
+ f"| `{rule}` | `{code}` | {g['count']} |"
118
+ for (rule, code), g in sorted(by_pair.items(), key=lambda kv: -kv[1]["count"])
119
+ )
120
  return (
121
  f"Re-hit during validation of `{dataset}` (profile `{profile}`).\n"
122
+ f"This run: **{total}** internal occurrences across **{len(by_pair)}** "
123
+ f"distinct (rule, code) pairs.\n\n"
124
+ f"| Rule | Code | Count this run |\n|---|---|---|\n{rows}"
125
  )
126
 
127
 
128
def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
                           log_fn=None) -> dict:
    """Scan results.json for validator-internal bugs and ensure exactly ONE
    tracking GitHub issue exists per dataset. The issue lists every
    distinct (rule, code) pair found across the whole dataset; re-runs
    add a comment with the new counts instead of opening duplicates.

    Best-effort — swallowed exceptions return {"error": ...} so the
    validator's verdict is never blocked on GitHub being flaky.

    Args:
        results_json: Parsed results document; reads ``results`` (a list of
            assets) and each asset's ``issues`` list.
        dataset: Dataset identifier; used in the issue title, body and logs.
        profile: Profile name, embedded in the issue body or comment.
        log_fn: Optional callable taking one string; defaults to a flushing
            ``print``.

    Returns:
        A summary dict. ``{"skipped": True, "reason": "no_token"}`` when no
        token is available; otherwise ``created`` / ``updated`` counters
        plus ``pairs`` and ``total`` on success or when nothing was found,
        or ``error`` (and ``aborted_404`` for a 404) when the GitHub call
        failed.
    """
    out = log_fn or (lambda s: print(s, flush=True))
    if not _gh_token():
        out(" (skipping internal-issue tracking: no GH token)")
        return {"skipped": True, "reason": "no_token"}

    # Group across the whole dataset: (rule, code) → {count, sample, severity}
    by_pair: dict[tuple[str, str], dict[str, Any]] = {}
    total = 0
    # ``or []`` also covers an explicit ``"results": null``, mirroring the
    # handling of ``issues`` below; this loop runs outside the try block.
    for asset in results_json.get("results") or []:
        for iss in (asset.get("issues") or []):
            if not is_validator_internal_issue(iss):
                continue
            rule = iss.get("rule") or "?"
            code = iss.get("code") or "UNKNOWN"
            key = (rule, code)
            # The first occurrence of a pair supplies the sample message
            # and severity; later occurrences only bump the counter.
            g = by_pair.setdefault(key, {
                "count": 0,
                "sample_msg": (iss.get("msg") or "")[:200],
                "severity": (iss.get("severity") or "").lower(),
            })
            g["count"] += 1
            total += 1

    if not by_pair:
        return {"created": 0, "updated": 0, "pairs": 0, "total": 0}

    title = f"[validator-internal] {dataset}"
    try:
        existing = _find_issue(title)
        if existing:
            _add_comment(existing["number"],
                         _build_dataset_recurrence_comment(by_pair, dataset, profile, total))
            out(f" internal-issue #{existing['number']}: comment added for dataset "
                f"{dataset} ({total} occurrences, {len(by_pair)} pairs)")
            return {"created": 0, "updated": 1, "pairs": len(by_pair), "total": total}
        num = _create_issue(title,
                            _build_dataset_issue_body(by_pair, dataset, profile, total),
                            ["validator-internal", "process"])
        out(f" internal-issue #{num}: opened for dataset {dataset} "
            f"({total} occurrences, {len(by_pair)} pairs)")
        return {"created": 1, "updated": 0, "pairs": len(by_pair), "total": total}
    except Exception as e:
        # Deliberate catch-all: issue tracking must never fail the validation run.
        msg = f"{type(e).__name__}: {e}"
        if "404" in msg:
            out(f" ! internal-issue tracking aborted (404 — token lacks issues:write "
                f"on {GH_REPO})")
            return {"created": 0, "updated": 0, "aborted_404": True, "error": msg}
        out(f" ! internal-issue tracking for dataset {dataset} failed: {msg}")
        return {"created": 0, "updated": 0, "error": msg}