deepmage121 committed on
Commit
f53394d
·
1 Parent(s): 13f7398

going through changes

Browse files
Files changed (1) hide show
  1. app.py +40 -29
app.py CHANGED
@@ -155,53 +155,53 @@ def format_comment(
155
  num_failed = sum(1 for r in validation_results if not r.valid)
156
  total = len(validation_results)
157
 
 
 
 
 
 
 
 
158
  lines = [
159
- "## EEE Validation Report",
160
  f"**PR:** #{pr_num} | **Run:** {now}",
161
  "",
162
- "### Validation Results",
163
- "| File | Status | Details |",
164
- "|------|--------|---------|",
165
  ]
166
 
167
- for r in validation_results:
168
- if r.valid:
169
- type_label = "EvaluationLog" if r.file_type == "json" else "InstanceLevelEvaluationLog"
170
- lines.append(f"| `{r.file_path}` | PASS | Validated as {type_label} |")
171
- else:
172
- # Show first few errors to avoid extremely long comments
173
  error_summary = "; ".join(r.errors[:5])
174
  if len(r.errors) > 5:
175
  error_summary += f" ... and {len(r.errors) - 5} more error(s)"
176
- lines.append(f"| `{r.file_path}` | FAIL | {error_summary} |")
177
 
178
  # Dedup section
179
- lines.append("")
180
- lines.append("### Duplicate Check")
181
-
182
  has_any_dupes = False
 
183
  for dr in dedup_report.results:
184
  if dr.exact_duplicate_of:
185
- lines.append(
186
  f"- **Exact duplicate:** `{dr.file_path}` is identical to "
187
  f"existing `{dr.exact_duplicate_of}`"
188
  )
189
  has_any_dupes = True
190
  if dr.near_duplicate_of:
191
- lines.append(
192
  f"- **Potential near-duplicate:** `{dr.file_path}` shares fingerprint "
193
  f"with existing `{dr.near_duplicate_of}` "
194
  f"(identical content minus timestamps/UUIDs)"
195
  )
196
  has_any_dupes = True
197
 
198
- if not has_any_dupes:
199
- lines.append("- No exact or near duplicates found.")
200
-
201
- # Summary
202
- lines.append("")
203
- lines.append("### Summary")
204
- lines.append(f"{total} file(s) checked: {num_passed} passed, {num_failed} failed")
205
 
206
  return "\n".join(lines)
207
 
@@ -264,16 +264,27 @@ def process_pr(pr_num: int) -> dict:
264
  # Startup sweep — catch PRs missed while the Space was asleep
265
  # ---------------------------------------------------------------------------
266
 
267
def pr_has_validation_comment(pr_num: int) -> bool:
    """Return True when the PR already carries an EEE Validation Report comment.

    Fetches the discussion's event stream and looks for any comment whose
    body starts with REPORT_HEADER.
    """
    discussion = api.get_discussion_details(
        repo_id=DATASET_REPO_ID,
        discussion_num=pr_num,
        repo_type="dataset",
    )
    # A report is any comment event with non-empty content that opens with
    # the report header marker.
    return any(
        evt.type == "comment"
        and evt.content
        and evt.content.startswith(REPORT_HEADER)
        for evt in discussion.events
    )
278
 
279
 
@@ -288,10 +299,10 @@ def startup_sweep() -> None:
288
  for disc in discussions:
289
  if not disc.is_pull_request or disc.status != "open":
290
  continue
291
- if pr_has_validation_comment(disc.num):
292
- logger.info("PR #%d already has a validation report, skipping", disc.num)
293
  continue
294
- logger.info("PR #%d has no validation report, processing", disc.num)
295
  try:
296
  process_pr(disc.num)
297
  except Exception:
 
155
  num_failed = sum(1 for r in validation_results if not r.valid)
156
  total = len(validation_results)
157
 
158
+ failed = [r for r in validation_results if not r.valid]
159
+
160
+ if num_failed == 0:
161
+ status_line = "## ✅ EEE Validation — Ready to Merge"
162
+ else:
163
+ status_line = "## ❌ EEE Validation — Changes Requested"
164
+
165
  lines = [
166
+ status_line,
167
  f"**PR:** #{pr_num} | **Run:** {now}",
168
  "",
169
+ f"**{num_passed}/{total} files passed**",
 
 
170
  ]
171
 
172
+ if num_failed > 0:
173
+ lines.append("")
174
+ lines.append("### Failures")
175
+ lines.append("| File | Details |")
176
+ lines.append("|------|---------|")
177
+ for r in failed:
178
  error_summary = "; ".join(r.errors[:5])
179
  if len(r.errors) > 5:
180
  error_summary += f" ... and {len(r.errors) - 5} more error(s)"
181
+ lines.append(f"| `{r.file_path}` | {error_summary} |")
182
 
183
  # Dedup section
 
 
 
184
  has_any_dupes = False
185
+ dedup_lines: list[str] = []
186
  for dr in dedup_report.results:
187
  if dr.exact_duplicate_of:
188
+ dedup_lines.append(
189
  f"- **Exact duplicate:** `{dr.file_path}` is identical to "
190
  f"existing `{dr.exact_duplicate_of}`"
191
  )
192
  has_any_dupes = True
193
  if dr.near_duplicate_of:
194
+ dedup_lines.append(
195
  f"- **Potential near-duplicate:** `{dr.file_path}` shares fingerprint "
196
  f"with existing `{dr.near_duplicate_of}` "
197
  f"(identical content minus timestamps/UUIDs)"
198
  )
199
  has_any_dupes = True
200
 
201
+ if has_any_dupes:
202
+ lines.append("")
203
+ lines.append("### Duplicate Check")
204
+ lines.extend(dedup_lines)
 
 
 
205
 
206
  return "\n".join(lines)
207
 
 
264
  # Startup sweep — catch PRs missed while the Space was asleep
265
  # ---------------------------------------------------------------------------
266
 
267
def pr_needs_validation(pr_num: int) -> bool:
    """Return True when a PR needs (re-)validation.

    A PR needs validation when it has no EEE validation report comment yet,
    or when at least one commit event is newer than the newest report
    comment.

    Args:
        pr_num: Discussion/PR number on the dataset repo.

    Returns:
        True if the PR should be processed, False otherwise.
    """
    details = api.get_discussion_details(
        repo_id=DATASET_REPO_ID,
        discussion_num=pr_num,
        repo_type="dataset",
    )
    # Fix: do not assume `details.events` is chronologically ordered.
    # The original kept the *last seen* timestamp of each kind, which is
    # only correct if events arrive oldest-to-newest; take the explicit
    # maximum instead so ordering no longer matters.
    # NOTE(review): the new status lines ("## ✅ ..." / "## ❌ ...") must
    # still start with REPORT_HEADER, otherwise report comments are never
    # detected here and every PR is re-validated on each sweep — TODO
    # confirm REPORT_HEADER's value matches the new headers.
    last_report_time = max(
        (
            evt.created_at
            for evt in details.events
            if evt.type == "comment"
            and evt.content
            and evt.content.startswith(REPORT_HEADER)
        ),
        default=None,
    )
    last_commit_time = max(
        (evt.created_at for evt in details.events if evt.type == "commit"),
        default=None,
    )
    # No report yet — needs validation.
    if last_report_time is None:
        return True
    # Has commits after the last report — needs re-validation.
    return last_commit_time is not None and last_commit_time > last_report_time
289
 
290
 
 
299
  for disc in discussions:
300
  if not disc.is_pull_request or disc.status != "open":
301
  continue
302
+ if not pr_needs_validation(disc.num):
303
+ logger.info("PR #%d is up to date, skipping", disc.num)
304
  continue
305
+ logger.info("PR #%d needs validation, processing", disc.num)
306
  try:
307
  process_pr(disc.num)
308
  except Exception: