Spaces:
Sleeping
Sleeping
Commit ·
f53394d
1
Parent(s): 13f7398
going through changes
Browse files
app.py
CHANGED
|
@@ -155,53 +155,53 @@ def format_comment(
|
|
| 155 |
num_failed = sum(1 for r in validation_results if not r.valid)
|
| 156 |
total = len(validation_results)
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
lines = [
|
| 159 |
-
|
| 160 |
f"**PR:** #{pr_num} | **Run:** {now}",
|
| 161 |
"",
|
| 162 |
-
"
|
| 163 |
-
"| File | Status | Details |",
|
| 164 |
-
"|------|--------|---------|",
|
| 165 |
]
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
error_summary = "; ".join(r.errors[:5])
|
| 174 |
if len(r.errors) > 5:
|
| 175 |
error_summary += f" ... and {len(r.errors) - 5} more error(s)"
|
| 176 |
-
lines.append(f"| `{r.file_path}` |
|
| 177 |
|
| 178 |
# Dedup section
|
| 179 |
-
lines.append("")
|
| 180 |
-
lines.append("### Duplicate Check")
|
| 181 |
-
|
| 182 |
has_any_dupes = False
|
|
|
|
| 183 |
for dr in dedup_report.results:
|
| 184 |
if dr.exact_duplicate_of:
|
| 185 |
-
|
| 186 |
f"- **Exact duplicate:** `{dr.file_path}` is identical to "
|
| 187 |
f"existing `{dr.exact_duplicate_of}`"
|
| 188 |
)
|
| 189 |
has_any_dupes = True
|
| 190 |
if dr.near_duplicate_of:
|
| 191 |
-
|
| 192 |
f"- **Potential near-duplicate:** `{dr.file_path}` shares fingerprint "
|
| 193 |
f"with existing `{dr.near_duplicate_of}` "
|
| 194 |
f"(identical content minus timestamps/UUIDs)"
|
| 195 |
)
|
| 196 |
has_any_dupes = True
|
| 197 |
|
| 198 |
-
if
|
| 199 |
-
lines.append("
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
lines.append("")
|
| 203 |
-
lines.append("### Summary")
|
| 204 |
-
lines.append(f"{total} file(s) checked: {num_passed} passed, {num_failed} failed")
|
| 205 |
|
| 206 |
return "\n".join(lines)
|
| 207 |
|
|
@@ -264,16 +264,27 @@ def process_pr(pr_num: int) -> dict:
|
|
| 264 |
# Startup sweep — catch PRs missed while the Space was asleep
|
| 265 |
# ---------------------------------------------------------------------------
|
| 266 |
|
| 267 |
-
def
|
| 268 |
-
"""Check if a PR
|
| 269 |
details = api.get_discussion_details(
|
| 270 |
repo_id=DATASET_REPO_ID,
|
| 271 |
discussion_num=pr_num,
|
| 272 |
repo_type="dataset",
|
| 273 |
)
|
|
|
|
|
|
|
| 274 |
for event in details.events:
|
| 275 |
if event.type == "comment" and event.content and event.content.startswith(REPORT_HEADER):
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
return False
|
| 278 |
|
| 279 |
|
|
@@ -288,10 +299,10 @@ def startup_sweep() -> None:
|
|
| 288 |
for disc in discussions:
|
| 289 |
if not disc.is_pull_request or disc.status != "open":
|
| 290 |
continue
|
| 291 |
-
if
|
| 292 |
-
logger.info("PR #%d
|
| 293 |
continue
|
| 294 |
-
logger.info("PR #%d
|
| 295 |
try:
|
| 296 |
process_pr(disc.num)
|
| 297 |
except Exception:
|
|
|
|
| 155 |
num_failed = sum(1 for r in validation_results if not r.valid)
|
| 156 |
total = len(validation_results)
|
| 157 |
|
| 158 |
+
failed = [r for r in validation_results if not r.valid]
|
| 159 |
+
|
| 160 |
+
if num_failed == 0:
|
| 161 |
+
status_line = "## ✅ EEE Validation — Ready to Merge"
|
| 162 |
+
else:
|
| 163 |
+
status_line = "## ❌ EEE Validation — Changes Requested"
|
| 164 |
+
|
| 165 |
lines = [
|
| 166 |
+
status_line,
|
| 167 |
f"**PR:** #{pr_num} | **Run:** {now}",
|
| 168 |
"",
|
| 169 |
+
f"**{num_passed}/{total} files passed**",
|
|
|
|
|
|
|
| 170 |
]
|
| 171 |
|
| 172 |
+
if num_failed > 0:
|
| 173 |
+
lines.append("")
|
| 174 |
+
lines.append("### Failures")
|
| 175 |
+
lines.append("| File | Details |")
|
| 176 |
+
lines.append("|------|---------|")
|
| 177 |
+
for r in failed:
|
| 178 |
error_summary = "; ".join(r.errors[:5])
|
| 179 |
if len(r.errors) > 5:
|
| 180 |
error_summary += f" ... and {len(r.errors) - 5} more error(s)"
|
| 181 |
+
lines.append(f"| `{r.file_path}` | {error_summary} |")
|
| 182 |
|
| 183 |
# Dedup section
|
|
|
|
|
|
|
|
|
|
| 184 |
has_any_dupes = False
|
| 185 |
+
dedup_lines: list[str] = []
|
| 186 |
for dr in dedup_report.results:
|
| 187 |
if dr.exact_duplicate_of:
|
| 188 |
+
dedup_lines.append(
|
| 189 |
f"- **Exact duplicate:** `{dr.file_path}` is identical to "
|
| 190 |
f"existing `{dr.exact_duplicate_of}`"
|
| 191 |
)
|
| 192 |
has_any_dupes = True
|
| 193 |
if dr.near_duplicate_of:
|
| 194 |
+
dedup_lines.append(
|
| 195 |
f"- **Potential near-duplicate:** `{dr.file_path}` shares fingerprint "
|
| 196 |
f"with existing `{dr.near_duplicate_of}` "
|
| 197 |
f"(identical content minus timestamps/UUIDs)"
|
| 198 |
)
|
| 199 |
has_any_dupes = True
|
| 200 |
|
| 201 |
+
if has_any_dupes:
|
| 202 |
+
lines.append("")
|
| 203 |
+
lines.append("### Duplicate Check")
|
| 204 |
+
lines.extend(dedup_lines)
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
return "\n".join(lines)
|
| 207 |
|
|
|
|
| 264 |
# Startup sweep — catch PRs missed while the Space was asleep
|
| 265 |
# ---------------------------------------------------------------------------
|
| 266 |
|
| 267 |
+
def pr_needs_validation(pr_num: int) -> bool:
|
| 268 |
+
"""Check if a PR has commits newer than the last validation report."""
|
| 269 |
details = api.get_discussion_details(
|
| 270 |
repo_id=DATASET_REPO_ID,
|
| 271 |
discussion_num=pr_num,
|
| 272 |
repo_type="dataset",
|
| 273 |
)
|
| 274 |
+
last_report_time = None
|
| 275 |
+
last_commit_time = None
|
| 276 |
for event in details.events:
|
| 277 |
if event.type == "comment" and event.content and event.content.startswith(REPORT_HEADER):
|
| 278 |
+
last_report_time = event.created_at
|
| 279 |
+
if event.type == "commit":
|
| 280 |
+
last_commit_time = event.created_at
|
| 281 |
+
|
| 282 |
+
# No report yet — needs validation
|
| 283 |
+
if last_report_time is None:
|
| 284 |
+
return True
|
| 285 |
+
# Has commits after the last report — needs re-validation
|
| 286 |
+
if last_commit_time is not None and last_commit_time > last_report_time:
|
| 287 |
+
return True
|
| 288 |
return False
|
| 289 |
|
| 290 |
|
|
|
|
| 299 |
for disc in discussions:
|
| 300 |
if not disc.is_pull_request or disc.status != "open":
|
| 301 |
continue
|
| 302 |
+
if not pr_needs_validation(disc.num):
|
| 303 |
+
logger.info("PR #%d is up to date, skipping", disc.num)
|
| 304 |
continue
|
| 305 |
+
logger.info("PR #%d needs validation, processing", disc.num)
|
| 306 |
try:
|
| 307 |
process_pr(disc.num)
|
| 308 |
except Exception:
|