Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,17 +71,21 @@ audit_state = AuditState()
|
|
| 71 |
def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
| 72 |
"""Main audit generator — yields progress updates."""
|
| 73 |
if sb is None:
|
| 74 |
-
yield "β Supabase not connected. Set SUPABASE_URL and SUPABASE_KEY in Space secrets.",
|
|
|
|
|
|
|
|
|
|
| 75 |
return
|
| 76 |
|
| 77 |
# Parse URLs
|
| 78 |
urls = []
|
| 79 |
if file is not None:
|
| 80 |
try:
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
else:
|
| 84 |
-
df = pd.read_excel(
|
| 85 |
url_col = None
|
| 86 |
for col in df.columns:
|
| 87 |
sample = str(df[col].iloc[0]).strip().lower()
|
|
@@ -92,13 +96,17 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
|
| 92 |
url_col = df.columns[0]
|
| 93 |
urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
|
| 94 |
except Exception as e:
|
| 95 |
-
yield f"β File error: {e}", "",
|
|
|
|
|
|
|
| 96 |
return
|
| 97 |
elif pasted_urls and pasted_urls.strip():
|
| 98 |
urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
|
| 99 |
|
| 100 |
if not urls:
|
| 101 |
-
yield "β No valid URLs found. Upload a file or paste URLs.", "",
|
|
|
|
|
|
|
| 102 |
return
|
| 103 |
|
| 104 |
# Deduplicate preserving order
|
|
@@ -117,21 +125,27 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
|
| 117 |
audit_state.resume() # Reset pause flag
|
| 118 |
|
| 119 |
total = len(urls)
|
|
|
|
|
|
|
|
|
|
| 120 |
start_time = time.time()
|
| 121 |
batch_num = 0
|
| 122 |
log_lines = []
|
| 123 |
|
| 124 |
-
yield f"π Started: {run_name}\nπ¦ {total} URLs Β· Batch size: {batch_size}", "",
|
|
|
|
|
|
|
| 125 |
|
| 126 |
try:
|
| 127 |
for batch_start in range(0, total, batch_size):
|
| 128 |
-
# Check for pause
|
| 129 |
if audit_state.is_paused():
|
| 130 |
completed = get_completed_count(sb, run_id)
|
| 131 |
update_run_status(sb, run_id, "paused", completed)
|
| 132 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total} β resume from Past Runs")
|
| 133 |
audit_state.set_running(False)
|
| 134 |
-
yield "\n".join(log_lines[-40:]), "
|
|
|
|
|
|
|
| 135 |
return
|
| 136 |
|
| 137 |
batch_end = min(batch_start + batch_size, total)
|
|
@@ -140,16 +154,16 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
|
| 140 |
batch_results = []
|
| 141 |
|
| 142 |
for j, url in enumerate(batch_urls):
|
| 143 |
-
# Check pause between each URL
|
| 144 |
if audit_state.is_paused():
|
| 145 |
-
# Save partial batch
|
| 146 |
if batch_results:
|
| 147 |
save_batch_results(sb, run_id, batch_results)
|
| 148 |
completed = get_completed_count(sb, run_id)
|
| 149 |
update_run_status(sb, run_id, "paused", completed)
|
| 150 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total}")
|
| 151 |
audit_state.set_running(False)
|
| 152 |
-
yield "\n".join(log_lines[-40:]), "
|
|
|
|
|
|
|
| 153 |
return
|
| 154 |
|
| 155 |
global_idx = batch_start + j + 1
|
|
@@ -179,13 +193,15 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
|
| 179 |
flag_str = " · ".join(flags) if flags else "✅"
|
| 180 |
log_lines.append(f"[{global_idx}/{total}] {short} β Int:{result['int_count']} Ext:{result['ext_count']} Β· {flag_str}")
|
| 181 |
|
| 182 |
-
|
| 183 |
-
yield "\n".join(log_lines[-40:]),
|
|
|
|
|
|
|
| 184 |
|
| 185 |
if j < len(batch_urls) - 1:
|
| 186 |
time.sleep(delay)
|
| 187 |
|
| 188 |
-
# Save batch
|
| 189 |
if batch_results:
|
| 190 |
try:
|
| 191 |
save_batch_results(sb, run_id, batch_results)
|
|
@@ -195,12 +211,16 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
|
| 195 |
except Exception as e:
|
| 196 |
log_lines.append(f"β Batch save error: {str(e)[:60]}")
|
| 197 |
|
| 198 |
-
yield "\n".join(log_lines[-40:]), f"π
|
|
|
|
|
|
|
| 199 |
del batch_results
|
| 200 |
|
| 201 |
# ── ALL DONE — Orphan analysis ──
|
| 202 |
log_lines.append("π Running orphan page analysis...")
|
| 203 |
-
yield "\n".join(log_lines[-40:]),
|
|
|
|
|
|
|
| 204 |
|
| 205 |
all_pages = get_all_page_results(sb, run_id)
|
| 206 |
all_results = [p['result'] for p in all_pages]
|
|
@@ -233,15 +253,22 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
|
| 233 |
log_lines.append("β Go to Past Runs tab to generate report")
|
| 234 |
|
| 235 |
audit_state.set_running(False)
|
| 236 |
-
yield "\n".join(log_lines[-40:]), f"β
Complete β {len(all_results)} pages",
|
|
|
|
|
|
|
| 237 |
|
| 238 |
except Exception as e:
|
| 239 |
log_lines.append(f"β Error: {str(e)}")
|
| 240 |
audit_state.set_running(False)
|
| 241 |
if run_id:
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
|
| 247 |
def pause_audit():
|
|
@@ -259,11 +286,15 @@ def pause_audit():
|
|
| 259 |
def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
| 260 |
"""Resume a paused/interrupted run."""
|
| 261 |
if sb is None:
|
| 262 |
-
yield "β Supabase not connected.", "",
|
|
|
|
|
|
|
| 263 |
return
|
| 264 |
|
| 265 |
if not run_id:
|
| 266 |
-
yield "β No run selected.", "",
|
|
|
|
|
|
|
| 267 |
return
|
| 268 |
|
| 269 |
all_urls_for_run = get_pending_urls(sb, run_id)
|
|
@@ -272,19 +303,35 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 272 |
|
| 273 |
if not remaining:
|
| 274 |
update_run_status(sb, run_id, "completed", len(done_urls))
|
| 275 |
-
yield "β
All pages already audited!", "",
|
|
|
|
|
|
|
| 276 |
return
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
audit_state.set_running(True, run_id)
|
| 279 |
audit_state.resume()
|
| 280 |
update_run_status(sb, run_id, "running")
|
| 281 |
|
| 282 |
total = len(all_urls_for_run)
|
|
|
|
|
|
|
|
|
|
| 283 |
start_time = time.time()
|
| 284 |
batch_num = 0
|
| 285 |
log_lines = [f"βΆοΈ Resuming β {len(remaining)} pages remaining ({len(done_urls)} already done)"]
|
| 286 |
|
| 287 |
-
yield "\n".join(log_lines), f"π Resuming: {len(done_urls)}/{total}",
|
|
|
|
|
|
|
| 288 |
|
| 289 |
try:
|
| 290 |
for batch_start in range(0, len(remaining), batch_size):
|
|
@@ -293,7 +340,9 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 293 |
update_run_status(sb, run_id, "paused", completed)
|
| 294 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total}")
|
| 295 |
audit_state.set_running(False)
|
| 296 |
-
yield "\n".join(log_lines[-40:]), "
|
|
|
|
|
|
|
| 297 |
return
|
| 298 |
|
| 299 |
batch_end = min(batch_start + batch_size, len(remaining))
|
|
@@ -309,7 +358,9 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 309 |
update_run_status(sb, run_id, "paused", completed)
|
| 310 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total}")
|
| 311 |
audit_state.set_running(False)
|
| 312 |
-
yield "\n".join(log_lines[-40:]), "
|
|
|
|
|
|
|
| 313 |
return
|
| 314 |
|
| 315 |
global_idx = len(done_urls) + batch_start + j + 1
|
|
@@ -334,7 +385,10 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 334 |
flag_str = f"🔴 {b} broken" if b else "✅"
|
| 335 |
log_lines.append(f"[{global_idx}/{total}] {short} Β· {flag_str}")
|
| 336 |
|
| 337 |
-
yield "\n".join(log_lines[-40:]),
|
|
|
|
|
|
|
|
|
|
| 338 |
if j < len(batch_urls) - 1:
|
| 339 |
time.sleep(delay)
|
| 340 |
|
|
@@ -347,7 +401,9 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 347 |
|
| 348 |
# Orphan analysis
|
| 349 |
log_lines.append("π Orphan analysis...")
|
| 350 |
-
yield "\n".join(log_lines[-40:]), "π Orphan analysis...",
|
|
|
|
|
|
|
| 351 |
|
| 352 |
all_pages = get_all_page_results(sb, run_id)
|
| 353 |
all_results = [p['result'] for p in all_pages]
|
|
@@ -377,14 +433,21 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 377 |
total_time = time.time() - start_time
|
| 378 |
log_lines.append(f"✅ COMPLETE! {len(all_results)} pages in {total_time:.0f}s · {len(orphans)} orphans")
|
| 379 |
audit_state.set_running(False)
|
| 380 |
-
yield "\n".join(log_lines[-40:]), f"β
Complete β {len(all_results)} pages",
|
|
|
|
|
|
|
| 381 |
|
| 382 |
except Exception as e:
|
| 383 |
log_lines.append(f"β Error: {str(e)}")
|
| 384 |
audit_state.set_running(False)
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
|
| 390 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -393,13 +456,12 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
|
| 393 |
|
| 394 |
def load_past_runs():
|
| 395 |
if sb is None:
|
| 396 |
-
return "<p>β Supabase not connected</p>", gr.
|
| 397 |
|
| 398 |
runs = get_all_runs(sb)
|
| 399 |
if not runs:
|
| 400 |
-
return "<p>No saved runs yet.</p>", gr.
|
| 401 |
|
| 402 |
-
# Build choices for dropdown
|
| 403 |
choices = []
|
| 404 |
for r in runs:
|
| 405 |
status = r.get('status', 'unknown')
|
|
@@ -408,7 +470,6 @@ def load_past_runs():
|
|
| 408 |
label = f"{r.get('name', 'Untitled')} [{status.upper()}] ({completed}/{total})"
|
| 409 |
choices.append((label, r['id']))
|
| 410 |
|
| 411 |
-
# Build HTML table
|
| 412 |
html = '<div style="max-height:400px;overflow-y:auto;">'
|
| 413 |
html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
|
| 414 |
html += '<tr style="background:#f1f5f9;"><th style="padding:10px;text-align:left;">Run Name</th><th style="padding:10px;text-align:center;">Status</th><th style="padding:10px;text-align:center;">Pages</th><th style="padding:10px;text-align:center;">Broken</th><th style="padding:10px;text-align:center;">Flags</th><th style="padding:10px;text-align:center;">Dups</th><th style="padding:10px;text-align:center;">Orphans</th></tr>'
|
|
@@ -430,7 +491,7 @@ def load_past_runs():
|
|
| 430 |
</tr>'''
|
| 431 |
|
| 432 |
html += '</table></div>'
|
| 433 |
-
return html, gr.
|
| 434 |
|
| 435 |
|
| 436 |
def generate_report_for_run(run_id, domain):
|
|
@@ -456,7 +517,6 @@ def generate_report_for_run(run_id, domain):
|
|
| 456 |
|
| 457 |
report_html = generate_report(results, orphan_urls, report_domain)
|
| 458 |
|
| 459 |
-
# Save to temp file
|
| 460 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Link_Audit_')
|
| 461 |
tmp.write(report_html.encode('utf-8'))
|
| 462 |
tmp.close()
|
|
@@ -516,21 +576,18 @@ css = """
|
|
| 516 |
.main-header h1 { margin: 0 0 4px 0; font-size: 24px; }
|
| 517 |
.main-header p { margin: 0; opacity: 0.8; font-size: 13px; }
|
| 518 |
.status-bar { background: #f1f5f9; border: 1px solid #e2e8f0; border-radius: 8px; padding: 10px 16px; font-family: monospace; font-size: 13px; font-weight: 600; }
|
| 519 |
-
.log-area textarea { font-family: 'JetBrains Mono', monospace !important; font-size: 12px !important; line-height: 1.6 !important; }
|
| 520 |
"""
|
| 521 |
|
| 522 |
with gr.Blocks(css=css, title="π Link Audit Tool", theme=gr.themes.Soft()) as app:
|
| 523 |
|
| 524 |
-
# Header
|
| 525 |
gr.HTML("""
|
| 526 |
<div class="main-header">
|
| 527 |
-
<p style="font-size:10px;font-weight:700;letter-spacing:1.5px;text-transform:uppercase;color:#93c5fd;margin-bottom:8px;">SEO
|
| 528 |
<h1>π Bulk Link Audit</h1>
|
| 529 |
<p>Upload URLs β batch crawl with auto-save β pause/resume anytime β generate interactive report</p>
|
| 530 |
</div>
|
| 531 |
""")
|
| 532 |
|
| 533 |
-
# Connection status
|
| 534 |
conn_status = "✅ Supabase Connected" if sb else "β Supabase Not Connected β add SUPABASE_URL and SUPABASE_KEY to Space secrets"
|
| 535 |
gr.HTML(f'<div class="status-bar">ποΈ {conn_status}</div>')
|
| 536 |
|
|
@@ -544,27 +601,24 @@ with gr.Blocks(css=css, title="π Link Audit Tool", theme=gr.themes.Soft()) as
|
|
| 544 |
pasted_urls = gr.Textbox(label="Or paste URLs (one per line)", lines=5, placeholder="https://www.example.com/blog/page1\nhttps://www.example.com/blog/page2")
|
| 545 |
|
| 546 |
with gr.Column(scale=1):
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
|
| 553 |
with gr.Row():
|
| 554 |
run_btn = gr.Button("π Run Audit", variant="primary", scale=2)
|
| 555 |
pause_btn = gr.Button("βΈοΈ Pause", variant="stop", scale=1, interactive=False)
|
| 556 |
|
| 557 |
-
progress_text = gr.Textbox(label="Status", interactive=False
|
| 558 |
-
log_output = gr.Textbox(label="Audit Log", lines=20, interactive=False
|
| 559 |
|
| 560 |
-
# Wire up run button (generator for streaming)
|
| 561 |
run_btn.click(
|
| 562 |
fn=run_audit,
|
| 563 |
-
inputs=[file_input, pasted_urls,
|
| 564 |
outputs=[log_output, progress_text, pause_btn, run_btn],
|
| 565 |
)
|
| 566 |
-
|
| 567 |
-
# Wire up pause button
|
| 568 |
pause_btn.click(fn=pause_audit, outputs=[progress_text])
|
| 569 |
|
| 570 |
# ─── TAB 2: PAST RUNS ───
|
|
@@ -583,33 +637,20 @@ with gr.Blocks(css=css, title="π Link Audit Tool", theme=gr.themes.Soft()) as
|
|
| 583 |
report_file = gr.File(label="Download Report", interactive=False)
|
| 584 |
csv_file = gr.File(label="Download CSV", interactive=False)
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False, elem_classes=["log-area"])
|
| 589 |
resume_pause_btn = gr.Button("βΈοΈ Pause Resume", variant="stop", interactive=False)
|
| 590 |
|
| 591 |
-
# Refresh
|
| 592 |
refresh_btn.click(fn=load_past_runs, outputs=[runs_html, run_dropdown])
|
| 593 |
|
| 594 |
-
# Generate report
|
| 595 |
-
def gen_report_wrapper(run_id, domain_val):
|
| 596 |
-
filepath, msg = generate_report_for_run(run_id, domain_val)
|
| 597 |
-
return filepath, msg
|
| 598 |
-
|
| 599 |
report_btn.click(
|
| 600 |
-
fn=
|
| 601 |
-
inputs=[run_dropdown,
|
| 602 |
outputs=[report_file, action_status],
|
| 603 |
)
|
| 604 |
|
| 605 |
-
|
| 606 |
-
def csv_wrapper(run_id):
|
| 607 |
-
filepath, msg = generate_csv_for_run(run_id)
|
| 608 |
-
return filepath, msg
|
| 609 |
-
|
| 610 |
-
csv_btn.click(fn=csv_wrapper, inputs=[run_dropdown], outputs=[csv_file, action_status])
|
| 611 |
|
| 612 |
-
# Delete
|
| 613 |
def delete_wrapper(run_id):
|
| 614 |
msg = delete_selected_run(run_id)
|
| 615 |
html, dropdown = load_past_runs()
|
|
@@ -617,15 +658,13 @@ with gr.Blocks(css=css, title="π Link Audit Tool", theme=gr.themes.Soft()) as
|
|
| 617 |
|
| 618 |
delete_btn.click(fn=delete_wrapper, inputs=[run_dropdown], outputs=[action_status, runs_html, run_dropdown])
|
| 619 |
|
| 620 |
-
# Resume
|
| 621 |
resume_btn.click(
|
| 622 |
fn=resume_audit,
|
| 623 |
-
inputs=[run_dropdown,
|
| 624 |
outputs=[resume_log, resume_progress, resume_pause_btn, resume_btn],
|
| 625 |
)
|
| 626 |
resume_pause_btn.click(fn=pause_audit, outputs=[resume_progress])
|
| 627 |
|
| 628 |
-
# Auto-load runs on startup
|
| 629 |
app.load(fn=load_past_runs, outputs=[runs_html, run_dropdown])
|
| 630 |
|
| 631 |
|
|
|
|
| 71 |
def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
|
| 72 |
"""Main audit generator — yields progress updates."""
|
| 73 |
if sb is None:
|
| 74 |
+
yield ("β Supabase not connected. Set SUPABASE_URL and SUPABASE_KEY in Space secrets.",
|
| 75 |
+
"",
|
| 76 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 77 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 78 |
return
|
| 79 |
|
| 80 |
# Parse URLs
|
| 81 |
urls = []
|
| 82 |
if file is not None:
|
| 83 |
try:
|
| 84 |
+
fpath = file.name if hasattr(file, 'name') else file
|
| 85 |
+
if str(fpath).endswith('.csv'):
|
| 86 |
+
df = pd.read_csv(fpath)
|
| 87 |
else:
|
| 88 |
+
df = pd.read_excel(fpath)
|
| 89 |
url_col = None
|
| 90 |
for col in df.columns:
|
| 91 |
sample = str(df[col].iloc[0]).strip().lower()
|
|
|
|
| 96 |
url_col = df.columns[0]
|
| 97 |
urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
|
| 98 |
except Exception as e:
|
| 99 |
+
yield (f"β File error: {e}", "",
|
| 100 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 101 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 102 |
return
|
| 103 |
elif pasted_urls and pasted_urls.strip():
|
| 104 |
urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
|
| 105 |
|
| 106 |
if not urls:
|
| 107 |
+
yield ("β No valid URLs found. Upload a file or paste URLs.", "",
|
| 108 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 109 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 110 |
return
|
| 111 |
|
| 112 |
# Deduplicate preserving order
|
|
|
|
| 125 |
audit_state.resume() # Reset pause flag
|
| 126 |
|
| 127 |
total = len(urls)
|
| 128 |
+
batch_size = int(batch_size)
|
| 129 |
+
timeout = int(timeout)
|
| 130 |
+
workers = int(workers)
|
| 131 |
start_time = time.time()
|
| 132 |
batch_num = 0
|
| 133 |
log_lines = []
|
| 134 |
|
| 135 |
+
yield (f"π Started: {run_name}\nπ¦ {total} URLs Β· Batch size: {batch_size}", "",
|
| 136 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=True),
|
| 137 |
+
gr.Button(value="π Running...", variant="primary", interactive=False))
|
| 138 |
|
| 139 |
try:
|
| 140 |
for batch_start in range(0, total, batch_size):
|
|
|
|
| 141 |
if audit_state.is_paused():
|
| 142 |
completed = get_completed_count(sb, run_id)
|
| 143 |
update_run_status(sb, run_id, "paused", completed)
|
| 144 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total} β resume from Past Runs")
|
| 145 |
audit_state.set_running(False)
|
| 146 |
+
yield ("\n".join(log_lines[-40:]), f"βΈοΈ Paused at {completed}/{total}",
|
| 147 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 148 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 149 |
return
|
| 150 |
|
| 151 |
batch_end = min(batch_start + batch_size, total)
|
|
|
|
| 154 |
batch_results = []
|
| 155 |
|
| 156 |
for j, url in enumerate(batch_urls):
|
|
|
|
| 157 |
if audit_state.is_paused():
|
|
|
|
| 158 |
if batch_results:
|
| 159 |
save_batch_results(sb, run_id, batch_results)
|
| 160 |
completed = get_completed_count(sb, run_id)
|
| 161 |
update_run_status(sb, run_id, "paused", completed)
|
| 162 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total}")
|
| 163 |
audit_state.set_running(False)
|
| 164 |
+
yield ("\n".join(log_lines[-40:]), f"βΈοΈ Paused at {completed}/{total}",
|
| 165 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 166 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 167 |
return
|
| 168 |
|
| 169 |
global_idx = batch_start + j + 1
|
|
|
|
| 193 |
flag_str = " · ".join(flags) if flags else "✅"
|
| 194 |
log_lines.append(f"[{global_idx}/{total}] {short} β Int:{result['int_count']} Ext:{result['ext_count']} Β· {flag_str}")
|
| 195 |
|
| 196 |
+
progress_msg = f"π Progress: {global_idx}/{total} ({global_idx*100//total}%) Β· Batch {batch_num} Β· ETA: {eta_str}"
|
| 197 |
+
yield ("\n".join(log_lines[-40:]), progress_msg,
|
| 198 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=True),
|
| 199 |
+
gr.Button(value="π Running...", variant="primary", interactive=False))
|
| 200 |
|
| 201 |
if j < len(batch_urls) - 1:
|
| 202 |
time.sleep(delay)
|
| 203 |
|
| 204 |
+
# Save batch
|
| 205 |
if batch_results:
|
| 206 |
try:
|
| 207 |
save_batch_results(sb, run_id, batch_results)
|
|
|
|
| 211 |
except Exception as e:
|
| 212 |
log_lines.append(f"β Batch save error: {str(e)[:60]}")
|
| 213 |
|
| 214 |
+
yield ("\n".join(log_lines[-40:]), f"π Saved batch {batch_num} β {min(batch_end, total)}/{total}",
|
| 215 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=True),
|
| 216 |
+
gr.Button(value="π Running...", variant="primary", interactive=False))
|
| 217 |
del batch_results
|
| 218 |
|
| 219 |
# ── ALL DONE — Orphan analysis ──
|
| 220 |
log_lines.append("π Running orphan page analysis...")
|
| 221 |
+
yield ("\n".join(log_lines[-40:]), "π Orphan analysis...",
|
| 222 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 223 |
+
gr.Button(value="π Running...", variant="primary", interactive=False))
|
| 224 |
|
| 225 |
all_pages = get_all_page_results(sb, run_id)
|
| 226 |
all_results = [p['result'] for p in all_pages]
|
|
|
|
| 253 |
log_lines.append("β Go to Past Runs tab to generate report")
|
| 254 |
|
| 255 |
audit_state.set_running(False)
|
| 256 |
+
yield ("\n".join(log_lines[-40:]), f"β
Complete β {len(all_results)} pages",
|
| 257 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 258 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 259 |
|
| 260 |
except Exception as e:
|
| 261 |
log_lines.append(f"β Error: {str(e)}")
|
| 262 |
audit_state.set_running(False)
|
| 263 |
if run_id:
|
| 264 |
+
try:
|
| 265 |
+
completed = get_completed_count(sb, run_id)
|
| 266 |
+
update_run_status(sb, run_id, "paused", completed)
|
| 267 |
+
except:
|
| 268 |
+
pass
|
| 269 |
+
yield ("\n".join(log_lines[-40:]), f"β Error β saved progress to Supabase",
|
| 270 |
+
gr.Button(value="βΈοΈ Pause", variant="stop", interactive=False),
|
| 271 |
+
gr.Button(value="π Run Audit", variant="primary", interactive=True))
|
| 272 |
|
| 273 |
|
| 274 |
def pause_audit():
|
|
|
|
| 286 |
def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
|
| 287 |
"""Resume a paused/interrupted run."""
|
| 288 |
if sb is None:
|
| 289 |
+
yield ("β Supabase not connected.", "",
|
| 290 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 291 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 292 |
return
|
| 293 |
|
| 294 |
if not run_id:
|
| 295 |
+
yield ("β No run selected.", "",
|
| 296 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 297 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 298 |
return
|
| 299 |
|
| 300 |
all_urls_for_run = get_pending_urls(sb, run_id)
|
|
|
|
| 303 |
|
| 304 |
if not remaining:
|
| 305 |
update_run_status(sb, run_id, "completed", len(done_urls))
|
| 306 |
+
yield ("β
All pages already audited!", "",
|
| 307 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 308 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 309 |
return
|
| 310 |
|
| 311 |
+
# Get domain from run
|
| 312 |
+
try:
|
| 313 |
+
runs = get_all_runs(sb)
|
| 314 |
+
run_data = next((r for r in runs if r['id'] == run_id), None)
|
| 315 |
+
if run_data:
|
| 316 |
+
domain = run_data.get('domain', domain)
|
| 317 |
+
except:
|
| 318 |
+
pass
|
| 319 |
+
|
| 320 |
audit_state.set_running(True, run_id)
|
| 321 |
audit_state.resume()
|
| 322 |
update_run_status(sb, run_id, "running")
|
| 323 |
|
| 324 |
total = len(all_urls_for_run)
|
| 325 |
+
batch_size = int(batch_size)
|
| 326 |
+
timeout = int(timeout)
|
| 327 |
+
workers = int(workers)
|
| 328 |
start_time = time.time()
|
| 329 |
batch_num = 0
|
| 330 |
log_lines = [f"βΆοΈ Resuming β {len(remaining)} pages remaining ({len(done_urls)} already done)"]
|
| 331 |
|
| 332 |
+
yield ("\n".join(log_lines), f"π Resuming: {len(done_urls)}/{total}",
|
| 333 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=True),
|
| 334 |
+
gr.Button(value="βΆοΈ Resuming...", variant="primary", interactive=False))
|
| 335 |
|
| 336 |
try:
|
| 337 |
for batch_start in range(0, len(remaining), batch_size):
|
|
|
|
| 340 |
update_run_status(sb, run_id, "paused", completed)
|
| 341 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total}")
|
| 342 |
audit_state.set_running(False)
|
| 343 |
+
yield ("\n".join(log_lines[-40:]), f"βΈοΈ Paused at {completed}/{total}",
|
| 344 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 345 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 346 |
return
|
| 347 |
|
| 348 |
batch_end = min(batch_start + batch_size, len(remaining))
|
|
|
|
| 358 |
update_run_status(sb, run_id, "paused", completed)
|
| 359 |
log_lines.append(f"βΈοΈ PAUSED at {completed}/{total}")
|
| 360 |
audit_state.set_running(False)
|
| 361 |
+
yield ("\n".join(log_lines[-40:]), f"βΈοΈ Paused at {completed}/{total}",
|
| 362 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 363 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 364 |
return
|
| 365 |
|
| 366 |
global_idx = len(done_urls) + batch_start + j + 1
|
|
|
|
| 385 |
flag_str = f"🔴 {b} broken" if b else "✅"
|
| 386 |
log_lines.append(f"[{global_idx}/{total}] {short} Β· {flag_str}")
|
| 387 |
|
| 388 |
+
yield ("\n".join(log_lines[-40:]),
|
| 389 |
+
f"π Progress: {global_idx}/{total} ({global_idx*100//total}%) Β· ETA: {eta_str}",
|
| 390 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=True),
|
| 391 |
+
gr.Button(value="βΆοΈ Resuming...", variant="primary", interactive=False))
|
| 392 |
if j < len(batch_urls) - 1:
|
| 393 |
time.sleep(delay)
|
| 394 |
|
|
|
|
| 401 |
|
| 402 |
# Orphan analysis
|
| 403 |
log_lines.append("π Orphan analysis...")
|
| 404 |
+
yield ("\n".join(log_lines[-40:]), "π Orphan analysis...",
|
| 405 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 406 |
+
gr.Button(value="βΆοΈ Resuming...", variant="primary", interactive=False))
|
| 407 |
|
| 408 |
all_pages = get_all_page_results(sb, run_id)
|
| 409 |
all_results = [p['result'] for p in all_pages]
|
|
|
|
| 433 |
total_time = time.time() - start_time
|
| 434 |
log_lines.append(f"✅ COMPLETE! {len(all_results)} pages in {total_time:.0f}s · {len(orphans)} orphans")
|
| 435 |
audit_state.set_running(False)
|
| 436 |
+
yield ("\n".join(log_lines[-40:]), f"β
Complete β {len(all_results)} pages",
|
| 437 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 438 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 439 |
|
| 440 |
except Exception as e:
|
| 441 |
log_lines.append(f"β Error: {str(e)}")
|
| 442 |
audit_state.set_running(False)
|
| 443 |
+
try:
|
| 444 |
+
completed = get_completed_count(sb, run_id)
|
| 445 |
+
update_run_status(sb, run_id, "paused", completed)
|
| 446 |
+
except:
|
| 447 |
+
pass
|
| 448 |
+
yield ("\n".join(log_lines[-40:]), "β Error",
|
| 449 |
+
gr.Button(value="βΈοΈ Pause Resume", variant="stop", interactive=False),
|
| 450 |
+
gr.Button(value="βΆοΈ Resume Audit", variant="primary", interactive=True))
|
| 451 |
|
| 452 |
|
| 453 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 456 |
|
| 457 |
def load_past_runs():
|
| 458 |
if sb is None:
|
| 459 |
+
return "<p>β Supabase not connected</p>", gr.Dropdown(choices=[], value=None)
|
| 460 |
|
| 461 |
runs = get_all_runs(sb)
|
| 462 |
if not runs:
|
| 463 |
+
return "<p>No saved runs yet.</p>", gr.Dropdown(choices=[], value=None)
|
| 464 |
|
|
|
|
| 465 |
choices = []
|
| 466 |
for r in runs:
|
| 467 |
status = r.get('status', 'unknown')
|
|
|
|
| 470 |
label = f"{r.get('name', 'Untitled')} [{status.upper()}] ({completed}/{total})"
|
| 471 |
choices.append((label, r['id']))
|
| 472 |
|
|
|
|
| 473 |
html = '<div style="max-height:400px;overflow-y:auto;">'
|
| 474 |
html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
|
| 475 |
html += '<tr style="background:#f1f5f9;"><th style="padding:10px;text-align:left;">Run Name</th><th style="padding:10px;text-align:center;">Status</th><th style="padding:10px;text-align:center;">Pages</th><th style="padding:10px;text-align:center;">Broken</th><th style="padding:10px;text-align:center;">Flags</th><th style="padding:10px;text-align:center;">Dups</th><th style="padding:10px;text-align:center;">Orphans</th></tr>'
|
|
|
|
| 491 |
</tr>'''
|
| 492 |
|
| 493 |
html += '</table></div>'
|
| 494 |
+
return html, gr.Dropdown(choices=choices, value=choices[0][1] if choices else None)
|
| 495 |
|
| 496 |
|
| 497 |
def generate_report_for_run(run_id, domain):
|
|
|
|
| 517 |
|
| 518 |
report_html = generate_report(results, orphan_urls, report_domain)
|
| 519 |
|
|
|
|
| 520 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Link_Audit_')
|
| 521 |
tmp.write(report_html.encode('utf-8'))
|
| 522 |
tmp.close()
|
|
|
|
| 576 |
.main-header h1 { margin: 0 0 4px 0; font-size: 24px; }
|
| 577 |
.main-header p { margin: 0; opacity: 0.8; font-size: 13px; }
|
| 578 |
.status-bar { background: #f1f5f9; border: 1px solid #e2e8f0; border-radius: 8px; padding: 10px 16px; font-family: monospace; font-size: 13px; font-weight: 600; }
|
|
|
|
| 579 |
"""
|
| 580 |
|
| 581 |
with gr.Blocks(css=css, title="π Link Audit Tool", theme=gr.themes.Soft()) as app:
|
| 582 |
|
|
|
|
| 583 |
gr.HTML("""
|
| 584 |
<div class="main-header">
|
| 585 |
+
<p style="font-size:10px;font-weight:700;letter-spacing:1.5px;text-transform:uppercase;color:#93c5fd;margin-bottom:8px;">SEO LINK AUDIT TOOL</p>
|
| 586 |
<h1>π Bulk Link Audit</h1>
|
| 587 |
<p>Upload URLs β batch crawl with auto-save β pause/resume anytime β generate interactive report</p>
|
| 588 |
</div>
|
| 589 |
""")
|
| 590 |
|
|
|
|
| 591 |
conn_status = "✅ Supabase Connected" if sb else "β Supabase Not Connected β add SUPABASE_URL and SUPABASE_KEY to Space secrets"
|
| 592 |
gr.HTML(f'<div class="status-bar">ποΈ {conn_status}</div>')
|
| 593 |
|
|
|
|
| 601 |
pasted_urls = gr.Textbox(label="Or paste URLs (one per line)", lines=5, placeholder="https://www.example.com/blog/page1\nhttps://www.example.com/blog/page2")
|
| 602 |
|
| 603 |
with gr.Column(scale=1):
|
| 604 |
+
domain_input = gr.Textbox(label="Your Domain", value="edstellar.com")
|
| 605 |
+
batch_size_input = gr.Slider(5, 50, value=25, step=5, label="Batch Size")
|
| 606 |
+
timeout_input = gr.Slider(5, 60, value=15, step=5, label="Timeout (s)")
|
| 607 |
+
delay_input = gr.Slider(0, 5, value=1.0, step=0.5, label="Delay between pages (s)")
|
| 608 |
+
workers_input = gr.Slider(1, 10, value=5, step=1, label="Parallel link checks")
|
| 609 |
|
| 610 |
with gr.Row():
|
| 611 |
run_btn = gr.Button("π Run Audit", variant="primary", scale=2)
|
| 612 |
pause_btn = gr.Button("βΈοΈ Pause", variant="stop", scale=1, interactive=False)
|
| 613 |
|
| 614 |
+
progress_text = gr.Textbox(label="Status", interactive=False)
|
| 615 |
+
log_output = gr.Textbox(label="Audit Log", lines=20, interactive=False)
|
| 616 |
|
|
|
|
| 617 |
run_btn.click(
|
| 618 |
fn=run_audit,
|
| 619 |
+
inputs=[file_input, pasted_urls, domain_input, batch_size_input, timeout_input, delay_input, workers_input],
|
| 620 |
outputs=[log_output, progress_text, pause_btn, run_btn],
|
| 621 |
)
|
|
|
|
|
|
|
| 622 |
pause_btn.click(fn=pause_audit, outputs=[progress_text])
|
| 623 |
|
| 624 |
# ─── TAB 2: PAST RUNS ───
|
|
|
|
| 637 |
report_file = gr.File(label="Download Report", interactive=False)
|
| 638 |
csv_file = gr.File(label="Download CSV", interactive=False)
|
| 639 |
|
| 640 |
+
resume_progress = gr.Textbox(label="Resume Status", interactive=False)
|
| 641 |
+
resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
|
|
|
|
| 642 |
resume_pause_btn = gr.Button("βΈοΈ Pause Resume", variant="stop", interactive=False)
|
| 643 |
|
|
|
|
| 644 |
refresh_btn.click(fn=load_past_runs, outputs=[runs_html, run_dropdown])
|
| 645 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
report_btn.click(
|
| 647 |
+
fn=generate_report_for_run,
|
| 648 |
+
inputs=[run_dropdown, domain_input],
|
| 649 |
outputs=[report_file, action_status],
|
| 650 |
)
|
| 651 |
|
| 652 |
+
csv_btn.click(fn=generate_csv_for_run, inputs=[run_dropdown], outputs=[csv_file, action_status])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
|
|
|
|
| 654 |
def delete_wrapper(run_id):
|
| 655 |
msg = delete_selected_run(run_id)
|
| 656 |
html, dropdown = load_past_runs()
|
|
|
|
| 658 |
|
| 659 |
delete_btn.click(fn=delete_wrapper, inputs=[run_dropdown], outputs=[action_status, runs_html, run_dropdown])
|
| 660 |
|
|
|
|
| 661 |
resume_btn.click(
|
| 662 |
fn=resume_audit,
|
| 663 |
+
inputs=[run_dropdown, domain_input, batch_size_input, timeout_input, delay_input, workers_input],
|
| 664 |
outputs=[resume_log, resume_progress, resume_pause_btn, resume_btn],
|
| 665 |
)
|
| 666 |
resume_pause_btn.click(fn=pause_audit, outputs=[resume_progress])
|
| 667 |
|
|
|
|
| 668 |
app.load(fn=load_past_runs, outputs=[runs_html, run_dropdown])
|
| 669 |
|
| 670 |
|