vijaykumaredstellar committed on
Commit
1af7764
·
verified ·
1 Parent(s): 7f10996

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -75
app.py CHANGED
@@ -71,17 +71,21 @@ audit_state = AuditState()
71
  def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
72
  """Main audit generator β€” yields progress updates."""
73
  if sb is None:
74
- yield "❌ Supabase not connected. Set SUPABASE_URL and SUPABASE_KEY in Space secrets.", "", gr.update(), gr.update()
 
 
 
75
  return
76
 
77
  # Parse URLs
78
  urls = []
79
  if file is not None:
80
  try:
81
- if file.name.endswith('.csv'):
82
- df = pd.read_csv(file.name)
 
83
  else:
84
- df = pd.read_excel(file.name)
85
  url_col = None
86
  for col in df.columns:
87
  sample = str(df[col].iloc[0]).strip().lower()
@@ -92,13 +96,17 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
92
  url_col = df.columns[0]
93
  urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
94
  except Exception as e:
95
- yield f"❌ File error: {e}", "", gr.update(), gr.update()
 
 
96
  return
97
  elif pasted_urls and pasted_urls.strip():
98
  urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
99
 
100
  if not urls:
101
- yield "⚠ No valid URLs found. Upload a file or paste URLs.", "", gr.update(), gr.update()
 
 
102
  return
103
 
104
  # Deduplicate preserving order
@@ -117,21 +125,27 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
117
  audit_state.resume() # Reset pause flag
118
 
119
  total = len(urls)
 
 
 
120
  start_time = time.time()
121
  batch_num = 0
122
  log_lines = []
123
 
124
- yield f"πŸš€ Started: {run_name}\nπŸ“¦ {total} URLs Β· Batch size: {batch_size}", "", gr.update(interactive=True), gr.update(interactive=False)
 
 
125
 
126
  try:
127
  for batch_start in range(0, total, batch_size):
128
- # Check for pause
129
  if audit_state.is_paused():
130
  completed = get_completed_count(sb, run_id)
131
  update_run_status(sb, run_id, "paused", completed)
132
  log_lines.append(f"⏸️ PAUSED at {completed}/{total} β€” resume from Past Runs")
133
  audit_state.set_running(False)
134
- yield "\n".join(log_lines[-40:]), "", gr.update(interactive=False), gr.update(interactive=False)
 
 
135
  return
136
 
137
  batch_end = min(batch_start + batch_size, total)
@@ -140,16 +154,16 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
140
  batch_results = []
141
 
142
  for j, url in enumerate(batch_urls):
143
- # Check pause between each URL
144
  if audit_state.is_paused():
145
- # Save partial batch
146
  if batch_results:
147
  save_batch_results(sb, run_id, batch_results)
148
  completed = get_completed_count(sb, run_id)
149
  update_run_status(sb, run_id, "paused", completed)
150
  log_lines.append(f"⏸️ PAUSED at {completed}/{total}")
151
  audit_state.set_running(False)
152
- yield "\n".join(log_lines[-40:]), "", gr.update(interactive=False), gr.update(interactive=False)
 
 
153
  return
154
 
155
  global_idx = batch_start + j + 1
@@ -179,13 +193,15 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
179
  flag_str = " Β· ".join(flags) if flags else "βœ…"
180
  log_lines.append(f"[{global_idx}/{total}] {short} β€” Int:{result['int_count']} Ext:{result['ext_count']} Β· {flag_str}")
181
 
182
- progress_text = f"πŸ“Š Progress: {global_idx}/{total} ({global_idx*100//total}%) Β· Batch {batch_num} Β· ETA: {eta_str}"
183
- yield "\n".join(log_lines[-40:]), progress_text, gr.update(interactive=True), gr.update(interactive=False)
 
 
184
 
185
  if j < len(batch_urls) - 1:
186
  time.sleep(delay)
187
 
188
- # Save batch to Supabase
189
  if batch_results:
190
  try:
191
  save_batch_results(sb, run_id, batch_results)
@@ -195,12 +211,16 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
195
  except Exception as e:
196
  log_lines.append(f"⚠ Batch save error: {str(e)[:60]}")
197
 
198
- yield "\n".join(log_lines[-40:]), f"πŸ“Š Progress: {min(batch_end, total)}/{total} Β· Saved batch {batch_num}", gr.update(interactive=True), gr.update(interactive=False)
 
 
199
  del batch_results
200
 
201
  # ── ALL DONE β€” Orphan analysis ──
202
  log_lines.append("πŸ” Running orphan page analysis...")
203
- yield "\n".join(log_lines[-40:]), f"πŸ“Š Orphan analysis...", gr.update(interactive=False), gr.update(interactive=False)
 
 
204
 
205
  all_pages = get_all_page_results(sb, run_id)
206
  all_results = [p['result'] for p in all_pages]
@@ -233,15 +253,22 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
233
  log_lines.append("β†’ Go to Past Runs tab to generate report")
234
 
235
  audit_state.set_running(False)
236
- yield "\n".join(log_lines[-40:]), f"βœ… Complete β€” {len(all_results)} pages", gr.update(interactive=False), gr.update(interactive=False)
 
 
237
 
238
  except Exception as e:
239
  log_lines.append(f"❌ Error: {str(e)}")
240
  audit_state.set_running(False)
241
  if run_id:
242
- completed = get_completed_count(sb, run_id)
243
- update_run_status(sb, run_id, "paused", completed)
244
- yield "\n".join(log_lines[-40:]), f"❌ Error β€” saved progress to Supabase", gr.update(interactive=False), gr.update(interactive=False)
 
 
 
 
 
245
 
246
 
247
  def pause_audit():
@@ -259,11 +286,15 @@ def pause_audit():
259
  def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
260
  """Resume a paused/interrupted run."""
261
  if sb is None:
262
- yield "❌ Supabase not connected.", "", gr.update(), gr.update()
 
 
263
  return
264
 
265
  if not run_id:
266
- yield "⚠ No run selected.", "", gr.update(), gr.update()
 
 
267
  return
268
 
269
  all_urls_for_run = get_pending_urls(sb, run_id)
@@ -272,19 +303,35 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
272
 
273
  if not remaining:
274
  update_run_status(sb, run_id, "completed", len(done_urls))
275
- yield "βœ… All pages already audited!", "", gr.update(), gr.update()
 
 
276
  return
277
 
 
 
 
 
 
 
 
 
 
278
  audit_state.set_running(True, run_id)
279
  audit_state.resume()
280
  update_run_status(sb, run_id, "running")
281
 
282
  total = len(all_urls_for_run)
 
 
 
283
  start_time = time.time()
284
  batch_num = 0
285
  log_lines = [f"▢️ Resuming β€” {len(remaining)} pages remaining ({len(done_urls)} already done)"]
286
 
287
- yield "\n".join(log_lines), f"πŸ“Š Resuming: {len(done_urls)}/{total}", gr.update(interactive=True), gr.update(interactive=False)
 
 
288
 
289
  try:
290
  for batch_start in range(0, len(remaining), batch_size):
@@ -293,7 +340,9 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
293
  update_run_status(sb, run_id, "paused", completed)
294
  log_lines.append(f"⏸️ PAUSED at {completed}/{total}")
295
  audit_state.set_running(False)
296
- yield "\n".join(log_lines[-40:]), "", gr.update(interactive=False), gr.update(interactive=False)
 
 
297
  return
298
 
299
  batch_end = min(batch_start + batch_size, len(remaining))
@@ -309,7 +358,9 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
309
  update_run_status(sb, run_id, "paused", completed)
310
  log_lines.append(f"⏸️ PAUSED at {completed}/{total}")
311
  audit_state.set_running(False)
312
- yield "\n".join(log_lines[-40:]), "", gr.update(interactive=False), gr.update(interactive=False)
 
 
313
  return
314
 
315
  global_idx = len(done_urls) + batch_start + j + 1
@@ -334,7 +385,10 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
334
  flag_str = f"πŸ”΄ {b} broken" if b else "βœ…"
335
  log_lines.append(f"[{global_idx}/{total}] {short} Β· {flag_str}")
336
 
337
- yield "\n".join(log_lines[-40:]), f"πŸ“Š Progress: {global_idx}/{total} ({global_idx*100//total}%) Β· ETA: {eta_str}", gr.update(interactive=True), gr.update(interactive=False)
 
 
 
338
  if j < len(batch_urls) - 1:
339
  time.sleep(delay)
340
 
@@ -347,7 +401,9 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
347
 
348
  # Orphan analysis
349
  log_lines.append("πŸ” Orphan analysis...")
350
- yield "\n".join(log_lines[-40:]), "πŸ“Š Orphan analysis...", gr.update(interactive=False), gr.update(interactive=False)
 
 
351
 
352
  all_pages = get_all_page_results(sb, run_id)
353
  all_results = [p['result'] for p in all_pages]
@@ -377,14 +433,21 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
377
  total_time = time.time() - start_time
378
  log_lines.append(f"βœ… COMPLETE! {len(all_results)} pages in {total_time:.0f}s Β· {len(orphans)} orphans")
379
  audit_state.set_running(False)
380
- yield "\n".join(log_lines[-40:]), f"βœ… Complete β€” {len(all_results)} pages", gr.update(interactive=False), gr.update(interactive=False)
 
 
381
 
382
  except Exception as e:
383
  log_lines.append(f"❌ Error: {str(e)}")
384
  audit_state.set_running(False)
385
- completed = get_completed_count(sb, run_id)
386
- update_run_status(sb, run_id, "paused", completed)
387
- yield "\n".join(log_lines[-40:]), "❌ Error", gr.update(interactive=False), gr.update(interactive=False)
 
 
 
 
 
388
 
389
 
390
  # ═══════════════════════════════════════════════════
@@ -393,13 +456,12 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
393
 
394
  def load_past_runs():
395
  if sb is None:
396
- return "<p>❌ Supabase not connected</p>", gr.update(choices=[], value=None)
397
 
398
  runs = get_all_runs(sb)
399
  if not runs:
400
- return "<p>No saved runs yet.</p>", gr.update(choices=[], value=None)
401
 
402
- # Build choices for dropdown
403
  choices = []
404
  for r in runs:
405
  status = r.get('status', 'unknown')
@@ -408,7 +470,6 @@ def load_past_runs():
408
  label = f"{r.get('name', 'Untitled')} [{status.upper()}] ({completed}/{total})"
409
  choices.append((label, r['id']))
410
 
411
- # Build HTML table
412
  html = '<div style="max-height:400px;overflow-y:auto;">'
413
  html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
414
  html += '<tr style="background:#f1f5f9;"><th style="padding:10px;text-align:left;">Run Name</th><th style="padding:10px;text-align:center;">Status</th><th style="padding:10px;text-align:center;">Pages</th><th style="padding:10px;text-align:center;">Broken</th><th style="padding:10px;text-align:center;">Flags</th><th style="padding:10px;text-align:center;">Dups</th><th style="padding:10px;text-align:center;">Orphans</th></tr>'
@@ -430,7 +491,7 @@ def load_past_runs():
430
  </tr>'''
431
 
432
  html += '</table></div>'
433
- return html, gr.update(choices=choices, value=choices[0][1] if choices else None)
434
 
435
 
436
  def generate_report_for_run(run_id, domain):
@@ -456,7 +517,6 @@ def generate_report_for_run(run_id, domain):
456
 
457
  report_html = generate_report(results, orphan_urls, report_domain)
458
 
459
- # Save to temp file
460
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Link_Audit_')
461
  tmp.write(report_html.encode('utf-8'))
462
  tmp.close()
@@ -516,21 +576,18 @@ css = """
516
  .main-header h1 { margin: 0 0 4px 0; font-size: 24px; }
517
  .main-header p { margin: 0; opacity: 0.8; font-size: 13px; }
518
  .status-bar { background: #f1f5f9; border: 1px solid #e2e8f0; border-radius: 8px; padding: 10px 16px; font-family: monospace; font-size: 13px; font-weight: 600; }
519
- .log-area textarea { font-family: 'JetBrains Mono', monospace !important; font-size: 12px !important; line-height: 1.6 !important; }
520
  """
521
 
522
  with gr.Blocks(css=css, title="πŸ”— Link Audit Tool", theme=gr.themes.Soft()) as app:
523
 
524
- # Header
525
  gr.HTML("""
526
  <div class="main-header">
527
- <p style="font-size:10px;font-weight:700;letter-spacing:1.5px;text-transform:uppercase;color:#93c5fd;margin-bottom:8px;">SEO Link Audit Tool</p>
528
  <h1>πŸ”— Bulk Link Audit</h1>
529
  <p>Upload URLs β†’ batch crawl with auto-save β†’ pause/resume anytime β†’ generate interactive report</p>
530
  </div>
531
  """)
532
 
533
- # Connection status
534
  conn_status = "βœ… Supabase Connected" if sb else "❌ Supabase Not Connected β€” add SUPABASE_URL and SUPABASE_KEY to Space secrets"
535
  gr.HTML(f'<div class="status-bar">πŸ—„οΈ {conn_status}</div>')
536
 
@@ -544,27 +601,24 @@ with gr.Blocks(css=css, title="πŸ”— Link Audit Tool", theme=gr.themes.Soft()) as
544
  pasted_urls = gr.Textbox(label="Or paste URLs (one per line)", lines=5, placeholder="https://www.example.com/blog/page1\nhttps://www.example.com/blog/page2")
545
 
546
  with gr.Column(scale=1):
547
- domain = gr.Textbox(label="Your Domain", value="edstellar.com")
548
- batch_size = gr.Slider(5, 50, value=25, step=5, label="Batch Size")
549
- timeout = gr.Slider(5, 60, value=15, step=5, label="Timeout (s)")
550
- delay = gr.Slider(0, 5, value=1.0, step=0.5, label="Delay between pages (s)")
551
- workers = gr.Slider(1, 10, value=5, step=1, label="Parallel link checks")
552
 
553
  with gr.Row():
554
  run_btn = gr.Button("πŸš€ Run Audit", variant="primary", scale=2)
555
  pause_btn = gr.Button("⏸️ Pause", variant="stop", scale=1, interactive=False)
556
 
557
- progress_text = gr.Textbox(label="Status", interactive=False, elem_classes=["status-bar"])
558
- log_output = gr.Textbox(label="Audit Log", lines=20, interactive=False, elem_classes=["log-area"])
559
 
560
- # Wire up run button (generator for streaming)
561
  run_btn.click(
562
  fn=run_audit,
563
- inputs=[file_input, pasted_urls, domain, batch_size, timeout, delay, workers],
564
  outputs=[log_output, progress_text, pause_btn, run_btn],
565
  )
566
-
567
- # Wire up pause button
568
  pause_btn.click(fn=pause_audit, outputs=[progress_text])
569
 
570
  # ═══ TAB 2: PAST RUNS ═══
@@ -583,33 +637,20 @@ with gr.Blocks(css=css, title="πŸ”— Link Audit Tool", theme=gr.themes.Soft()) as
583
  report_file = gr.File(label="Download Report", interactive=False)
584
  csv_file = gr.File(label="Download CSV", interactive=False)
585
 
586
- # Resume log & progress (shared with new audit display format)
587
- resume_progress = gr.Textbox(label="Resume Status", interactive=False, elem_classes=["status-bar"])
588
- resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False, elem_classes=["log-area"])
589
  resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop", interactive=False)
590
 
591
- # Refresh
592
  refresh_btn.click(fn=load_past_runs, outputs=[runs_html, run_dropdown])
593
 
594
- # Generate report
595
- def gen_report_wrapper(run_id, domain_val):
596
- filepath, msg = generate_report_for_run(run_id, domain_val)
597
- return filepath, msg
598
-
599
  report_btn.click(
600
- fn=gen_report_wrapper,
601
- inputs=[run_dropdown, domain],
602
  outputs=[report_file, action_status],
603
  )
604
 
605
- # CSV
606
- def csv_wrapper(run_id):
607
- filepath, msg = generate_csv_for_run(run_id)
608
- return filepath, msg
609
-
610
- csv_btn.click(fn=csv_wrapper, inputs=[run_dropdown], outputs=[csv_file, action_status])
611
 
612
- # Delete
613
  def delete_wrapper(run_id):
614
  msg = delete_selected_run(run_id)
615
  html, dropdown = load_past_runs()
@@ -617,15 +658,13 @@ with gr.Blocks(css=css, title="πŸ”— Link Audit Tool", theme=gr.themes.Soft()) as
617
 
618
  delete_btn.click(fn=delete_wrapper, inputs=[run_dropdown], outputs=[action_status, runs_html, run_dropdown])
619
 
620
- # Resume
621
  resume_btn.click(
622
  fn=resume_audit,
623
- inputs=[run_dropdown, domain, batch_size, timeout, delay, workers],
624
  outputs=[resume_log, resume_progress, resume_pause_btn, resume_btn],
625
  )
626
  resume_pause_btn.click(fn=pause_audit, outputs=[resume_progress])
627
 
628
- # Auto-load runs on startup
629
  app.load(fn=load_past_runs, outputs=[runs_html, run_dropdown])
630
 
631
 
 
71
  def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
72
  """Main audit generator β€” yields progress updates."""
73
  if sb is None:
74
+ yield ("❌ Supabase not connected. Set SUPABASE_URL and SUPABASE_KEY in Space secrets.",
75
+ "",
76
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
77
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
78
  return
79
 
80
  # Parse URLs
81
  urls = []
82
  if file is not None:
83
  try:
84
+ fpath = file.name if hasattr(file, 'name') else file
85
+ if str(fpath).endswith('.csv'):
86
+ df = pd.read_csv(fpath)
87
  else:
88
+ df = pd.read_excel(fpath)
89
  url_col = None
90
  for col in df.columns:
91
  sample = str(df[col].iloc[0]).strip().lower()
 
96
  url_col = df.columns[0]
97
  urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
98
  except Exception as e:
99
+ yield (f"❌ File error: {e}", "",
100
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
101
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
102
  return
103
  elif pasted_urls and pasted_urls.strip():
104
  urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
105
 
106
  if not urls:
107
+ yield ("⚠ No valid URLs found. Upload a file or paste URLs.", "",
108
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
109
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
110
  return
111
 
112
  # Deduplicate preserving order
 
125
  audit_state.resume() # Reset pause flag
126
 
127
  total = len(urls)
128
+ batch_size = int(batch_size)
129
+ timeout = int(timeout)
130
+ workers = int(workers)
131
  start_time = time.time()
132
  batch_num = 0
133
  log_lines = []
134
 
135
+ yield (f"πŸš€ Started: {run_name}\nπŸ“¦ {total} URLs Β· Batch size: {batch_size}", "",
136
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=True),
137
+ gr.Button(value="πŸš€ Running...", variant="primary", interactive=False))
138
 
139
  try:
140
  for batch_start in range(0, total, batch_size):
 
141
  if audit_state.is_paused():
142
  completed = get_completed_count(sb, run_id)
143
  update_run_status(sb, run_id, "paused", completed)
144
  log_lines.append(f"⏸️ PAUSED at {completed}/{total} β€” resume from Past Runs")
145
  audit_state.set_running(False)
146
+ yield ("\n".join(log_lines[-40:]), f"⏸️ Paused at {completed}/{total}",
147
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
148
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
149
  return
150
 
151
  batch_end = min(batch_start + batch_size, total)
 
154
  batch_results = []
155
 
156
  for j, url in enumerate(batch_urls):
 
157
  if audit_state.is_paused():
 
158
  if batch_results:
159
  save_batch_results(sb, run_id, batch_results)
160
  completed = get_completed_count(sb, run_id)
161
  update_run_status(sb, run_id, "paused", completed)
162
  log_lines.append(f"⏸️ PAUSED at {completed}/{total}")
163
  audit_state.set_running(False)
164
+ yield ("\n".join(log_lines[-40:]), f"⏸️ Paused at {completed}/{total}",
165
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
166
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
167
  return
168
 
169
  global_idx = batch_start + j + 1
 
193
  flag_str = " Β· ".join(flags) if flags else "βœ…"
194
  log_lines.append(f"[{global_idx}/{total}] {short} β€” Int:{result['int_count']} Ext:{result['ext_count']} Β· {flag_str}")
195
 
196
+ progress_msg = f"πŸ“Š Progress: {global_idx}/{total} ({global_idx*100//total}%) Β· Batch {batch_num} Β· ETA: {eta_str}"
197
+ yield ("\n".join(log_lines[-40:]), progress_msg,
198
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=True),
199
+ gr.Button(value="πŸš€ Running...", variant="primary", interactive=False))
200
 
201
  if j < len(batch_urls) - 1:
202
  time.sleep(delay)
203
 
204
+ # Save batch
205
  if batch_results:
206
  try:
207
  save_batch_results(sb, run_id, batch_results)
 
211
  except Exception as e:
212
  log_lines.append(f"⚠ Batch save error: {str(e)[:60]}")
213
 
214
+ yield ("\n".join(log_lines[-40:]), f"πŸ“Š Saved batch {batch_num} β€” {min(batch_end, total)}/{total}",
215
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=True),
216
+ gr.Button(value="πŸš€ Running...", variant="primary", interactive=False))
217
  del batch_results
218
 
219
  # ── ALL DONE β€” Orphan analysis ──
220
  log_lines.append("πŸ” Running orphan page analysis...")
221
+ yield ("\n".join(log_lines[-40:]), "πŸ“Š Orphan analysis...",
222
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
223
+ gr.Button(value="πŸš€ Running...", variant="primary", interactive=False))
224
 
225
  all_pages = get_all_page_results(sb, run_id)
226
  all_results = [p['result'] for p in all_pages]
 
253
  log_lines.append("β†’ Go to Past Runs tab to generate report")
254
 
255
  audit_state.set_running(False)
256
+ yield ("\n".join(log_lines[-40:]), f"βœ… Complete β€” {len(all_results)} pages",
257
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
258
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
259
 
260
  except Exception as e:
261
  log_lines.append(f"❌ Error: {str(e)}")
262
  audit_state.set_running(False)
263
  if run_id:
264
+ try:
265
+ completed = get_completed_count(sb, run_id)
266
+ update_run_status(sb, run_id, "paused", completed)
267
+ except:
268
+ pass
269
+ yield ("\n".join(log_lines[-40:]), f"❌ Error β€” saved progress to Supabase",
270
+ gr.Button(value="⏸️ Pause", variant="stop", interactive=False),
271
+ gr.Button(value="πŸš€ Run Audit", variant="primary", interactive=True))
272
 
273
 
274
  def pause_audit():
 
286
  def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
287
  """Resume a paused/interrupted run."""
288
  if sb is None:
289
+ yield ("❌ Supabase not connected.", "",
290
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
291
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
292
  return
293
 
294
  if not run_id:
295
+ yield ("⚠ No run selected.", "",
296
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
297
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
298
  return
299
 
300
  all_urls_for_run = get_pending_urls(sb, run_id)
 
303
 
304
  if not remaining:
305
  update_run_status(sb, run_id, "completed", len(done_urls))
306
+ yield ("βœ… All pages already audited!", "",
307
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
308
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
309
  return
310
 
311
+ # Get domain from run
312
+ try:
313
+ runs = get_all_runs(sb)
314
+ run_data = next((r for r in runs if r['id'] == run_id), None)
315
+ if run_data:
316
+ domain = run_data.get('domain', domain)
317
+ except:
318
+ pass
319
+
320
  audit_state.set_running(True, run_id)
321
  audit_state.resume()
322
  update_run_status(sb, run_id, "running")
323
 
324
  total = len(all_urls_for_run)
325
+ batch_size = int(batch_size)
326
+ timeout = int(timeout)
327
+ workers = int(workers)
328
  start_time = time.time()
329
  batch_num = 0
330
  log_lines = [f"▢️ Resuming β€” {len(remaining)} pages remaining ({len(done_urls)} already done)"]
331
 
332
+ yield ("\n".join(log_lines), f"πŸ“Š Resuming: {len(done_urls)}/{total}",
333
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=True),
334
+ gr.Button(value="▢️ Resuming...", variant="primary", interactive=False))
335
 
336
  try:
337
  for batch_start in range(0, len(remaining), batch_size):
 
340
  update_run_status(sb, run_id, "paused", completed)
341
  log_lines.append(f"⏸️ PAUSED at {completed}/{total}")
342
  audit_state.set_running(False)
343
+ yield ("\n".join(log_lines[-40:]), f"⏸️ Paused at {completed}/{total}",
344
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
345
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
346
  return
347
 
348
  batch_end = min(batch_start + batch_size, len(remaining))
 
358
  update_run_status(sb, run_id, "paused", completed)
359
  log_lines.append(f"⏸️ PAUSED at {completed}/{total}")
360
  audit_state.set_running(False)
361
+ yield ("\n".join(log_lines[-40:]), f"⏸️ Paused at {completed}/{total}",
362
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
363
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
364
  return
365
 
366
  global_idx = len(done_urls) + batch_start + j + 1
 
385
  flag_str = f"πŸ”΄ {b} broken" if b else "βœ…"
386
  log_lines.append(f"[{global_idx}/{total}] {short} Β· {flag_str}")
387
 
388
+ yield ("\n".join(log_lines[-40:]),
389
+ f"πŸ“Š Progress: {global_idx}/{total} ({global_idx*100//total}%) Β· ETA: {eta_str}",
390
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=True),
391
+ gr.Button(value="▢️ Resuming...", variant="primary", interactive=False))
392
  if j < len(batch_urls) - 1:
393
  time.sleep(delay)
394
 
 
401
 
402
  # Orphan analysis
403
  log_lines.append("πŸ” Orphan analysis...")
404
+ yield ("\n".join(log_lines[-40:]), "πŸ“Š Orphan analysis...",
405
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
406
+ gr.Button(value="▢️ Resuming...", variant="primary", interactive=False))
407
 
408
  all_pages = get_all_page_results(sb, run_id)
409
  all_results = [p['result'] for p in all_pages]
 
433
  total_time = time.time() - start_time
434
  log_lines.append(f"βœ… COMPLETE! {len(all_results)} pages in {total_time:.0f}s Β· {len(orphans)} orphans")
435
  audit_state.set_running(False)
436
+ yield ("\n".join(log_lines[-40:]), f"βœ… Complete β€” {len(all_results)} pages",
437
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
438
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
439
 
440
  except Exception as e:
441
  log_lines.append(f"❌ Error: {str(e)}")
442
  audit_state.set_running(False)
443
+ try:
444
+ completed = get_completed_count(sb, run_id)
445
+ update_run_status(sb, run_id, "paused", completed)
446
+ except:
447
+ pass
448
+ yield ("\n".join(log_lines[-40:]), "❌ Error",
449
+ gr.Button(value="⏸️ Pause Resume", variant="stop", interactive=False),
450
+ gr.Button(value="▢️ Resume Audit", variant="primary", interactive=True))
451
 
452
 
453
  # ═══════════════════════════════════════════════════
 
456
 
457
  def load_past_runs():
458
  if sb is None:
459
+ return "<p>❌ Supabase not connected</p>", gr.Dropdown(choices=[], value=None)
460
 
461
  runs = get_all_runs(sb)
462
  if not runs:
463
+ return "<p>No saved runs yet.</p>", gr.Dropdown(choices=[], value=None)
464
 
 
465
  choices = []
466
  for r in runs:
467
  status = r.get('status', 'unknown')
 
470
  label = f"{r.get('name', 'Untitled')} [{status.upper()}] ({completed}/{total})"
471
  choices.append((label, r['id']))
472
 
 
473
  html = '<div style="max-height:400px;overflow-y:auto;">'
474
  html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
475
  html += '<tr style="background:#f1f5f9;"><th style="padding:10px;text-align:left;">Run Name</th><th style="padding:10px;text-align:center;">Status</th><th style="padding:10px;text-align:center;">Pages</th><th style="padding:10px;text-align:center;">Broken</th><th style="padding:10px;text-align:center;">Flags</th><th style="padding:10px;text-align:center;">Dups</th><th style="padding:10px;text-align:center;">Orphans</th></tr>'
 
491
  </tr>'''
492
 
493
  html += '</table></div>'
494
+ return html, gr.Dropdown(choices=choices, value=choices[0][1] if choices else None)
495
 
496
 
497
  def generate_report_for_run(run_id, domain):
 
517
 
518
  report_html = generate_report(results, orphan_urls, report_domain)
519
 
 
520
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Link_Audit_')
521
  tmp.write(report_html.encode('utf-8'))
522
  tmp.close()
 
576
  .main-header h1 { margin: 0 0 4px 0; font-size: 24px; }
577
  .main-header p { margin: 0; opacity: 0.8; font-size: 13px; }
578
  .status-bar { background: #f1f5f9; border: 1px solid #e2e8f0; border-radius: 8px; padding: 10px 16px; font-family: monospace; font-size: 13px; font-weight: 600; }
 
579
  """
580
 
581
  with gr.Blocks(css=css, title="πŸ”— Link Audit Tool", theme=gr.themes.Soft()) as app:
582
 
 
583
  gr.HTML("""
584
  <div class="main-header">
585
+ <p style="font-size:10px;font-weight:700;letter-spacing:1.5px;text-transform:uppercase;color:#93c5fd;margin-bottom:8px;">SEO LINK AUDIT TOOL</p>
586
  <h1>πŸ”— Bulk Link Audit</h1>
587
  <p>Upload URLs β†’ batch crawl with auto-save β†’ pause/resume anytime β†’ generate interactive report</p>
588
  </div>
589
  """)
590
 
 
591
  conn_status = "βœ… Supabase Connected" if sb else "❌ Supabase Not Connected β€” add SUPABASE_URL and SUPABASE_KEY to Space secrets"
592
  gr.HTML(f'<div class="status-bar">πŸ—„οΈ {conn_status}</div>')
593
 
 
601
  pasted_urls = gr.Textbox(label="Or paste URLs (one per line)", lines=5, placeholder="https://www.example.com/blog/page1\nhttps://www.example.com/blog/page2")
602
 
603
  with gr.Column(scale=1):
604
+ domain_input = gr.Textbox(label="Your Domain", value="edstellar.com")
605
+ batch_size_input = gr.Slider(5, 50, value=25, step=5, label="Batch Size")
606
+ timeout_input = gr.Slider(5, 60, value=15, step=5, label="Timeout (s)")
607
+ delay_input = gr.Slider(0, 5, value=1.0, step=0.5, label="Delay between pages (s)")
608
+ workers_input = gr.Slider(1, 10, value=5, step=1, label="Parallel link checks")
609
 
610
  with gr.Row():
611
  run_btn = gr.Button("πŸš€ Run Audit", variant="primary", scale=2)
612
  pause_btn = gr.Button("⏸️ Pause", variant="stop", scale=1, interactive=False)
613
 
614
+ progress_text = gr.Textbox(label="Status", interactive=False)
615
+ log_output = gr.Textbox(label="Audit Log", lines=20, interactive=False)
616
 
 
617
  run_btn.click(
618
  fn=run_audit,
619
+ inputs=[file_input, pasted_urls, domain_input, batch_size_input, timeout_input, delay_input, workers_input],
620
  outputs=[log_output, progress_text, pause_btn, run_btn],
621
  )
 
 
622
  pause_btn.click(fn=pause_audit, outputs=[progress_text])
623
 
624
  # ═══ TAB 2: PAST RUNS ═══
 
637
  report_file = gr.File(label="Download Report", interactive=False)
638
  csv_file = gr.File(label="Download CSV", interactive=False)
639
 
640
+ resume_progress = gr.Textbox(label="Resume Status", interactive=False)
641
+ resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
 
642
  resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop", interactive=False)
643
 
 
644
  refresh_btn.click(fn=load_past_runs, outputs=[runs_html, run_dropdown])
645
 
 
 
 
 
 
646
  report_btn.click(
647
+ fn=generate_report_for_run,
648
+ inputs=[run_dropdown, domain_input],
649
  outputs=[report_file, action_status],
650
  )
651
 
652
+ csv_btn.click(fn=generate_csv_for_run, inputs=[run_dropdown], outputs=[csv_file, action_status])
 
 
 
 
 
653
 
 
654
  def delete_wrapper(run_id):
655
  msg = delete_selected_run(run_id)
656
  html, dropdown = load_past_runs()
 
658
 
659
  delete_btn.click(fn=delete_wrapper, inputs=[run_dropdown], outputs=[action_status, runs_html, run_dropdown])
660
 
 
661
  resume_btn.click(
662
  fn=resume_audit,
663
+ inputs=[run_dropdown, domain_input, batch_size_input, timeout_input, delay_input, workers_input],
664
  outputs=[resume_log, resume_progress, resume_pause_btn, resume_btn],
665
  )
666
  resume_pause_btn.click(fn=pause_audit, outputs=[resume_progress])
667
 
 
668
  app.load(fn=load_past_runs, outputs=[runs_html, run_dropdown])
669
 
670