vijaykumaredstellar committed on
Commit
828b2ac
·
verified ·
1 Parent(s): 9869300

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -111
app.py CHANGED
@@ -1,6 +1,5 @@
1
  """
2
  Link Audit Tool β€” Gradio UI for Hugging Face Spaces
3
- Proper pause/resume via threading + Supabase persistence.
4
  """
5
 
6
  import gradio as gr
@@ -19,7 +18,6 @@ from db import (
19
  save_batch_results, update_run_status, delete_run,
20
  )
21
 
22
- # ─── Supabase Connection ───
23
  SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
24
  SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "")
25
  sb = None
@@ -29,10 +27,10 @@ if SUPABASE_URL and SUPABASE_KEY:
29
  sb.table("audit_runs").select("id").limit(1).execute()
30
  print("βœ… Supabase connected")
31
  except Exception as e:
32
- print(f"❌ Supabase connection failed: {e}")
33
  sb = None
34
 
35
- # ─── Audit State (thread-safe) ───
36
  class AuditState:
37
  def __init__(self):
38
  self.lock = threading.Lock()
@@ -41,30 +39,48 @@ class AuditState:
41
  self.run_id = None
42
 
43
  def request_pause(self):
44
- with self.lock:
45
- self.paused = True
46
 
47
  def resume(self):
48
- with self.lock:
49
- self.paused = False
50
 
51
  def is_paused(self):
52
- with self.lock:
53
- return self.paused
54
 
55
  def set_running(self, val, run_id=None):
56
  with self.lock:
57
  self.running = val
58
- if run_id:
59
- self.run_id = run_id
60
 
61
  def is_running(self):
62
- with self.lock:
63
- return self.running
64
 
65
  audit_state = AuditState()
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
 
68
  def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
69
  if sb is None:
70
  yield "❌ Supabase not connected.", ""
@@ -79,27 +95,21 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
79
  for col in df.columns:
80
  sample = str(df[col].iloc[0]).strip().lower()
81
  if sample.startswith('http') or domain in sample:
82
- url_col = col
83
- break
84
- if url_col is None:
85
- url_col = df.columns[0]
86
  urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
87
  except Exception as e:
88
- yield f"❌ File error: {e}", ""
89
- return
90
  elif pasted_urls and pasted_urls.strip():
91
  urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
92
 
93
  if not urls:
94
- yield "⚠ No valid URLs.", ""
95
- return
96
 
97
  seen = set()
98
  unique = []
99
  for u in urls:
100
- if u not in seen:
101
- seen.add(u)
102
- unique.append(u)
103
  urls = unique
104
 
105
  run_name = f"{domain} Audit β€” {datetime.now().strftime('%b %d %H:%M')} β€” {len(urls)} pages"
@@ -122,8 +132,7 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
122
  update_run_status(sb, run_id, "paused", c)
123
  log.append(f"⏸️ PAUSED at {c}/{total}")
124
  audit_state.set_running(False)
125
- yield "\n".join(log[-40:]), f"⏸️ Paused β€” {c}/{total}"
126
- return
127
 
128
  be = min(bs + batch_size, total)
129
  batch_urls = urls[bs:be]
@@ -132,14 +141,12 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
132
 
133
  for j, url in enumerate(batch_urls):
134
  if audit_state.is_paused():
135
- if batch_results:
136
- save_batch_results(sb, run_id, batch_results)
137
  c = get_completed_count(sb, run_id)
138
  update_run_status(sb, run_id, "paused", c)
139
  log.append(f"⏸️ PAUSED at {c}/{total}")
140
  audit_state.set_running(False)
141
- yield "\n".join(log[-40:]), f"⏸️ Paused β€” {c}/{total}"
142
- return
143
 
144
  gi = bs + j + 1
145
  elapsed = time.time() - start_time
@@ -165,8 +172,7 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
165
  log.append(f"[{gi}/{total}] {short} β€” Int:{result['int_count']} Ext:{result['ext_count']} {fs}")
166
 
167
  yield "\n".join(log[-40:]), f"πŸ“Š {gi}/{total} ({gi*100//total}%) Batch{batch_num} ETA:{eta_s}"
168
- if j < len(batch_urls) - 1:
169
- time.sleep(delay)
170
 
171
  if batch_results:
172
  try:
@@ -187,21 +193,18 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
187
  targets, pg_urls = set(), set()
188
  for r in all_results:
189
  pg_urls.add(r['url'].rstrip('/').split('?')[0])
190
- for lk in r.get('internal_links', []):
191
- targets.add(lk['url'].rstrip('/').split('?')[0])
192
  orphans = sorted([p for p in pg_urls if p not in targets])
193
 
194
  summary = {
195
- 'total_pages': len(all_results),
196
- 'total_int': sum(r.get('int_count', 0) for r in all_results),
197
- 'total_ext': sum(r.get('ext_count', 0) for r in all_results),
198
- 'total_broken': sum(r.get('broken_int_count', 0) + r.get('broken_ext_count', 0) for r in all_results),
199
- 'total_redirects': sum(r.get('redirect_int_count', 0) + r.get('redirect_ext_count', 0) for r in all_results),
200
- 'total_flags': sum(r.get('follow_flag_count', 0) for r in all_results),
201
- 'total_dups': sum(r.get('duplicate_count', 0) for r in all_results),
202
- 'total_sug': sum(len(r.get('suggestions', [])) for r in all_results),
203
- 'orphan_count': len(orphans),
204
- 'orphan_urls': orphans[:100],
205
  }
206
  update_run_status(sb, run_id, "completed", len(all_results), summary)
207
  tt = time.time() - start_time
@@ -214,9 +217,7 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
214
  except Exception as e:
215
  log.append(f"❌ {str(e)}")
216
  audit_state.set_running(False)
217
- try:
218
- c = get_completed_count(sb, run_id)
219
- update_run_status(sb, run_id, "paused", c)
220
  except: pass
221
  yield "\n".join(log[-40:]), "❌ Error β€” progress saved"
222
 
@@ -228,13 +229,16 @@ def pause_audit():
228
  return "No audit running."
229
 
230
 
231
- def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
 
232
  if sb is None:
233
- yield "❌ Supabase not connected.", ""
234
- return
 
 
 
235
  if not run_id:
236
- yield "⚠ Select a run first.", ""
237
- return
238
 
239
  all_urls = get_pending_urls(sb, run_id)
240
  done = get_completed_urls(sb, run_id)
@@ -242,12 +246,10 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
242
 
243
  if not remaining:
244
  update_run_status(sb, run_id, "completed", len(done))
245
- yield "βœ… Already complete!", ""
246
- return
247
 
248
  try:
249
- runs = get_all_runs(sb)
250
- rd = next((r for r in runs if r['id'] == run_id), None)
251
  if rd: domain = rd.get('domain', domain)
252
  except: pass
253
 
@@ -269,13 +271,11 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
269
  update_run_status(sb, run_id, "paused", c)
270
  log.append(f"⏸️ PAUSED {c}/{total}")
271
  audit_state.set_running(False)
272
- yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"
273
- return
274
 
275
  be = min(bs + batch_size, len(remaining))
276
  bu = remaining[bs:be]
277
- bn += 1
278
- br = []
279
 
280
  for j, url in enumerate(bu):
281
  if audit_state.is_paused():
@@ -284,8 +284,7 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
284
  update_run_status(sb, run_id, "paused", c)
285
  log.append(f"⏸️ PAUSED {c}/{total}")
286
  audit_state.set_running(False)
287
- yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"
288
- return
289
 
290
  gi = len(done) + bs + j + 1
291
  elapsed = time.time() - start_time
@@ -344,84 +343,70 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
344
  except Exception as e:
345
  log.append(f"❌ {str(e)}")
346
  audit_state.set_running(False)
347
- try:
348
- c = get_completed_count(sb, run_id)
349
- update_run_status(sb, run_id, "paused", c)
350
  except: pass
351
  yield "\n".join(log[-40:]), "❌ Error"
352
 
353
 
354
  # ═══════════════════════════════════════════════════
355
- # PAST RUNS β€” returns ONLY strings, no component objects
356
  # ═══════════════════════════════════════════════════
357
 
358
  def load_runs_html():
359
- """Returns HTML table only."""
360
- if sb is None:
361
- return "<p>❌ Supabase not connected</p>"
362
- runs = get_all_runs(sb)
363
- if not runs:
364
- return "<p>No saved runs yet.</p>"
365
-
366
  html = '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
367
  html += '<tr style="background:#f1f5f9;"><th style="padding:8px;text-align:left;">Run</th><th style="padding:8px;text-align:center;">Status</th><th style="padding:8px;text-align:center;">Pages</th><th style="padding:8px;text-align:center;">Broken</th><th style="padding:8px;text-align:center;">Flags</th><th style="padding:8px;text-align:center;">Dups</th><th style="padding:8px;text-align:center;">Orphans</th></tr>'
368
-
369
- for r in runs:
370
  s = r.get('summary', {}) or {}
371
  st = r.get('status', '?')
372
  sc = {'completed':'#059669','paused':'#d97706','running':'#2563eb'}.get(st,'#888')
373
- sb2 = {'completed':'rgba(5,150,105,0.1)','paused':'rgba(217,119,6,0.1)','running':'rgba(37,99,235,0.1)'}.get(st,'rgba(136,136,136,0.1)')
374
  cr = r.get('created_at','')[:16].replace('T',' ')
375
- html += f'<tr style="border-bottom:1px solid #e2e8f0;"><td style="padding:8px;"><b>{r.get("name","?")}</b><br><span style="font-size:10px;color:#94a3b8;">{cr}</span></td><td style="padding:8px;text-align:center;"><span style="background:{sb2};color:{sc};padding:2px 8px;border-radius:10px;font-size:10px;font-weight:700;">{st.upper()}</span></td><td style="padding:8px;text-align:center;font-weight:700;">{r.get("completed_urls",0)}/{r.get("total_urls",0)}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("total_broken","β€”")}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("total_flags","β€”")}</td><td style="padding:8px;text-align:center;color:#db2777;font-weight:700;">{s.get("total_dups","β€”")}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("orphan_count","β€”")}</td></tr>'
376
  html += '</table>'
377
  return html
378
 
379
 
380
  def load_runs_choices():
381
- """Returns list of (label, value) tuples for dropdown."""
382
- if sb is None:
383
- return []
384
- runs = get_all_runs(sb)
385
- if not runs:
386
- return []
387
  choices = []
388
- for r in runs:
389
  st = r.get('status', '?')
390
  label = f"{r.get('name','?')} [{st.upper()}] ({r.get('completed_urls',0)}/{r.get('total_urls',0)})"
391
- choices.append((label, r['id']))
392
  return choices
393
 
394
 
395
- def generate_report_for_run(run_id, domain):
396
- if sb is None or not run_id:
397
  return None, "❌ No run selected."
 
 
 
398
  try:
399
- run = None
400
- for r in get_all_runs(sb):
401
- if r['id'] == run_id:
402
- run = r
403
- break
404
  pages = get_all_page_results(sb, run_id)
405
- if not pages:
406
- return None, "⚠ No data."
407
  results = [p['result'] for p in pages]
408
  s = (run.get('summary', {}) or {}) if run else {}
409
  rh = generate_report(results, s.get('orphan_urls', []), run.get('domain', domain) if run else domain)
410
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Audit_')
411
- tmp.write(rh.encode('utf-8'))
412
- tmp.close()
413
  return tmp.name, f"βœ… Report β€” {len(results)} pages"
414
  except Exception as e:
415
  return None, f"❌ {str(e)}"
416
 
417
 
418
- def generate_csv_for_run(run_id):
419
- if sb is None or not run_id:
420
  return None, "❌ No run selected."
 
 
421
  try:
422
  pages = get_all_page_results(sb, run_id)
423
- if not pages:
424
- return None, "⚠ No data."
425
  rows = [{'URL': p['result'].get('url',''), 'Internal': p['result'].get('int_count',0),
426
  'External': p['result'].get('ext_count',0),
427
  'Broken': p['result'].get('broken_int_count',0)+p['result'].get('broken_ext_count',0),
@@ -429,16 +414,17 @@ def generate_csv_for_run(run_id):
429
  'Flags': p['result'].get('follow_flag_count',0),
430
  'Dups': p['result'].get('duplicate_count',0)} for p in pages]
431
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', prefix='Audit_')
432
- pd.DataFrame(rows).to_csv(tmp.name, index=False)
433
- tmp.close()
434
  return tmp.name, f"βœ… CSV β€” {len(rows)} rows"
435
  except Exception as e:
436
  return None, f"❌ {str(e)}"
437
 
438
 
439
- def delete_selected_run(run_id):
440
- if sb is None or not run_id:
441
  return "❌ No run selected."
 
 
442
  try:
443
  delete_run(sb, run_id)
444
  return "πŸ—‘οΈ Deleted. Click Refresh."
@@ -487,10 +473,12 @@ with gr.Blocks(title="Link Audit Tool", theme=gr.themes.Soft()) as app:
487
  pause_btn.click(api_name=False, fn=pause_audit, outputs=[progress_text])
488
 
489
  with gr.Tab("πŸ“ Past Runs"):
490
- with gr.Row():
491
- refresh_btn = gr.Button("πŸ”„ Refresh", variant="secondary")
492
  runs_html = gr.HTML(value="<p>Click Refresh to load.</p>")
493
- run_dropdown = gr.Dropdown(label="Select Run", choices=[], interactive=True, type="value")
 
 
 
494
 
495
  with gr.Row():
496
  report_btn = gr.Button("πŸ“Š HTML Report", variant="primary")
@@ -508,9 +496,9 @@ with gr.Blocks(title="Link Audit Tool", theme=gr.themes.Soft()) as app:
508
  resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
509
  resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop")
510
 
511
- # Refresh: update HTML and dropdown separately
512
- refresh_btn.click(api_name=False, fn=load_runs_html, outputs=[runs_html])
513
- refresh_btn.click(api_name=False, fn=load_runs_choices, outputs=[run_dropdown])
514
 
515
  report_btn.click(api_name=False, fn=generate_report_for_run, inputs=[run_dropdown, domain_input], outputs=[report_file, action_status])
516
  csv_btn.click(api_name=False, fn=generate_csv_for_run, inputs=[run_dropdown], outputs=[csv_file, action_status])
 
1
  """
2
  Link Audit Tool β€” Gradio UI for Hugging Face Spaces
 
3
  """
4
 
5
  import gradio as gr
 
18
  save_batch_results, update_run_status, delete_run,
19
  )
20
 
 
21
  SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
22
  SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "")
23
  sb = None
 
27
  sb.table("audit_runs").select("id").limit(1).execute()
28
  print("βœ… Supabase connected")
29
  except Exception as e:
30
+ print(f"❌ Supabase failed: {e}")
31
  sb = None
32
 
33
+
34
  class AuditState:
35
  def __init__(self):
36
  self.lock = threading.Lock()
 
39
  self.run_id = None
40
 
41
  def request_pause(self):
42
+ with self.lock: self.paused = True
 
43
 
44
  def resume(self):
45
+ with self.lock: self.paused = False
 
46
 
47
  def is_paused(self):
48
+ with self.lock: return self.paused
 
49
 
50
  def set_running(self, val, run_id=None):
51
  with self.lock:
52
  self.running = val
53
+ if run_id: self.run_id = run_id
 
54
 
55
  def is_running(self):
56
+ with self.lock: return self.running
 
57
 
58
  audit_state = AuditState()
59
 
60
+ # ─── Global runs cache for dropdown ───
61
+ _runs_cache = []
62
+
63
+ def _refresh_cache():
64
+ global _runs_cache
65
+ if sb is None:
66
+ _runs_cache = []
67
+ return
68
+ _runs_cache = get_all_runs(sb) or []
69
+
70
+ def _get_run_id_by_label(label):
71
+ """Look up run ID from dropdown label."""
72
+ for r in _runs_cache:
73
+ st = r.get('status', '?')
74
+ expected = f"{r.get('name','?')} [{st.upper()}] ({r.get('completed_urls',0)}/{r.get('total_urls',0)})"
75
+ if label == expected:
76
+ return r['id']
77
+ # Maybe it's a raw UUID
78
+ if label and len(label) > 30:
79
+ return label
80
+ return None
81
+
82
 
83
+ # ═══════════════════════════════════════════════════
84
  def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
85
  if sb is None:
86
  yield "❌ Supabase not connected.", ""
 
95
  for col in df.columns:
96
  sample = str(df[col].iloc[0]).strip().lower()
97
  if sample.startswith('http') or domain in sample:
98
+ url_col = col; break
99
+ if not url_col: url_col = df.columns[0]
 
 
100
  urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
101
  except Exception as e:
102
+ yield f"❌ File error: {e}", ""; return
 
103
  elif pasted_urls and pasted_urls.strip():
104
  urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
105
 
106
  if not urls:
107
+ yield "⚠ No valid URLs.", ""; return
 
108
 
109
  seen = set()
110
  unique = []
111
  for u in urls:
112
+ if u not in seen: seen.add(u); unique.append(u)
 
 
113
  urls = unique
114
 
115
  run_name = f"{domain} Audit β€” {datetime.now().strftime('%b %d %H:%M')} β€” {len(urls)} pages"
 
132
  update_run_status(sb, run_id, "paused", c)
133
  log.append(f"⏸️ PAUSED at {c}/{total}")
134
  audit_state.set_running(False)
135
+ yield "\n".join(log[-40:]), f"⏸️ Paused β€” {c}/{total}"; return
 
136
 
137
  be = min(bs + batch_size, total)
138
  batch_urls = urls[bs:be]
 
141
 
142
  for j, url in enumerate(batch_urls):
143
  if audit_state.is_paused():
144
+ if batch_results: save_batch_results(sb, run_id, batch_results)
 
145
  c = get_completed_count(sb, run_id)
146
  update_run_status(sb, run_id, "paused", c)
147
  log.append(f"⏸️ PAUSED at {c}/{total}")
148
  audit_state.set_running(False)
149
+ yield "\n".join(log[-40:]), f"⏸️ Paused β€” {c}/{total}"; return
 
150
 
151
  gi = bs + j + 1
152
  elapsed = time.time() - start_time
 
172
  log.append(f"[{gi}/{total}] {short} β€” Int:{result['int_count']} Ext:{result['ext_count']} {fs}")
173
 
174
  yield "\n".join(log[-40:]), f"πŸ“Š {gi}/{total} ({gi*100//total}%) Batch{batch_num} ETA:{eta_s}"
175
+ if j < len(batch_urls) - 1: time.sleep(delay)
 
176
 
177
  if batch_results:
178
  try:
 
193
  targets, pg_urls = set(), set()
194
  for r in all_results:
195
  pg_urls.add(r['url'].rstrip('/').split('?')[0])
196
+ for lk in r.get('internal_links', []): targets.add(lk['url'].rstrip('/').split('?')[0])
 
197
  orphans = sorted([p for p in pg_urls if p not in targets])
198
 
199
  summary = {
200
+ 'total_pages': len(all_results), 'total_int': sum(r.get('int_count',0) for r in all_results),
201
+ 'total_ext': sum(r.get('ext_count',0) for r in all_results),
202
+ 'total_broken': sum(r.get('broken_int_count',0)+r.get('broken_ext_count',0) for r in all_results),
203
+ 'total_redirects': sum(r.get('redirect_int_count',0)+r.get('redirect_ext_count',0) for r in all_results),
204
+ 'total_flags': sum(r.get('follow_flag_count',0) for r in all_results),
205
+ 'total_dups': sum(r.get('duplicate_count',0) for r in all_results),
206
+ 'total_sug': sum(len(r.get('suggestions',[])) for r in all_results),
207
+ 'orphan_count': len(orphans), 'orphan_urls': orphans[:100],
 
 
208
  }
209
  update_run_status(sb, run_id, "completed", len(all_results), summary)
210
  tt = time.time() - start_time
 
217
  except Exception as e:
218
  log.append(f"❌ {str(e)}")
219
  audit_state.set_running(False)
220
+ try: c = get_completed_count(sb, run_id); update_run_status(sb, run_id, "paused", c)
 
 
221
  except: pass
222
  yield "\n".join(log[-40:]), "❌ Error β€” progress saved"
223
 
 
229
  return "No audit running."
230
 
231
 
232
+ # ═══════════════════════════════════════════════════
233
+ def resume_audit(run_label, domain, batch_size, timeout, delay, workers):
234
  if sb is None:
235
+ yield "❌ Supabase not connected.", ""; return
236
+ if not run_label:
237
+ yield "⚠ Select a run first (click Refresh, then pick from dropdown).", ""; return
238
+
239
+ run_id = _get_run_id_by_label(run_label)
240
  if not run_id:
241
+ yield f"❌ Could not find run for: {run_label}", ""; return
 
242
 
243
  all_urls = get_pending_urls(sb, run_id)
244
  done = get_completed_urls(sb, run_id)
 
246
 
247
  if not remaining:
248
  update_run_status(sb, run_id, "completed", len(done))
249
+ yield "βœ… Already complete!", ""; return
 
250
 
251
  try:
252
+ rd = next((r for r in _runs_cache if r['id'] == run_id), None)
 
253
  if rd: domain = rd.get('domain', domain)
254
  except: pass
255
 
 
271
  update_run_status(sb, run_id, "paused", c)
272
  log.append(f"⏸️ PAUSED {c}/{total}")
273
  audit_state.set_running(False)
274
+ yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"; return
 
275
 
276
  be = min(bs + batch_size, len(remaining))
277
  bu = remaining[bs:be]
278
+ bn += 1; br = []
 
279
 
280
  for j, url in enumerate(bu):
281
  if audit_state.is_paused():
 
284
  update_run_status(sb, run_id, "paused", c)
285
  log.append(f"⏸️ PAUSED {c}/{total}")
286
  audit_state.set_running(False)
287
+ yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"; return
 
288
 
289
  gi = len(done) + bs + j + 1
290
  elapsed = time.time() - start_time
 
343
  except Exception as e:
344
  log.append(f"❌ {str(e)}")
345
  audit_state.set_running(False)
346
+ try: c = get_completed_count(sb, run_id); update_run_status(sb, run_id, "paused", c)
 
 
347
  except: pass
348
  yield "\n".join(log[-40:]), "❌ Error"
349
 
350
 
351
  # ═══════════════════════════════════════════════════
352
+ # PAST RUNS
353
  # ═══════════════════════════════════════════════════
354
 
355
  def load_runs_html():
356
+ _refresh_cache()
357
+ if not _runs_cache:
358
+ return "<p>No saved runs.</p>"
 
 
 
 
359
  html = '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
360
  html += '<tr style="background:#f1f5f9;"><th style="padding:8px;text-align:left;">Run</th><th style="padding:8px;text-align:center;">Status</th><th style="padding:8px;text-align:center;">Pages</th><th style="padding:8px;text-align:center;">Broken</th><th style="padding:8px;text-align:center;">Flags</th><th style="padding:8px;text-align:center;">Dups</th><th style="padding:8px;text-align:center;">Orphans</th></tr>'
361
+ for r in _runs_cache:
 
362
  s = r.get('summary', {}) or {}
363
  st = r.get('status', '?')
364
  sc = {'completed':'#059669','paused':'#d97706','running':'#2563eb'}.get(st,'#888')
365
+ bg = {'completed':'rgba(5,150,105,0.1)','paused':'rgba(217,119,6,0.1)','running':'rgba(37,99,235,0.1)'}.get(st,'rgba(136,136,136,0.1)')
366
  cr = r.get('created_at','')[:16].replace('T',' ')
367
+ html += f'<tr style="border-bottom:1px solid #e2e8f0;"><td style="padding:8px;"><b>{r.get("name","?")}</b><br><span style="font-size:10px;color:#94a3b8;">{cr}</span></td><td style="padding:8px;text-align:center;"><span style="background:{bg};color:{sc};padding:2px 8px;border-radius:10px;font-size:10px;font-weight:700;">{st.upper()}</span></td><td style="padding:8px;text-align:center;font-weight:700;">{r.get("completed_urls",0)}/{r.get("total_urls",0)}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("total_broken","β€”")}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("total_flags","β€”")}</td><td style="padding:8px;text-align:center;color:#db2777;font-weight:700;">{s.get("total_dups","β€”")}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("orphan_count","β€”")}</td></tr>'
368
  html += '</table>'
369
  return html
370
 
371
 
372
  def load_runs_choices():
373
+ """Return plain list of label strings for dropdown. Uses cache from load_runs_html."""
 
 
 
 
 
374
  choices = []
375
+ for r in _runs_cache:
376
  st = r.get('status', '?')
377
  label = f"{r.get('name','?')} [{st.upper()}] ({r.get('completed_urls',0)}/{r.get('total_urls',0)})"
378
+ choices.append(label)
379
  return choices
380
 
381
 
382
+ def generate_report_for_run(run_label, domain):
383
+ if sb is None or not run_label:
384
  return None, "❌ No run selected."
385
+ run_id = _get_run_id_by_label(run_label)
386
+ if not run_id:
387
+ return None, "❌ Run not found."
388
  try:
389
+ run = next((r for r in _runs_cache if r['id'] == run_id), None)
 
 
 
 
390
  pages = get_all_page_results(sb, run_id)
391
+ if not pages: return None, "⚠ No data."
 
392
  results = [p['result'] for p in pages]
393
  s = (run.get('summary', {}) or {}) if run else {}
394
  rh = generate_report(results, s.get('orphan_urls', []), run.get('domain', domain) if run else domain)
395
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Audit_')
396
+ tmp.write(rh.encode('utf-8')); tmp.close()
 
397
  return tmp.name, f"βœ… Report β€” {len(results)} pages"
398
  except Exception as e:
399
  return None, f"❌ {str(e)}"
400
 
401
 
402
+ def generate_csv_for_run(run_label):
403
+ if sb is None or not run_label:
404
  return None, "❌ No run selected."
405
+ run_id = _get_run_id_by_label(run_label)
406
+ if not run_id: return None, "❌ Run not found."
407
  try:
408
  pages = get_all_page_results(sb, run_id)
409
+ if not pages: return None, "⚠ No data."
 
410
  rows = [{'URL': p['result'].get('url',''), 'Internal': p['result'].get('int_count',0),
411
  'External': p['result'].get('ext_count',0),
412
  'Broken': p['result'].get('broken_int_count',0)+p['result'].get('broken_ext_count',0),
 
414
  'Flags': p['result'].get('follow_flag_count',0),
415
  'Dups': p['result'].get('duplicate_count',0)} for p in pages]
416
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', prefix='Audit_')
417
+ pd.DataFrame(rows).to_csv(tmp.name, index=False); tmp.close()
 
418
  return tmp.name, f"βœ… CSV β€” {len(rows)} rows"
419
  except Exception as e:
420
  return None, f"❌ {str(e)}"
421
 
422
 
423
+ def delete_selected_run(run_label):
424
+ if sb is None or not run_label:
425
  return "❌ No run selected."
426
+ run_id = _get_run_id_by_label(run_label)
427
+ if not run_id: return "❌ Run not found."
428
  try:
429
  delete_run(sb, run_id)
430
  return "πŸ—‘οΈ Deleted. Click Refresh."
 
473
  pause_btn.click(api_name=False, fn=pause_audit, outputs=[progress_text])
474
 
475
  with gr.Tab("πŸ“ Past Runs"):
476
+ refresh_btn = gr.Button("πŸ”„ Refresh", variant="secondary")
 
477
  runs_html = gr.HTML(value="<p>Click Refresh to load.</p>")
478
+
479
+ # Dropdown uses plain string labels (no tuples, no UUIDs as values)
480
+ # We look up the UUID from the label when needed
481
+ run_dropdown = gr.Dropdown(label="Select Run", choices=[], interactive=True, allow_custom_value=True)
482
 
483
  with gr.Row():
484
  report_btn = gr.Button("πŸ“Š HTML Report", variant="primary")
 
496
  resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
497
  resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop")
498
 
499
+ # Refresh: load HTML first (which refreshes cache), then update dropdown choices
500
+ refresh_btn.click(api_name=False, fn=load_runs_html, outputs=[runs_html]).then(
501
+ api_name=False, fn=load_runs_choices, outputs=[run_dropdown])
502
 
503
  report_btn.click(api_name=False, fn=generate_report_for_run, inputs=[run_dropdown, domain_input], outputs=[report_file, action_status])
504
  csv_btn.click(api_name=False, fn=generate_csv_for_run, inputs=[run_dropdown], outputs=[csv_file, action_status])