Spaces:

edstellar
/

bulk-link-auditor

Sleeping

App Files Files Community

vijaykumaredstellar committed on Feb 25

Commit

828b2ac

verified ·

1 Parent(s): 9869300

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -111

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """
 Link Audit Tool — Gradio UI for Hugging Face Spaces
-Proper pause/resume via threading + Supabase persistence.
 """
 import gradio as gr
@@ -19,7 +18,6 @@ from db import (
     save_batch_results, update_run_status, delete_run,
 )
-# ─── Supabase Connection ───
 SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "")
 sb = None
@@ -29,10 +27,10 @@ if SUPABASE_URL and SUPABASE_KEY:
         sb.table("audit_runs").select("id").limit(1).execute()
         print("✅ Supabase connected")
     except Exception as e:
-        print(f"❌ Supabase connection failed: {e}")
         sb = None
-# ─── Audit State (thread-safe) ───
 class AuditState:
     def __init__(self):
         self.lock = threading.Lock()
@@ -41,30 +39,48 @@ class AuditState:
         self.run_id = None
     def request_pause(self):
-        with self.lock:
-            self.paused = True
     def resume(self):
-        with self.lock:
-            self.paused = False
     def is_paused(self):
-        with self.lock:
-            return self.paused
     def set_running(self, val, run_id=None):
         with self.lock:
             self.running = val
-            if run_id:
-                self.run_id = run_id
     def is_running(self):
-        with self.lock:
-            return self.running
 audit_state = AuditState()
 def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
     if sb is None:
         yield "❌ Supabase not connected.", ""
@@ -79,27 +95,21 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
             for col in df.columns:
                 sample = str(df[col].iloc[0]).strip().lower()
                 if sample.startswith('http') or domain in sample:
-                    url_col = col
-                    break
-            if url_col is None:
-                url_col = df.columns[0]
             urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
         except Exception as e:
-            yield f"❌ File error: {e}", ""
-            return
     elif pasted_urls and pasted_urls.strip():
         urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
     if not urls:
-        yield "⚠ No valid URLs.", ""
-        return
     seen = set()
     unique = []
     for u in urls:
-        if u not in seen:
-            seen.add(u)
-            unique.append(u)
     urls = unique
     run_name = f"{domain} Audit — {datetime.now().strftime('%b %d %H:%M')} — {len(urls)} pages"
@@ -122,8 +132,7 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
                 update_run_status(sb, run_id, "paused", c)
                 log.append(f"⏸️ PAUSED at {c}/{total}")
                 audit_state.set_running(False)
-                yield "\n".join(log[-40:]), f"⏸️ Paused — {c}/{total}"
-                return
             be = min(bs + batch_size, total)
             batch_urls = urls[bs:be]
@@ -132,14 +141,12 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
             for j, url in enumerate(batch_urls):
                 if audit_state.is_paused():
-                    if batch_results:
-                        save_batch_results(sb, run_id, batch_results)
                     c = get_completed_count(sb, run_id)
                     update_run_status(sb, run_id, "paused", c)
                     log.append(f"⏸️ PAUSED at {c}/{total}")
                     audit_state.set_running(False)
-                    yield "\n".join(log[-40:]), f"⏸️ Paused — {c}/{total}"
-                    return
                 gi = bs + j + 1
                 elapsed = time.time() - start_time
@@ -165,8 +172,7 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
                     log.append(f"[{gi}/{total}] {short} — Int:{result['int_count']} Ext:{result['ext_count']} {fs}")
                 yield "\n".join(log[-40:]), f"📊 {gi}/{total} ({gi*100//total}%) Batch{batch_num} ETA:{eta_s}"
-                if j < len(batch_urls) - 1:
-                    time.sleep(delay)
             if batch_results:
                 try:
@@ -187,21 +193,18 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
         targets, pg_urls = set(), set()
         for r in all_results:
             pg_urls.add(r['url'].rstrip('/').split('?')[0])
-            for lk in r.get('internal_links', []):
-                targets.add(lk['url'].rstrip('/').split('?')[0])
         orphans = sorted([p for p in pg_urls if p not in targets])
         summary = {
-            'total_pages': len(all_results),
-            'total_int': sum(r.get('int_count', 0) for r in all_results),
-            'total_ext': sum(r.get('ext_count', 0) for r in all_results),
-            'total_broken': sum(r.get('broken_int_count', 0) + r.get('broken_ext_count', 0) for r in all_results),
-            'total_redirects': sum(r.get('redirect_int_count', 0) + r.get('redirect_ext_count', 0) for r in all_results),
-            'total_flags': sum(r.get('follow_flag_count', 0) for r in all_results),
-            'total_dups': sum(r.get('duplicate_count', 0) for r in all_results),
-            'total_sug': sum(len(r.get('suggestions', [])) for r in all_results),
-            'orphan_count': len(orphans),
-            'orphan_urls': orphans[:100],
         }
         update_run_status(sb, run_id, "completed", len(all_results), summary)
         tt = time.time() - start_time
@@ -214,9 +217,7 @@ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
     except Exception as e:
         log.append(f"❌ {str(e)}")
         audit_state.set_running(False)
-        try:
-            c = get_completed_count(sb, run_id)
-            update_run_status(sb, run_id, "paused", c)
         except: pass
         yield "\n".join(log[-40:]), "❌ Error — progress saved"
@@ -228,13 +229,16 @@ def pause_audit():
     return "No audit running."
-def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
     if sb is None:
-        yield "❌ Supabase not connected.", ""
-        return
     if not run_id:
-        yield "⚠ Select a run first.", ""
-        return
     all_urls = get_pending_urls(sb, run_id)
     done = get_completed_urls(sb, run_id)
@@ -242,12 +246,10 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
     if not remaining:
         update_run_status(sb, run_id, "completed", len(done))
-        yield "✅ Already complete!", ""
-        return
     try:
-        runs = get_all_runs(sb)
-        rd = next((r for r in runs if r['id'] == run_id), None)
         if rd: domain = rd.get('domain', domain)
     except: pass
@@ -269,13 +271,11 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
                 update_run_status(sb, run_id, "paused", c)
                 log.append(f"⏸️ PAUSED {c}/{total}")
                 audit_state.set_running(False)
-                yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"
-                return
             be = min(bs + batch_size, len(remaining))
             bu = remaining[bs:be]
-            bn += 1
-            br = []
             for j, url in enumerate(bu):
                 if audit_state.is_paused():
@@ -284,8 +284,7 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
                     update_run_status(sb, run_id, "paused", c)
                     log.append(f"⏸️ PAUSED {c}/{total}")
                     audit_state.set_running(False)
-                    yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"
-                    return
                 gi = len(done) + bs + j + 1
                 elapsed = time.time() - start_time
@@ -344,84 +343,70 @@ def resume_audit(run_id, domain, batch_size, timeout, delay, workers):
     except Exception as e:
         log.append(f"❌ {str(e)}")
         audit_state.set_running(False)
-        try:
-            c = get_completed_count(sb, run_id)
-            update_run_status(sb, run_id, "paused", c)
         except: pass
         yield "\n".join(log[-40:]), "❌ Error"
 # ═══════════════════════════════════════════════════
-#  PAST RUNS — returns ONLY strings, no component objects
 # ═══════════════════════════════════════════════════
 def load_runs_html():
-    """Returns HTML table only."""
-    if sb is None:
-        return "<p>❌ Supabase not connected</p>"
-    runs = get_all_runs(sb)
-    if not runs:
-        return "<p>No saved runs yet.</p>"
     html = '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
     html += '<tr style="background:#f1f5f9;"><th style="padding:8px;text-align:left;">Run</th><th style="padding:8px;text-align:center;">Status</th><th style="padding:8px;text-align:center;">Pages</th><th style="padding:8px;text-align:center;">Broken</th><th style="padding:8px;text-align:center;">Flags</th><th style="padding:8px;text-align:center;">Dups</th><th style="padding:8px;text-align:center;">Orphans</th></tr>'
-    for r in runs:
         s = r.get('summary', {}) or {}
         st = r.get('status', '?')
         sc = {'completed':'#059669','paused':'#d97706','running':'#2563eb'}.get(st,'#888')
-        sb2 = {'completed':'rgba(5,150,105,0.1)','paused':'rgba(217,119,6,0.1)','running':'rgba(37,99,235,0.1)'}.get(st,'rgba(136,136,136,0.1)')
         cr = r.get('created_at','')[:16].replace('T',' ')
-        html += f'<tr style="border-bottom:1px solid #e2e8f0;"><td style="padding:8px;"><b>{r.get("name","?")}</b><br><span style="font-size:10px;color:#94a3b8;">{cr}</span></td><td style="padding:8px;text-align:center;"><span style="background:{sb2};color:{sc};padding:2px 8px;border-radius:10px;font-size:10px;font-weight:700;">{st.upper()}</span></td><td style="padding:8px;text-align:center;font-weight:700;">{r.get("completed_urls",0)}/{r.get("total_urls",0)}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("total_broken","—")}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("total_flags","—")}</td><td style="padding:8px;text-align:center;color:#db2777;font-weight:700;">{s.get("total_dups","—")}</td><td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{s.get("orphan_count","—")}</td></tr>'
     html += '</table>'
     return html
 def load_runs_choices():
-    """Returns list of (label, value) tuples for dropdown."""
-    if sb is None:
-        return []
-    runs = get_all_runs(sb)
-    if not runs:
-        return []
     choices = []
-    for r in runs:
         st = r.get('status', '?')
         label = f"{r.get('name','?')} [{st.upper()}] ({r.get('completed_urls',0)}/{r.get('total_urls',0)})"
-        choices.append((label, r['id']))
     return choices
-def generate_report_for_run(run_id, domain):
-    if sb is None or not run_id:
         return None, "❌ No run selected."
     try:
-        run = None
-        for r in get_all_runs(sb):
-            if r['id'] == run_id:
-                run = r
-                break
         pages = get_all_page_results(sb, run_id)
-        if not pages:
-            return None, "⚠ No data."
         results = [p['result'] for p in pages]
         s = (run.get('summary', {}) or {}) if run else {}
         rh = generate_report(results, s.get('orphan_urls', []), run.get('domain', domain) if run else domain)
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Audit_')
-        tmp.write(rh.encode('utf-8'))
-        tmp.close()
         return tmp.name, f"✅ Report — {len(results)} pages"
     except Exception as e:
         return None, f"❌ {str(e)}"
-def generate_csv_for_run(run_id):
-    if sb is None or not run_id:
         return None, "❌ No run selected."
     try:
         pages = get_all_page_results(sb, run_id)
-        if not pages:
-            return None, "⚠ No data."
         rows = [{'URL': p['result'].get('url',''), 'Internal': p['result'].get('int_count',0),
                  'External': p['result'].get('ext_count',0),
                  'Broken': p['result'].get('broken_int_count',0)+p['result'].get('broken_ext_count',0),
@@ -429,16 +414,17 @@ def generate_csv_for_run(run_id):
                  'Flags': p['result'].get('follow_flag_count',0),
                  'Dups': p['result'].get('duplicate_count',0)} for p in pages]
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', prefix='Audit_')
-        pd.DataFrame(rows).to_csv(tmp.name, index=False)
-        tmp.close()
         return tmp.name, f"✅ CSV — {len(rows)} rows"
     except Exception as e:
         return None, f"❌ {str(e)}"
-def delete_selected_run(run_id):
-    if sb is None or not run_id:
         return "❌ No run selected."
     try:
         delete_run(sb, run_id)
         return "🗑️ Deleted. Click Refresh."
@@ -487,10 +473,12 @@ with gr.Blocks(title="Link Audit Tool", theme=gr.themes.Soft()) as app:
             pause_btn.click(api_name=False, fn=pause_audit, outputs=[progress_text])
         with gr.Tab("📁 Past Runs"):
-            with gr.Row():
-                refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
             runs_html = gr.HTML(value="<p>Click Refresh to load.</p>")
-            run_dropdown = gr.Dropdown(label="Select Run", choices=[], interactive=True, type="value")
             with gr.Row():
                 report_btn = gr.Button("📊 HTML Report", variant="primary")
@@ -508,9 +496,9 @@ with gr.Blocks(title="Link Audit Tool", theme=gr.themes.Soft()) as app:
             resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
             resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop")
-            # Refresh: update HTML and dropdown separately
-            refresh_btn.click(api_name=False, fn=load_runs_html, outputs=[runs_html])
-            refresh_btn.click(api_name=False, fn=load_runs_choices, outputs=[run_dropdown])
             report_btn.click(api_name=False, fn=generate_report_for_run, inputs=[run_dropdown, domain_input], outputs=[report_file, action_status])
             csv_btn.click(api_name=False, fn=generate_csv_for_run, inputs=[run_dropdown], outputs=[csv_file, action_status])

 """
 Link Audit Tool — Gradio UI for Hugging Face Spaces
 """
 import gradio as gr
     save_batch_results, update_run_status, delete_run,
 )
 SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "")
 sb = None
         sb.table("audit_runs").select("id").limit(1).execute()
         print("✅ Supabase connected")
     except Exception as e:
+        print(f"❌ Supabase failed: {e}")
         sb = None
 class AuditState:
     def __init__(self):
         self.lock = threading.Lock()
         self.run_id = None
     def request_pause(self):
+        with self.lock: self.paused = True
     def resume(self):
+        with self.lock: self.paused = False
     def is_paused(self):
+        with self.lock: return self.paused
     def set_running(self, val, run_id=None):
         with self.lock:
             self.running = val
+            if run_id: self.run_id = run_id
     def is_running(self):
+        with self.lock: return self.running
 audit_state = AuditState()
+# ─── Global runs cache for dropdown ───
+_runs_cache = []
def _refresh_cache():
    """Reload the module-level ``_runs_cache`` list of saved runs.

    With no Supabase client (``sb`` is None) the cache is emptied; otherwise
    it is replaced by the result of ``get_all_runs(sb)``, and a falsy result
    is stored as an empty list.
    """
    global _runs_cache
    _runs_cache = [] if sb is None else (get_all_runs(sb) or [])
def _get_run_id_by_label(label):
    """Resolve a dropdown selection to a run ID.

    The label is compared with the text rebuilt for every run in the
    module-level ``_runs_cache``:
    ``"<name> [<STATUS>] (<completed>/<total>)"``. If no cached run matches,
    the value is accepted as a raw run ID only when it parses as a UUID.

    Args:
        label: Selected dropdown value (a label string or a raw UUID).

    Returns:
        The matching run's ``id``, the stripped label itself when it is a
        well-formed UUID, or ``None`` when nothing matches.
    """
    import uuid  # local import: keeps this block self-contained

    if not label or not isinstance(label, str):
        return None

    for r in _runs_cache:
        st = r.get('status', '?')
        expected = f"{r.get('name','?')} [{st.upper()}] ({r.get('completed_urls',0)}/{r.get('total_urls',0)})"
        if label == expected:
            return r['id']

    # Labels are routinely longer than 30 characters, so a length check
    # cannot tell a stale label from a raw ID; parse the value instead.
    candidate = label.strip()
    try:
        uuid.UUID(candidate)
    except ValueError:
        return None
    return candidate
+# ═══════════════════════════════════════════════════
 def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers):
     if sb is None:
         yield "❌ Supabase not connected.", ""
             for col in df.columns:
                 sample = str(df[col].iloc[0]).strip().lower()
                 if sample.startswith('http') or domain in sample:
+                    url_col = col; break
+            if not url_col: url_col = df.columns[0]
             urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')]
         except Exception as e:
+            yield f"❌ File error: {e}", ""; return
     elif pasted_urls and pasted_urls.strip():
         urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')]
     if not urls:
+        yield "⚠ No valid URLs.", ""; return
     seen = set()
     unique = []
     for u in urls:
+        if u not in seen: seen.add(u); unique.append(u)
     urls = unique
     run_name = f"{domain} Audit — {datetime.now().strftime('%b %d %H:%M')} — {len(urls)} pages"
                 update_run_status(sb, run_id, "paused", c)
                 log.append(f"⏸️ PAUSED at {c}/{total}")
                 audit_state.set_running(False)
+                yield "\n".join(log[-40:]), f"⏸️ Paused — {c}/{total}"; return
             be = min(bs + batch_size, total)
             batch_urls = urls[bs:be]
             for j, url in enumerate(batch_urls):
                 if audit_state.is_paused():
+                    if batch_results: save_batch_results(sb, run_id, batch_results)
                     c = get_completed_count(sb, run_id)
                     update_run_status(sb, run_id, "paused", c)
                     log.append(f"⏸️ PAUSED at {c}/{total}")
                     audit_state.set_running(False)
+                    yield "\n".join(log[-40:]), f"⏸️ Paused — {c}/{total}"; return
                 gi = bs + j + 1
                 elapsed = time.time() - start_time
                     log.append(f"[{gi}/{total}] {short} — Int:{result['int_count']} Ext:{result['ext_count']} {fs}")
                 yield "\n".join(log[-40:]), f"📊 {gi}/{total} ({gi*100//total}%) Batch{batch_num} ETA:{eta_s}"
+                if j < len(batch_urls) - 1: time.sleep(delay)
             if batch_results:
                 try:
         targets, pg_urls = set(), set()
         for r in all_results:
             pg_urls.add(r['url'].rstrip('/').split('?')[0])
+            for lk in r.get('internal_links', []): targets.add(lk['url'].rstrip('/').split('?')[0])
         orphans = sorted([p for p in pg_urls if p not in targets])
         summary = {
+            'total_pages': len(all_results), 'total_int': sum(r.get('int_count',0) for r in all_results),
+            'total_ext': sum(r.get('ext_count',0) for r in all_results),
+            'total_broken': sum(r.get('broken_int_count',0)+r.get('broken_ext_count',0) for r in all_results),
+            'total_redirects': sum(r.get('redirect_int_count',0)+r.get('redirect_ext_count',0) for r in all_results),
+            'total_flags': sum(r.get('follow_flag_count',0) for r in all_results),
+            'total_dups': sum(r.get('duplicate_count',0) for r in all_results),
+            'total_sug': sum(len(r.get('suggestions',[])) for r in all_results),
+            'orphan_count': len(orphans), 'orphan_urls': orphans[:100],
         }
         update_run_status(sb, run_id, "completed", len(all_results), summary)
         tt = time.time() - start_time
     except Exception as e:
         log.append(f"❌ {str(e)}")
         audit_state.set_running(False)
+        try: c = get_completed_count(sb, run_id); update_run_status(sb, run_id, "paused", c)
         except: pass
         yield "\n".join(log[-40:]), "❌ Error — progress saved"
     return "No audit running."
+# ═══════════════════════════════════════════════════
+def resume_audit(run_label, domain, batch_size, timeout, delay, workers):
     if sb is None:
+        yield "❌ Supabase not connected.", ""; return
+    if not run_label:
+        yield "⚠ Select a run first (click Refresh, then pick from dropdown).", ""; return
+    run_id = _get_run_id_by_label(run_label)
     if not run_id:
+        yield f"❌ Could not find run for: {run_label}", ""; return
     all_urls = get_pending_urls(sb, run_id)
     done = get_completed_urls(sb, run_id)
     if not remaining:
         update_run_status(sb, run_id, "completed", len(done))
+        yield "✅ Already complete!", ""; return
     try:
+        rd = next((r for r in _runs_cache if r['id'] == run_id), None)
         if rd: domain = rd.get('domain', domain)
     except: pass
                 update_run_status(sb, run_id, "paused", c)
                 log.append(f"⏸️ PAUSED {c}/{total}")
                 audit_state.set_running(False)
+                yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"; return
             be = min(bs + batch_size, len(remaining))
             bu = remaining[bs:be]
+            bn += 1; br = []
             for j, url in enumerate(bu):
                 if audit_state.is_paused():
                     update_run_status(sb, run_id, "paused", c)
                     log.append(f"⏸️ PAUSED {c}/{total}")
                     audit_state.set_running(False)
+                    yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"; return
                 gi = len(done) + bs + j + 1
                 elapsed = time.time() - start_time
     except Exception as e:
         log.append(f"❌ {str(e)}")
         audit_state.set_running(False)
+        try: c = get_completed_count(sb, run_id); update_run_status(sb, run_id, "paused", c)
         except: pass
         yield "\n".join(log[-40:]), "❌ Error"
 # ═══════════════════════════════════════════════════
+#  PAST RUNS
 # ═══════════════════════════════════════════════════
 def load_runs_html():
     """Refresh the run cache and render the saved runs as an HTML table.

     Calls ``_refresh_cache()`` first, so the module-level ``_runs_cache`` is
     reloaded every time this runs.

     Returns:
         An HTML ``<table>`` string with one row per cached run, or a short
         ``<p>`` message when the cache is empty.
     """
     from html import escape  # local import: leaves file-level imports alone

     _refresh_cache()
     if not _runs_cache:
         return "<p>No saved runs.</p>"

     def cell(value):
         # Run names embed the user-supplied domain, so every interpolated
         # record value is escaped before it is placed into markup.
         return escape(str(value))

     status_fg = {'completed':'#059669','paused':'#d97706','running':'#2563eb'}
     status_bg = {'completed':'rgba(5,150,105,0.1)','paused':'rgba(217,119,6,0.1)','running':'rgba(37,99,235,0.1)'}

     parts = [
         '<table style="width:100%;border-collapse:collapse;font-size:13px;">',
         '<tr style="background:#f1f5f9;"><th style="padding:8px;text-align:left;">Run</th><th style="padding:8px;text-align:center;">Status</th><th style="padding:8px;text-align:center;">Pages</th><th style="padding:8px;text-align:center;">Broken</th><th style="padding:8px;text-align:center;">Flags</th><th style="padding:8px;text-align:center;">Dups</th><th style="padding:8px;text-align:center;">Orphans</th></tr>',
     ]
     for r in _runs_cache:
         s = r.get('summary', {}) or {}
         # A key that is present but None must not crash .upper() / slicing.
         st = str(r.get('status') or '?')
         sc = status_fg.get(st, '#888')
         bg = status_bg.get(st, 'rgba(136,136,136,0.1)')
         cr = str(r.get('created_at') or '')[:16].replace('T', ' ')

         name = cell(r.get("name", "?"))
         pages = f'{cell(r.get("completed_urls", 0))}/{cell(r.get("total_urls", 0))}'
         broken = cell(s.get("total_broken", "—"))
         flags = cell(s.get("total_flags", "—"))
         dups = cell(s.get("total_dups", "—"))
         orphans = cell(s.get("orphan_count", "—"))

         parts.append(
             f'<tr style="border-bottom:1px solid #e2e8f0;"><td style="padding:8px;"><b>{name}</b><br><span style="font-size:10px;color:#94a3b8;">{cell(cr)}</span></td>'
             f'<td style="padding:8px;text-align:center;"><span style="background:{bg};color:{sc};padding:2px 8px;border-radius:10px;font-size:10px;font-weight:700;">{cell(st.upper())}</span></td>'
             f'<td style="padding:8px;text-align:center;font-weight:700;">{pages}</td>'
             f'<td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{broken}</td>'
             f'<td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{flags}</td>'
             f'<td style="padding:8px;text-align:center;color:#db2777;font-weight:700;">{dups}</td>'
             f'<td style="padding:8px;text-align:center;color:#dc2626;font-weight:700;">{orphans}</td></tr>'
         )
     parts.append('</table>')
     return ''.join(parts)
 def load_runs_choices():
     """Build the dropdown choices from the cached runs.

     Reads the module-level ``_runs_cache`` without refreshing it (the cache
     is reloaded by ``load_runs_html``) and returns one plain label string
     per run, formatted as ``"<name> [<STATUS>] (<completed>/<total>)"``.
     """
     return [
         f"{run.get('name','?')} [{run.get('status', '?').upper()}] "
         f"({run.get('completed_urls',0)}/{run.get('total_urls',0)})"
         for run in _runs_cache
     ]
def generate_report_for_run(run_label, domain):
    """Build a downloadable HTML report for the run selected in the dropdown.

    Args:
        run_label: Dropdown value; resolved to a run ID via
            ``_get_run_id_by_label``.
        domain: Fallback domain, used when the run is not in ``_runs_cache``
            or its record carries no usable ``domain`` value.

    Returns:
        ``(path, message)``: the path of a temporary ``.html`` file plus a
        status line, or ``(None, message)`` when nothing could be generated.
        Any exception raised while building the report is reported through
        the message instead of propagating.
    """
    if sb is None or not run_label:
        return None, "❌ No run selected."
    run_id = _get_run_id_by_label(run_label)
    if not run_id:
        return None, "❌ Run not found."
    try:
        run = next((r for r in _runs_cache if r['id'] == run_id), None) or {}
        pages = get_all_page_results(sb, run_id)
        if not pages:
            return None, "⚠ No data."
        results = [p['result'] for p in pages]
        s = run.get('summary', {}) or {}
        # A 'domain' key holding None/'' must not override the caller's value.
        report_domain = run.get('domain') or domain
        rh = generate_report(results, s.get('orphan_urls', []), report_domain)
        # delete=False keeps the file on disk after the handle is closed, so
        # the returned path stays valid; the context manager closes the handle
        # even if the write raises.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Audit_') as tmp:
            tmp.write(rh.encode('utf-8'))
        return tmp.name, f"✅ Report — {len(results)} pages"
    except Exception as e:
        return None, f"❌ {str(e)}"
+def generate_csv_for_run(run_label):
+    if sb is None or not run_label:
         return None, "❌ No run selected."
+    run_id = _get_run_id_by_label(run_label)
+    if not run_id: return None, "❌ Run not found."
     try:
         pages = get_all_page_results(sb, run_id)
+        if not pages: return None, "⚠ No data."
         rows = [{'URL': p['result'].get('url',''), 'Internal': p['result'].get('int_count',0),
                  'External': p['result'].get('ext_count',0),
                  'Broken': p['result'].get('broken_int_count',0)+p['result'].get('broken_ext_count',0),
                  'Flags': p['result'].get('follow_flag_count',0),
                  'Dups': p['result'].get('duplicate_count',0)} for p in pages]
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', prefix='Audit_')
+        pd.DataFrame(rows).to_csv(tmp.name, index=False); tmp.close()
         return tmp.name, f"✅ CSV — {len(rows)} rows"
     except Exception as e:
         return None, f"❌ {str(e)}"
+def delete_selected_run(run_label):
+    if sb is None or not run_label:
         return "❌ No run selected."
+    run_id = _get_run_id_by_label(run_label)
+    if not run_id: return "❌ Run not found."
     try:
         delete_run(sb, run_id)
         return "🗑️ Deleted. Click Refresh."
             pause_btn.click(api_name=False, fn=pause_audit, outputs=[progress_text])
         with gr.Tab("📁 Past Runs"):
+            refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
             runs_html = gr.HTML(value="<p>Click Refresh to load.</p>")
+            # Dropdown uses plain string labels (no tuples, no UUIDs as values)
+            # We look up the UUID from the label when needed
+            run_dropdown = gr.Dropdown(label="Select Run", choices=[], interactive=True, allow_custom_value=True)
             with gr.Row():
                 report_btn = gr.Button("📊 HTML Report", variant="primary")
             resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
             resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop")
+            # Refresh: load HTML first (which refreshes cache), then update dropdown choices
+            refresh_btn.click(api_name=False, fn=load_runs_html, outputs=[runs_html]).then(
+                api_name=False, fn=load_runs_choices, outputs=[run_dropdown])
             report_btn.click(api_name=False, fn=generate_report_for_run, inputs=[run_dropdown, domain_input], outputs=[report_file, action_status])
             csv_btn.click(api_name=False, fn=generate_csv_for_run, inputs=[run_dropdown], outputs=[csv_file, action_status])