""" Link Audit Tool — Gradio UI for Hugging Face Spaces """ import gradio as gr import pandas as pd import time import json import os import threading import tempfile from datetime import datetime from audit_engine import audit_page, DEFAULT_BODY_SELECTORS, DEFAULT_SUGGESTION_MAP from report_generator import generate_report from db import ( get_client, create_run, get_all_runs, get_all_page_results, get_completed_urls, get_pending_urls, get_completed_count, save_batch_results, update_run_status, delete_run, ) SUPABASE_URL = os.environ.get("SUPABASE_URL", "") SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "") sb = None if SUPABASE_URL and SUPABASE_KEY: try: sb = get_client(SUPABASE_URL, SUPABASE_KEY) sb.table("audit_runs").select("id").limit(1).execute() print("✅ Supabase connected") except Exception as e: print(f"❌ Supabase failed: {e}") sb = None class AuditState: def __init__(self): self.lock = threading.Lock() self.paused = False self.running = False self.run_id = None def request_pause(self): with self.lock: self.paused = True def resume(self): with self.lock: self.paused = False def is_paused(self): with self.lock: return self.paused def set_running(self, val, run_id=None): with self.lock: self.running = val if run_id: self.run_id = run_id def is_running(self): with self.lock: return self.running audit_state = AuditState() # ─── Global runs cache for dropdown ─── _runs_cache = [] def _refresh_cache(): global _runs_cache if sb is None: _runs_cache = [] return _runs_cache = get_all_runs(sb) or [] def _get_run_id_by_label(label): """Look up run ID from dropdown label.""" for r in _runs_cache: st = r.get('status', '?') expected = f"{r.get('name','?')} [{st.upper()}] ({r.get('completed_urls',0)}/{r.get('total_urls',0)})" if label == expected: return r['id'] # Maybe it's a raw UUID if label and len(label) > 30: return label return None # ═══════════════════════════════════════════════════ def run_audit(file, pasted_urls, domain, batch_size, timeout, delay, workers): 
if sb is None: yield "❌ Supabase not connected.", "" return urls = [] if file is not None: try: fpath = file.name if hasattr(file, 'name') else file df = pd.read_csv(fpath) if str(fpath).endswith('.csv') else pd.read_excel(fpath) url_col = None for col in df.columns: sample = str(df[col].iloc[0]).strip().lower() if sample.startswith('http') or domain in sample: url_col = col; break if not url_col: url_col = df.columns[0] urls = [u for u in df[url_col].dropna().astype(str).str.strip().tolist() if u.startswith('http')] except Exception as e: yield f"❌ File error: {e}", ""; return elif pasted_urls and pasted_urls.strip(): urls = [u.strip() for u in pasted_urls.strip().split('\n') if u.strip().startswith('http')] if not urls: yield "⚠ No valid URLs.", ""; return seen = set() unique = [] for u in urls: if u not in seen: seen.add(u); unique.append(u) urls = unique run_name = f"{domain} Audit — {datetime.now().strftime('%b %d %H:%M')} — {len(urls)} pages" run_id = create_run(sb, run_name, domain, len(urls), urls) audit_state.set_running(True, run_id) audit_state.resume() total = len(urls) batch_size, timeout, workers = int(batch_size), int(timeout), int(workers) start_time = time.time() batch_num = 0 log = [] yield f"🚀 {run_name}\n📦 {total} URLs · Batch: {batch_size}", "▶️ Running..." 
try: for bs in range(0, total, batch_size): if audit_state.is_paused(): c = get_completed_count(sb, run_id) update_run_status(sb, run_id, "paused", c) log.append(f"⏸️ PAUSED at {c}/{total}") audit_state.set_running(False) yield "\n".join(log[-40:]), f"⏸️ Paused — {c}/{total}"; return be = min(bs + batch_size, total) batch_urls = urls[bs:be] batch_num += 1 batch_results = [] for j, url in enumerate(batch_urls): if audit_state.is_paused(): if batch_results: save_batch_results(sb, run_id, batch_results) c = get_completed_count(sb, run_id) update_run_status(sb, run_id, "paused", c) log.append(f"⏸️ PAUSED at {c}/{total}") audit_state.set_running(False) yield "\n".join(log[-40:]), f"⏸️ Paused — {c}/{total}"; return gi = bs + j + 1 elapsed = time.time() - start_time eta = (elapsed / gi) * (total - gi) eta_s = f"{int(eta//60)}m{int(eta%60)}s" if eta > 60 else f"{eta:.0f}s" result = audit_page(url, domain, DEFAULT_BODY_SELECTORS, suggestion_map=DEFAULT_SUGGESTION_MAP, timeout=timeout, concurrent_workers=workers) batch_results.append(result) short = url.replace('https://www.', '').replace('https://', '')[:70] if result['error']: log.append(f"❌ [{gi}/{total}] {short} — {result['error'][:50]}") else: b = result['broken_int_count'] + result['broken_ext_count'] fc = result['follow_flag_count'] d = result['duplicate_count'] fl = [] if b: fl.append(f"🔴{b}broken") if fc: fl.append(f"🟡{fc}flags") if d: fl.append(f"🟣{d}dups") fs = " ".join(fl) if fl else "✅" log.append(f"[{gi}/{total}] {short} — Int:{result['int_count']} Ext:{result['ext_count']} {fs}") yield "\n".join(log[-40:]), f"📊 {gi}/{total} ({gi*100//total}%) Batch{batch_num} ETA:{eta_s}" if j < len(batch_urls) - 1: time.sleep(delay) if batch_results: try: save_batch_results(sb, run_id, batch_results) c = get_completed_count(sb, run_id) update_run_status(sb, run_id, "running", c) log.append(f"💾 Batch {batch_num} saved — {c}/{total}") except Exception as e: log.append(f"⚠ Save error: {str(e)[:60]}") yield "\n".join(log[-40:]), 
f"💾 Batch {batch_num} saved" del batch_results log.append("🔍 Orphan analysis...") yield "\n".join(log[-40:]), "🔍 Orphan analysis..." all_pages = get_all_page_results(sb, run_id) all_results = [p['result'] for p in all_pages] targets, pg_urls = set(), set() for r in all_results: pg_urls.add(r['url'].rstrip('/').split('?')[0]) for lk in r.get('internal_links', []): targets.add(lk['url'].rstrip('/').split('?')[0]) orphans = sorted([p for p in pg_urls if p not in targets]) summary = { 'total_pages': len(all_results), 'total_int': sum(r.get('int_count',0) for r in all_results), 'total_ext': sum(r.get('ext_count',0) for r in all_results), 'total_broken': sum(r.get('broken_int_count',0)+r.get('broken_ext_count',0) for r in all_results), 'total_redirects': sum(r.get('redirect_int_count',0)+r.get('redirect_ext_count',0) for r in all_results), 'total_flags': sum(r.get('follow_flag_count',0) for r in all_results), 'total_dups': sum(r.get('duplicate_count',0) for r in all_results), 'total_sug': sum(len(r.get('suggestions',[])) for r in all_results), 'orphan_count': len(orphans), 'orphan_urls': orphans[:100], } update_run_status(sb, run_id, "completed", len(all_results), summary) tt = time.time() - start_time log.append(f"✅ DONE! {len(all_results)} pages in {tt:.0f}s · {len(orphans)} orphans") log.append(f"Broken:{summary['total_broken']} Redirects:{summary['total_redirects']} Flags:{summary['total_flags']} Dups:{summary['total_dups']}") log.append("→ Past Runs → Refresh → Generate Report") audit_state.set_running(False) yield "\n".join(log[-40:]), f"✅ Done — {len(all_results)} pages in {tt:.0f}s" except Exception as e: log.append(f"❌ {str(e)}") audit_state.set_running(False) try: c = get_completed_count(sb, run_id); update_run_status(sb, run_id, "paused", c) except: pass yield "\n".join(log[-40:]), "❌ Error — progress saved" def pause_audit(): if audit_state.is_running(): audit_state.request_pause() return "⏸️ Stopping after current page..." return "No audit running." 
# ═══════════════════════════════════════════════════
def resume_audit(run_label, domain, batch_size, timeout, delay, workers):
    """Continue a paused run, auditing only URLs not yet completed.

    Yields:
        (log_text, status_text) tuples consumed by the Resume Textboxes.
    """
    if sb is None:
        yield "❌ Supabase not connected.", ""
        return
    if not run_label:
        yield "⚠ Select a run first (click Refresh, then pick from dropdown).", ""
        return
    run_id = _get_run_id_by_label(run_label)
    if not run_id:
        yield f"❌ Could not find run for: {run_label}", ""
        return

    all_urls = get_pending_urls(sb, run_id)
    # Set for O(1) membership tests when filtering the remaining URLs.
    done = set(get_completed_urls(sb, run_id))
    remaining = [u for u in all_urls if u not in done]
    if not remaining:
        update_run_status(sb, run_id, "completed", len(done))
        yield "✅ Already complete!", ""
        return

    # Prefer the domain stored on the run record over the textbox value.
    try:
        rd = next((r for r in _runs_cache if r['id'] == run_id), None)
        if rd:
            domain = rd.get('domain', domain)
    except Exception:
        pass

    audit_state.set_running(True, run_id)
    audit_state.resume()
    update_run_status(sb, run_id, "running")

    total = len(all_urls)
    batch_size, timeout, workers = int(batch_size), int(timeout), int(workers)
    start_time = time.time()
    bn = 0
    log = [f"▶️ Resuming — {len(remaining)} left ({len(done)} done)"]
    yield "\n".join(log), f"Resuming: {len(done)}/{total}"

    try:
        for bs in range(0, len(remaining), batch_size):
            # Pause check between batches.
            if audit_state.is_paused():
                c = get_completed_count(sb, run_id)
                update_run_status(sb, run_id, "paused", c)
                log.append(f"⏸️ PAUSED {c}/{total}")
                audit_state.set_running(False)
                yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"
                return

            be = min(bs + batch_size, len(remaining))
            bu = remaining[bs:be]
            bn += 1
            br = []

            for j, url in enumerate(bu):
                # Pause check between pages — flush partial batch first.
                if audit_state.is_paused():
                    if br:
                        save_batch_results(sb, run_id, br)
                    c = get_completed_count(sb, run_id)
                    update_run_status(sb, run_id, "paused", c)
                    log.append(f"⏸️ PAUSED {c}/{total}")
                    audit_state.set_running(False)
                    yield "\n".join(log[-40:]), f"⏸️ Paused {c}/{total}"
                    return

                gi = len(done) + bs + j + 1  # global 1-based index incl. prior progress
                elapsed = time.time() - start_time
                proc = bs + j + 1            # pages processed this session (for ETA)
                eta = (elapsed / proc) * (len(remaining) - proc)
                eta_s = f"{int(eta//60)}m{int(eta%60)}s" if eta > 60 else f"{eta:.0f}s"

                result = audit_page(
                    url, domain, DEFAULT_BODY_SELECTORS,
                    suggestion_map=DEFAULT_SUGGESTION_MAP,
                    timeout=timeout, concurrent_workers=workers,
                )
                br.append(result)

                short = url.replace('https://www.', '').replace('https://', '')[:70]
                if result['error']:
                    log.append(f"❌ [{gi}/{total}] {short}")
                else:
                    b = result['broken_int_count'] + result['broken_ext_count']
                    log.append(f"[{gi}/{total}] {short} {'🔴'+str(b) if b else '✅'}")
                yield "\n".join(log[-40:]), f"📊 {gi}/{total} ({gi*100//total}%) ETA:{eta_s}"

                if j < len(bu) - 1:
                    time.sleep(delay)

            if br:
                save_batch_results(sb, run_id, br)
                c = get_completed_count(sb, run_id)
                update_run_status(sb, run_id, "running", c)
                log.append(f"💾 Batch {bn} — {c}/{total}")

        # ── Post-crawl orphan analysis (same definition as run_audit) ──
        log.append("🔍 Orphan analysis...")
        yield "\n".join(log[-40:]), "🔍 Orphans..."
        ap = get_all_page_results(sb, run_id)
        ar = [p['result'] for p in ap]
        tgt, pg = set(), set()
        for r in ar:
            pg.add(r['url'].rstrip('/').split('?')[0])
            for lk in r.get('internal_links', []):
                tgt.add(lk['url'].rstrip('/').split('?')[0])
        orph = sorted(p for p in pg if p not in tgt)

        fs = {
            'total_pages': len(ar),
            'total_int': sum(r.get('int_count', 0) for r in ar),
            'total_ext': sum(r.get('ext_count', 0) for r in ar),
            'total_broken': sum(
                r.get('broken_int_count', 0) + r.get('broken_ext_count', 0) for r in ar
            ),
            'total_redirects': sum(
                r.get('redirect_int_count', 0) + r.get('redirect_ext_count', 0) for r in ar
            ),
            'total_flags': sum(r.get('follow_flag_count', 0) for r in ar),
            'total_dups': sum(r.get('duplicate_count', 0) for r in ar),
            'total_sug': sum(len(r.get('suggestions', [])) for r in ar),
            'orphan_count': len(orph),
            'orphan_urls': orph[:100],
        }
        update_run_status(sb, run_id, "completed", len(ar), fs)

        tt = time.time() - start_time
        log.append(f"✅ DONE! {len(ar)} pages in {tt:.0f}s · {len(orph)} orphans")
        audit_state.set_running(False)
        yield "\n".join(log[-40:]), f"✅ Done — {len(ar)} pages"
    except Exception as e:
        log.append(f"❌ {str(e)}")
        audit_state.set_running(False)
        try:
            c = get_completed_count(sb, run_id)
            update_run_status(sb, run_id, "paused", c)
        except Exception:
            pass
        yield "\n".join(log[-40:]), "❌ Error"


# ═══════════════════════════════════════════════════
# PAST RUNS
# ═══════════════════════════════════════════════════
def load_runs_html():
    """Render the saved runs as an HTML table; also refreshes the runs cache.

    NOTE(review): the table markup below was reconstructed — the original HTML
    string literals were corrupted in the source (tags stripped). Column order,
    status colors and cell text follow the surviving fragments; verify styling
    against the intended design.
    """
    _refresh_cache()
    if not _runs_cache:
        return "<p style='color:#888;padding:12px;'>No saved runs.</p>"

    html = '<table style="width:100%;border-collapse:collapse;font-size:14px;">'
    html += (
        '<tr><th>Run</th><th>Status</th><th>Pages</th><th>Broken</th>'
        '<th>Flags</th><th>Dups</th><th>Orphans</th></tr>'
    )
    for r in _runs_cache:
        s = r.get('summary', {}) or {}
        st = r.get('status', '?')
        sc = {'completed': '#059669', 'paused': '#d97706', 'running': '#2563eb'}.get(st, '#888')
        bg = {
            'completed': 'rgba(5,150,105,0.1)',
            'paused': 'rgba(217,119,6,0.1)',
            'running': 'rgba(37,99,235,0.1)',
        }.get(st, 'rgba(136,136,136,0.1)')
        cr = r.get('created_at', '')[:16].replace('T', ' ')  # "YYYY-MM-DD HH:MM"
        html += (
            f'<tr style="border-bottom:1px solid #ddd;">'
            f'<td style="padding:6px;">{r.get("name","?")}<br>'
            f'<small style="color:#888;">{cr}</small></td>'
            f'<td style="padding:6px;color:{sc};background:{bg};">{st.upper()}</td>'
            f'<td style="padding:6px;">{r.get("completed_urls",0)}/{r.get("total_urls",0)}</td>'
            f'<td style="padding:6px;">{s.get("total_broken","—")}</td>'
            f'<td style="padding:6px;">{s.get("total_flags","—")}</td>'
            f'<td style="padding:6px;">{s.get("total_dups","—")}</td>'
            f'<td style="padding:6px;">{s.get("orphan_count","—")}</td>'
            f'</tr>'
        )
    html += '</table>'
    return html


def load_runs_choices():
    """Update the run dropdown with label strings built from the cached runs.

    Uses the cache populated by load_runs_html(); returns a gr.update so the
    dropdown's *choices* are replaced (returning a bare list would only set
    its value).
    """
    choices = []
    for r in _runs_cache:
        st = r.get('status', '?')
        label = (
            f"{r.get('name','?')} [{st.upper()}] "
            f"({r.get('completed_urls',0)}/{r.get('total_urls',0)})"
        )
        choices.append(label)
    return gr.update(choices=choices)


def generate_report_for_run(run_label, domain):
    """Build the HTML audit report for the selected run.

    Returns:
        (file_path_or_None, status_message) for the File and Textbox outputs.
    """
    if sb is None or not run_label:
        return None, "❌ No run selected."
    run_id = _get_run_id_by_label(run_label)
    if not run_id:
        return None, "❌ Run not found."
    try:
        run = next((r for r in _runs_cache if r['id'] == run_id), None)
        pages = get_all_page_results(sb, run_id)
        if not pages:
            return None, "⚠ No data."
        results = [p['result'] for p in pages]
        s = (run.get('summary', {}) or {}) if run else {}
        rh = generate_report(
            results,
            s.get('orphan_urls', []),
            run.get('domain', domain) if run else domain,
        )
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.html', prefix='Audit_')
        tmp.write(rh.encode('utf-8'))
        tmp.close()
        return tmp.name, f"✅ Report — {len(results)} pages"
    except Exception as e:
        return None, f"❌ {str(e)}"


def generate_csv_for_run(run_label):
    """Export per-page link counts for the selected run as a CSV download."""
    if sb is None or not run_label:
        return None, "❌ No run selected."
    run_id = _get_run_id_by_label(run_label)
    if not run_id:
        return None, "❌ Run not found."
    try:
        pages = get_all_page_results(sb, run_id)
        if not pages:
            return None, "⚠ No data."
        rows = [
            {
                'URL': p['result'].get('url', ''),
                'Internal': p['result'].get('int_count', 0),
                'External': p['result'].get('ext_count', 0),
                'Broken': p['result'].get('broken_int_count', 0)
                          + p['result'].get('broken_ext_count', 0),
                'Redirects': p['result'].get('redirect_int_count', 0)
                             + p['result'].get('redirect_ext_count', 0),
                'Flags': p['result'].get('follow_flag_count', 0),
                'Dups': p['result'].get('duplicate_count', 0),
            }
            for p in pages
        ]
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', prefix='Audit_')
        pd.DataFrame(rows).to_csv(tmp.name, index=False)
        tmp.close()
        return tmp.name, f"✅ CSV — {len(rows)} rows"
    except Exception as e:
        return None, f"❌ {str(e)}"


def delete_selected_run(run_label):
    """Delete the selected run from Supabase; the table refreshes on demand."""
    if sb is None or not run_label:
        return "❌ No run selected."
    run_id = _get_run_id_by_label(run_label)
    if not run_id:
        return "❌ Run not found."
    try:
        delete_run(sb, run_id)
        return "🗑️ Deleted. Click Refresh."
    except Exception as e:
        return f"❌ {str(e)}"
# ═══════════════════════════════════════════════════
# UI
# ═══════════════════════════════════════════════════
with gr.Blocks(title="Link Audit Tool", theme=gr.themes.Soft()) as app:
    # NOTE(review): banner markup reconstructed — the original HTML string was
    # corrupted in the source (tags stripped); the visible text is preserved.
    gr.HTML("""
    <div style="text-align:center;padding:16px 0;">
      <div style="font-size:12px;letter-spacing:3px;color:#888;">SEO LINK AUDIT TOOL</div>
      <h1 style="margin:6px 0;">🔗 Bulk Link Audit</h1>
      <p style="color:#666;margin:0;">Upload URLs → batch crawl → pause/resume → generate report</p>
    </div>
    """)
    conn = "🗄️ ✅ Supabase Connected" if sb else "🗄️ ❌ Not Connected"
    gr.Markdown(f"**{conn}**")

    with gr.Tabs():
        with gr.Tab("🔍 New Audit"):
            with gr.Row():
                with gr.Column(scale=2):
                    file_input = gr.File(
                        label="Upload Excel / CSV",
                        file_types=[".xlsx", ".csv", ".xls"],
                    )
                    pasted_urls = gr.Textbox(label="Or paste URLs (one per line)", lines=5)
                with gr.Column(scale=1):
                    domain_input = gr.Textbox(label="Your Domain", value="edstellar.com")
                    batch_size_input = gr.Slider(5, 50, value=25, step=5, label="Batch Size")
                    timeout_input = gr.Slider(5, 60, value=15, step=5, label="Timeout (s)")
                    delay_input = gr.Slider(0, 5, value=1.0, step=0.5, label="Delay (s)")
                    workers_input = gr.Slider(1, 10, value=5, step=1, label="Parallel checks")
            with gr.Row():
                run_btn = gr.Button("🚀 Run Audit", variant="primary", scale=2)
                pause_btn = gr.Button("⏸️ Pause", variant="stop", scale=1)
            progress_text = gr.Textbox(label="Status", interactive=False)
            log_output = gr.Textbox(label="Audit Log", lines=20, interactive=False)

            run_btn.click(
                api_name=False,
                fn=run_audit,
                inputs=[file_input, pasted_urls, domain_input, batch_size_input,
                        timeout_input, delay_input, workers_input],
                outputs=[log_output, progress_text],
            )
            pause_btn.click(api_name=False, fn=pause_audit, outputs=[progress_text])

        with gr.Tab("📁 Past Runs"):
            refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
            runs_html = gr.HTML(value="<p style='color:#888;'>Click Refresh to load.</p>")
            # Dropdown uses plain string labels (no tuples, no UUIDs as values);
            # the UUID is looked up from the label when needed.
            run_dropdown = gr.Dropdown(
                label="Select Run", choices=[], interactive=True, allow_custom_value=True,
            )
            with gr.Row():
                report_btn = gr.Button("📊 HTML Report", variant="primary")
                csv_btn = gr.Button("📋 CSV", variant="secondary")
                resume_btn = gr.Button("▶️ Resume", variant="primary")
                delete_btn = gr.Button("🗑️ Delete", variant="stop")
            action_status = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                report_file = gr.File(label="Report Download", interactive=False)
                csv_file = gr.File(label="CSV Download", interactive=False)
            gr.Markdown("---\n### Resume Controls")
            resume_progress = gr.Textbox(label="Resume Status", interactive=False)
            resume_log = gr.Textbox(label="Resume Log", lines=15, interactive=False)
            resume_pause_btn = gr.Button("⏸️ Pause Resume", variant="stop")

            # Refresh: load the HTML table first (which refreshes the cache),
            # then rebuild the dropdown choices from that cache.
            refresh_btn.click(
                api_name=False, fn=load_runs_html, outputs=[runs_html],
            ).then(
                api_name=False, fn=load_runs_choices, outputs=[run_dropdown],
            )
            report_btn.click(
                api_name=False,
                fn=generate_report_for_run,
                inputs=[run_dropdown, domain_input],
                outputs=[report_file, action_status],
            )
            csv_btn.click(
                api_name=False,
                fn=generate_csv_for_run,
                inputs=[run_dropdown],
                outputs=[csv_file, action_status],
            )
            delete_btn.click(
                api_name=False,
                fn=delete_selected_run,
                inputs=[run_dropdown],
                outputs=[action_status],
            )
            resume_btn.click(
                api_name=False,
                fn=resume_audit,
                inputs=[run_dropdown, domain_input, batch_size_input,
                        timeout_input, delay_input, workers_input],
                outputs=[resume_log, resume_progress],
            )
            resume_pause_btn.click(api_name=False, fn=pause_audit, outputs=[resume_progress])


if __name__ == "__main__":
    # queue() is required so generator handlers can stream progress updates.
    app.queue().launch(server_name="0.0.0.0", server_port=7860)