"""HF Security & Performance Scanner v4.0 — modular.

Entrypoint: launches the Gradio UI. All scanning logic lives in core/scanner.py,
all rules in rules/, all reporting in report/.
"""
import json
import os
import sys
import tempfile
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
import pandas as pd

# Make sibling packages importable when running from any directory
sys.path.insert(0, str(Path(__file__).parent))

from core import (
    bootstrap_binaries,
    comment_on_space,
    have_binary,
    list_user_spaces,
)
from core.scanner import scan_repo
from report import generate_html_report, generate_sarif

# Bootstrap binaries on import (idempotent). The result is kept at module
# level because both run_scan's log and the UI header read its 'gitleaks'
# and 'hadolint' entries via .get().
BOOTSTRAP_RESULT = bootstrap_binaries()


# =========================================================
# UI orchestration (multi-target, progress, report file)
# =========================================================
def run_scan(target, hf_token, comment, crawl_user, deep_history, max_targets,
             run_security, run_performance, run_llm, run_supply_chain,
             progress=gr.Progress()):  # noqa: B008
    """Scan one or more targets and build every output shown in the UI.

    Args:
        target: Repo URL, local directory path, or bare HF username.
        hf_token: Token forwarded to ``list_user_spaces``, ``scan_repo`` and
            ``comment_on_space``.
        comment: When true, call ``comment_on_space`` for each scanned
            ``https://huggingface.co/spaces/`` URL that produced findings.
        crawl_user: When true, treat ``target`` as a username and scan the
            spaces returned by ``list_user_spaces``. Also switched on
            automatically when ``target`` looks like a bare username.
        deep_history: Forwarded unchanged to ``scan_repo``.
        max_targets: Cap on the number of crawled targets (coerced via int()).
        run_security: Forwarded unchanged to ``scan_repo``.
        run_performance: Forwarded unchanged to ``scan_repo``.
        run_llm: Forwarded unchanged to ``scan_repo``.
        run_supply_chain: Forwarded unchanged to ``scan_repo``.
        progress: Gradio progress tracker, called with a 0..1 fraction.

    Returns:
        A 5-tuple ``(findings_df, summary_markdown, log_text, report_html,
        report_path)``. On early exit (no target, or nothing to crawl) the
        DataFrame is empty, the summary is an error message and the last two
        items are ``None``.
    """
    # Guard against None as well as blank input.
    if not target or not target.strip():
        return pd.DataFrame(), "❌ No target provided", "", None, None

    target = target.strip().rstrip("/")
    log_lines = [
        f"[bootstrap] gitleaks: {BOOTSTRAP_RESULT.get('gitleaks')}, "
        f"hadolint: {BOOTSTRAP_RESULT.get('hadolint')}"
    ]

    tool_names = (
        "semgrep", "bandit", "detect-secrets", "pip-audit",
        "ruff", "gitleaks", "hadolint", "agent-audit",
    )
    tools_status = {name: have_binary(name) for name in tool_names}
    log_lines.append(
        "[tools] " + ", ".join(f"{k}={'✓' if v else '✗'}"
                               for k, v in tools_status.items())
    )

    # Resolve targets. Bare username (no slash, no http) → treat as HF crawl.
    looks_like_username = (
        not crawl_user
        and not target.startswith("http")
        and not os.path.isdir(target)
        and "/" not in target
        and " " not in target
    )
    if looks_like_username:
        log_lines.append(f"[auto] '{target}' looks like an HF username; crawling its spaces")
        crawl_user = True

    if crawl_user:
        progress(0, desc=f"Listing spaces for {target}...")
        targets, msg = list_user_spaces(target, hf_token=hf_token)
        log_lines.append(f"[list] {msg}")
        if not targets:
            return pd.DataFrame(), "❌ " + msg, "\n".join(log_lines), None, None
        targets = targets[: int(max_targets)]
    else:
        targets = [target]

    n = len(targets)

    def _make_cb(idx, label):
        """Map one target's local 0..1 progress onto the overall bar."""
        def _cb(local_frac, desc=""):
            progress((idx + local_frac) / n,
                     desc=f"[{idx+1}/{n}] {label}: {desc}")
        return _cb

    # Scan each target. A failure on one target is logged and must not abort
    # the remaining ones, hence the deliberately broad except at this boundary.
    all_findings = []
    for i, t in enumerate(targets):
        progress(i / n, desc=f"Scanning {i+1}/{n}: {t}")
        try:
            findings, scan_log = scan_repo(
                t, hf_token=hf_token, deep_history=deep_history,
                run_security=run_security, run_performance=run_performance,
                run_llm=run_llm, run_supply_chain=run_supply_chain,
                progress_cb=_make_cb(i, t),
            )
            log_lines.extend(f"[scan {i+1}/{n}] {t} -> {line}" for line in scan_log)
            all_findings.extend(findings)

            if comment and findings and t.startswith("https://huggingface.co/spaces/"):
                cmt = comment_on_space(t, hf_token, findings)
                log_lines.append(f"[comment] {t} -> {cmt}")
        except Exception as e:
            log_lines.append(f"[scan {i+1}/{n}] {t} -> ERROR: {e}")

    progress(1.0, desc="Done")

    # Findings DataFrame (same column order whether or not there are rows)
    columns = ["category", "severity", "confidence", "tool", "rule",
               "file", "line", "message", "owasp", "remediation"]
    if all_findings:
        df = pd.DataFrame(all_findings)
        # OWASP tags may arrive as a list; flatten for tabular display.
        df["owasp"] = df["owasp"].apply(
            lambda x: ", ".join(x) if isinstance(x, list) else x
        )
        df = df[columns]
    else:
        df = pd.DataFrame(columns=columns)

    # One clock reading for the summary, the report metadata and the file
    # names. isoformat() on an aware UTC datetime already ends in "+00:00";
    # appending "Z" to that would yield an invalid double designator, so the
    # offset is replaced instead.
    _now = datetime.now(timezone.utc)
    iso_ts = _now.isoformat().replace("+00:00", "Z")

    def _tally(key):
        """Render `value`=count pairs for one finding field, sorted by value."""
        counts = Counter(f[key] for f in all_findings)
        return ", ".join(f"`{k}`={v}" for k, v in sorted(counts.items()))

    summary = (
        f"**Targets:** {n}  \n"
        f"**Total findings:** {len(all_findings)}  \n"
        f"**By category:** {_tally('category')}  \n"
        f"**By severity:** {_tally('severity')}  \n"
        f"**By confidence:** {_tally('confidence')}  \n"
        f"**By tool:** {_tally('tool')}  \n"
        f"**Timestamp:** {iso_ts}"
    )

    # HTML + SARIF reports
    scan_meta = {
        "target": target,
        "n_targets": n,
        "timestamp": iso_ts,
        "footer": "HF Security Scanner v4.0",
    }
    report_html = generate_html_report(all_findings, scan_meta)
    sarif_doc = generate_sarif(all_findings, scan_meta)

    # Each run writes into its own freshly created directory so that two
    # scans finishing within the same second cannot overwrite each other.
    _ts = _now.strftime('%Y%m%d_%H%M%S')
    report_dir = tempfile.mkdtemp(prefix="scan_report_")
    report_path = os.path.join(report_dir, f"scan_report_{_ts}.html")
    with open(report_path, "w", encoding="utf-8") as fh:
        fh.write(report_html)

    sarif_path = os.path.join(report_dir, f"scan_report_{_ts}.sarif")
    with open(sarif_path, "w", encoding="utf-8") as fh:
        json.dump(sarif_doc, fh, indent=2)
    log_lines.append(f"[report] HTML → {report_path}")
    log_lines.append(f"[report] SARIF → {sarif_path}")

    return df, summary, "\n".join(log_lines), report_html, report_path


# =========================================================
# Gradio UI
# =========================================================
# Layout: a header, a two-column input row (target/token on the left, scan
# options on the right), the scan button, a Markdown summary, and three tabs
# (findings table, report download + preview, run log).
with gr.Blocks(title="HF Security & Performance Scanner v4.0") as demo:
    # Header also surfaces the bootstrap outcome captured at import time.
    gr.Markdown(
        "# 🔐 Hugging Face & Git Security + Performance + LLM Scanner v4.0\n"
        f"**Bootstrap status:** gitleaks={BOOTSTRAP_RESULT.get('gitleaks')}, "
        f"hadolint={BOOTSTRAP_RESULT.get('hadolint')}"
    )

    with gr.Row():
        # Left column: what to scan and the credentials to use.
        with gr.Column(scale=2):
            target = gr.Textbox(
                label="Target",
                placeholder=("https://github.com/org/repo  |  "
                             "https://huggingface.co/spaces/user/space  |  hf-username"),
            )
            hf_token = gr.Textbox(
                label="HF Token (gated repos / commenting / private listing)",
                type="password", placeholder="hf_...",
            )
        # Right column: scan toggles; each one maps to a run_scan parameter
        # of the same name.
        with gr.Column(scale=1):
            run_security = gr.Checkbox(label="Security scan", value=True)
            run_performance = gr.Checkbox(label="Performance scan", value=True)
            run_llm = gr.Checkbox(
                label="LLM/Agent scan (Semgrep:LLM + Agent Audit)",
                value=True,
            )
            run_supply_chain = gr.Checkbox(
                label="Supply-chain scan (Semgrep:SupplyChain + socket)",
                value=True,
            )
            crawl_user = gr.Checkbox(label="Scan all HF Spaces of this user", value=False)
            comment = gr.Checkbox(label="Post discussion to HF Space", value=False)
            deep_history = gr.Checkbox(label="Deep scan (full git history)", value=False)
            max_targets = gr.Slider(1, 100, value=20, step=1,
                                    label="Max targets when crawling")

    btn = gr.Button("🚀 Scan", variant="primary", size="lg")
    summary = gr.Markdown()

    with gr.Tabs():
        with gr.TabItem("📊 Findings"):
            findings_df = gr.Dataframe(label="Findings", wrap=True, interactive=False)
        with gr.TabItem("📄 Report"):
            report_file = gr.File(label="Download HTML report")
            report_html = gr.HTML(label="Preview")
        with gr.TabItem("📋 Run log"):
            log = gr.Textbox(label="Log", lines=20, max_lines=60)

    # The inputs list must follow run_scan's positional parameter order, and
    # the outputs list must follow the order of the 5-tuple run_scan returns
    # (DataFrame, summary, log text, report HTML, report file path).
    btn.click(
        run_scan,
        inputs=[target, hf_token, comment, crawl_user, deep_history, max_targets,
                run_security, run_performance, run_llm, run_supply_chain],
        outputs=[findings_df, summary, log, report_html, report_file],
    )

# Launch only when executed as a script; importing the module builds `demo`
# without starting a server.
if __name__ == "__main__":
    # Explicitly bind to 127.0.0.1:7860 so Playwright tests target the expected port
    demo.launch(server_name="127.0.0.1", server_port=7860, share=False)