"""
Polymarket Factor Dashboard — Gradio app for Hugging Face Spaces.

Uses the public, non-trade Polymarket APIs:
  - Gamma API   https://gamma-api.polymarket.com
  - CLOB API    https://clob.polymarket.com
"""

from __future__ import annotations

import json
import math
from datetime import datetime, timezone
from typing import Any

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
from plotly.subplots import make_subplots

# Base URLs of the two public Polymarket APIs used by this module:
# GAMMA serves the event listing, CLOB serves the price history.
GAMMA = "https://gamma-api.polymarket.com"
CLOB = "https://clob.polymarket.com"

# One shared HTTP session for all requests, tagged with a custom User-Agent.
SESSION = requests.Session()
SESSION.headers.update({"User-Agent": "polymarket-dashboard/1.0"})
# Passed as ``timeout=`` to every request (seconds).
TIMEOUT = 20

# Dropdown label -> query settings for ``fetch_events``.
#   "order" / "ascending" are forwarded as query parameters of the same name.
#   "hot" (optional) makes ``flatten_markets`` filter low-volume markets and
#   re-sort by the 24h/total volume ratio.
# NOTE(review): confirm the Gamma API accepts "end_date" as an ``order`` key;
# the other keys are camelCase/lowercase field names.
CATEGORIES = {
    "Top 24h Volume":        {"order": "volume24hr",     "ascending": False},
    "Top Total Volume":      {"order": "volume",         "ascending": False},
    "Top Liquidity":         {"order": "liquidity",      "ascending": False},
    "Most Competitive":      {"order": "competitive",    "ascending": False},
    "Breaking Hot (24h/total ratio)": {"order": "volume24hr", "ascending": False, "hot": True},
    "Ending Soonest":        {"order": "end_date",       "ascending": True},
}


# ---------- API helpers ----------

def fetch_events(order: str, ascending: bool, limit: int = 40) -> list[dict]:
    """Fetch active, not-yet-closed events from the Gamma ``/events`` endpoint.

    Args:
        order: Field name sent as the ``order`` query parameter.
        ascending: Sort direction; sent as its lowercase string form.
        limit: Maximum number of events to request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    direction = str(ascending).lower()
    query = {
        "active": "true",
        "closed": "false",
        "order": order,
        "ascending": direction,
        "limit": limit,
    }
    response = SESSION.get(f"{GAMMA}/events", params=query, timeout=TIMEOUT)
    response.raise_for_status()
    return response.json()


def fetch_price_history(token_id: str, interval: str = "1m", fidelity: int = 60) -> pd.DataFrame:
    """Fetch the price history of one token from the CLOB ``/prices-history`` endpoint.

    Args:
        token_id: Token identifier, sent as the ``market`` query parameter.
        interval: History window, sent as the ``interval`` query parameter.
        fidelity: Sampling resolution, sent as the ``fidelity`` query parameter.

    Returns:
        A DataFrame with a float column ``p``, indexed by a UTC
        ``DatetimeIndex`` named ``t`` and sorted ascending. When the API
        returns no history the frame is empty but has the same layout, so
        callers can treat both cases uniformly.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    r = SESSION.get(
        f"{CLOB}/prices-history",
        params={"market": token_id, "interval": interval, "fidelity": fidelity},
        timeout=TIMEOUT,
    )
    r.raise_for_status()
    hist = r.json().get("history", [])
    if not hist:
        # Same shape as the populated result: column "p", time index "t".
        return pd.DataFrame(
            columns=["p"],
            index=pd.DatetimeIndex([], tz="UTC", name="t"),
            dtype=float,
        )
    df = pd.DataFrame(hist)
    # The API reports timestamps as epoch seconds.
    df["t"] = pd.to_datetime(df["t"], unit="s", utc=True)
    df["p"] = df["p"].astype(float)
    return df.set_index("t").sort_index()


# ---------- Data shaping ----------

def _parse_list(x: Any) -> list:
    if isinstance(x, list):
        return x
    if isinstance(x, str):
        try:
            return json.loads(x)
        except Exception:
            return []
    return []


def flatten_markets(events: list[dict], hot: bool = False) -> pd.DataFrame:
    """Build a table with one row per market, tagged with its parent event title.

    Markets without token ids or without outcomes are left out.

    Args:
        events: Event dicts, each optionally holding a ``markets`` list.
        hot: When true, keep only markets whose total volume exceeds 50,000
            and order them by descending 24h/total volume ratio.

    Returns:
        A DataFrame of market rows; a column-less empty frame when no market
        qualified.
    """
    records = []
    pairs = (
        (evt.get("title", ""), mkt)
        for evt in events
        for mkt in evt.get("markets", [])
    )
    for title, mkt in pairs:
        total_volume = float(mkt.get("volumeNum") or mkt.get("volume") or 0)
        day_volume = float(mkt.get("volume24hr") or 0)
        depth = float(mkt.get("liquidityNum") or 0)
        tokens = _parse_list(mkt.get("clobTokenIds"))
        names = _parse_list(mkt.get("outcomes"))
        quotes = _parse_list(mkt.get("outcomePrices"))
        if not (tokens and names):
            continue

        ratio = day_volume / total_volume if total_volume > 0 else 0.0
        second_token = tokens[1] if len(tokens) > 1 else None
        records.append({
            "event": title,
            "question": mkt.get("question", ""),
            "slug": mkt.get("slug", ""),
            "vol_total": total_volume,
            "vol_24h": day_volume,
            "liquidity": depth,
            "hot_ratio": ratio,
            "token_yes": tokens[0],  # tokens is non-empty past the guard above
            "token_no": second_token,
            "outcomes": names,
            "prices": list(map(float, quotes)) if quotes else [],
            "end_date": mkt.get("endDate"),
            "condition_id": mkt.get("conditionId"),
        })

    table = pd.DataFrame(records)
    if table.empty or not hot:
        return table
    # "Hot" view: drop thinly traded markets, then rank by 24h share of volume.
    liquid_only = table[table["vol_total"] > 50_000]
    return liquid_only.sort_values("hot_ratio", ascending=False)


def market_label(row: pd.Series) -> str:
    """Return the dropdown caption for a market row.

    The question is shortened to 82 characters plus an ellipsis when it is
    longer than 85 characters, followed by the 24h volume and the liquidity
    formatted as whole dollars with thousands separators.
    """
    question = row["question"]
    shown = question if len(question) <= 85 else question[:82] + "…"
    volume = f'{row["vol_24h"]:,.0f}'
    depth = f'{row["liquidity"]:,.0f}'
    return f"{shown}  •  24h ${volume}  •  liq ${depth}"


# ---------- Factor computations ----------

def hurst_exponent(series: np.ndarray) -> float:
    """Estimate the Hurst exponent of *series* by rescaled-range (R/S) analysis.

    The series is cut into non-overlapping windows for up to 12 log-spaced
    window lengths; the mean R/S per length is regressed on the length in
    log-log space and the slope is returned. Values below 0.5 indicate
    mean reversion, 0.5 a random walk and above 0.5 trending behaviour.

    Returns:
        The fitted slope, or NaN when the series has fewer than 20 points or
        fewer than 4 window lengths produced a usable R/S value.
    """
    values = np.asarray(series, dtype=float)
    total = len(values)
    if total < 20:
        return float("nan")

    # Log-spaced window lengths from 10**0.7 up to half the series length.
    candidates = np.unique(np.logspace(0.7, np.log10(total // 2), 12).astype(int))
    used_widths = []
    mean_rs = []
    for width in candidates[candidates >= 2]:
        count = total // width
        if count < 1:
            continue
        ratios = []
        for start in range(0, count * width, width):
            window = values[start:start + width]
            spread = window.std(ddof=0)
            # Flat (or non-finite) windows have no defined R/S; skip them.
            if spread > 0:
                walk = np.cumsum(window - window.mean())
                ratios.append((walk.max() - walk.min()) / spread)
        if ratios:
            used_widths.append(width)
            mean_rs.append(np.mean(ratios))

    if len(mean_rs) < 4:
        return float("nan")
    coefficients = np.polyfit(np.log(used_widths), np.log(mean_rs), 1)
    return float(coefficients[0])


def compute_factors(df: pd.DataFrame, sibling_series: list[pd.Series] | None = None) -> dict:
    """Compute momentum, mean-reversion, volatility-regime and correlation-break factors.

    All return-based statistics use changes of the logit (log-odds) of the
    price, with prices clipped to ``[1e-6, 1 - 1e-6]`` so the logit is finite.

    Args:
        df: Price history with a probability column ``p`` and a timestamp
            index in ascending order (the last index entry is treated as "now").
        sibling_series: Optional price series of related markets, forwarded
            to ``_corr_break``.

    Returns:
        ``{"error": ...}`` when fewer than 20 samples are available; otherwise
        a dict with the keys ``momentum``, ``mean_reversion``, ``vol_regime``
        and ``corr_break``, each holding raw values plus a ``label``.
    """
    if len(df) < 20:
        return {"error": "Not enough history to compute factors."}

    nan = float("nan")
    p = df["p"].astype(float)
    # Clip away exact 0/1 so the log-odds stay finite.
    p_clip = p.clip(lower=1e-6, upper=1 - 1e-6)
    logit = np.log(p_clip / (1 - p_clip))
    ret = logit.diff().dropna()
    now = df.index[-1]

    # Per-sample volatility of logit returns over the whole history.
    vol_all = float(ret.std()) if len(ret) > 2 else nan

    def window_momentum(hours: int) -> tuple[float, float]:
        """Return (logit change, z-score) over the trailing *hours* window."""
        seg = logit.loc[logit.index >= now - pd.Timedelta(hours=hours)]
        steps = len(seg) - 1
        if steps < 1:
            return nan, nan
        change = float(seg.iloc[-1] - seg.iloc[0])
        if not vol_all or math.isnan(vol_all):
            return change, nan
        # Scale the per-sample volatility by the number of steps actually in
        # the window, so the z-score is correct for any sampling fidelity
        # (and for histories shorter than the window), not only hourly data.
        return change, change / (vol_all * math.sqrt(steps))

    # --- Momentum: 24h and 7d cumulative logit change, normalized
    mom_24h, mom_z_24h = window_momentum(24)
    mom_7d, mom_z_7d = window_momentum(24 * 7)
    out: dict = {
        "momentum": {
            "24h_dlogit": mom_24h,
            "7d_dlogit": mom_7d,
            "24h_zscore": mom_z_24h,
            "7d_zscore": mom_z_7d,
            "label": _momentum_label(mom_z_24h, mom_z_7d),
        }
    }

    # --- Mean reversion: Hurst + lag-1 autocorr of returns
    # R/S analysis is defined on increments: fed with price levels, a plain
    # random walk scores close to 1 instead of 0.5 and would always be
    # labelled as trending. Use the logit returns instead.
    hurst = hurst_exponent(ret.to_numpy())
    lag1 = float(ret.autocorr(lag=1)) if len(ret) > 10 else nan
    out["mean_reversion"] = {
        "hurst": hurst,
        "lag1_autocorr": lag1,
        "label": _mr_label(hurst, lag1),
    }

    # --- Vol regime: recent vs baseline
    ret_24h = ret.loc[ret.index >= now - pd.Timedelta(hours=24)]
    ret_7d = ret.loc[ret.index >= now - pd.Timedelta(days=7)]
    vol_24h = float(ret_24h.std()) if len(ret_24h) > 2 else nan
    vol_7d = float(ret_7d.std()) if len(ret_7d) > 2 else nan
    # NaN fails the "> 0" test, so a missing baseline yields a NaN ratio.
    ratio = (vol_24h / vol_7d) if vol_7d and vol_7d > 0 else nan
    out["vol_regime"] = {
        "vol_24h": vol_24h,
        "vol_7d": vol_7d,
        "ratio": ratio,
        "label": _vol_label(ratio),
    }

    # --- Correlation break: this market vs. the composite of its siblings
    out["corr_break"] = _corr_break(p, sibling_series)

    return out


def _momentum_label(z24: float, z7: float) -> str:
    if any(math.isnan(x) for x in (z24, z7)):
        return "insufficient"
    if z24 > 1.5 and z7 > 0.5:   return "STRONG UP"
    if z24 < -1.5 and z7 < -0.5: return "STRONG DOWN"
    if z24 > 0.7:  return "up"
    if z24 < -0.7: return "down"
    return "flat"


def _mr_label(hurst: float, lag1: float) -> str:
    if math.isnan(hurst): return "insufficient"
    if hurst < 0.4 or (not math.isnan(lag1) and lag1 < -0.15):
        return "MEAN-REVERTING"
    if hurst > 0.6:
        return "TRENDING"
    return "random walk"


def _vol_label(ratio: float) -> str:
    if math.isnan(ratio): return "insufficient"
    if ratio > 1.6: return "HIGH VOL (regime shift up)"
    if ratio < 0.6: return "LOW VOL (calm)"
    return "normal"


def _corr_break(p: pd.Series, sibs: list[pd.Series] | None) -> dict:
    if not sibs:
        return {"label": "no siblings", "corr_recent": None, "corr_baseline": None, "delta": None}
    # Align all on common index via forward fill
    frame = pd.concat([p.rename("self")] + [s.rename(f"sib{i}") for i, s in enumerate(sibs)], axis=1)
    frame = frame.ffill().dropna()
    if len(frame) < 48:
        return {"label": "insufficient", "corr_recent": None, "corr_baseline": None, "delta": None}
    composite = frame.drop(columns="self").mean(axis=1)
    merged = pd.concat([frame["self"], composite.rename("comp")], axis=1)
    baseline = merged.iloc[:-24].corr().iloc[0, 1]
    recent = merged.iloc[-24:].corr().iloc[0, 1]
    if pd.isna(baseline) or pd.isna(recent):
        return {"label": "insufficient", "corr_recent": None, "corr_baseline": None, "delta": None}
    delta = recent - baseline
    label = "CORR BREAK" if abs(delta) > 0.35 else "stable"
    return {
        "corr_recent": float(recent),
        "corr_baseline": float(baseline),
        "delta": float(delta),
        "label": label,
    }


# ---------- Plotting ----------

def build_plot(df: pd.DataFrame, question: str, factors: dict) -> go.Figure:
    """Build the two-panel figure: probability on top, rolling volatility below.

    Args:
        df: Price history with a probability column ``p`` and a time index
            (time-based rolling windows are applied to it).
        question: Market question used as the title; cut to 107 characters
            plus an ellipsis when longer than 110.
        factors: Accepted for interface compatibility; not read here.

    Returns:
        A Plotly figure with the YES probability and its 24h moving average
        in the upper panel and the 24h rolling standard deviation of logit
        returns in the lower panel.
    """
    prob_pct = df["p"] * 100
    moving_avg = prob_pct.rolling("24h").mean()

    # Logit returns, with prices clipped so the log-odds stay finite.
    clipped = df["p"].clip(1e-6, 1 - 1e-6)
    logit_ret = np.log(clipped / (1 - clipped)).diff()
    rolling_sigma = logit_ret.rolling("24h").std()

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        row_heights=[0.65, 0.35],
        vertical_spacing=0.06,
        subplot_titles=("Implied Probability (%)", "Rolling Volatility of Logit Returns"),
    )

    # (trace, subplot row) in drawing order.
    panels = [
        (
            go.Scatter(x=df.index, y=prob_pct, mode="lines", name="YES %",
                       line=dict(color="#1f77b4", width=2)),
            1,
        ),
        (
            go.Scatter(x=df.index, y=moving_avg, mode="lines", name="24h MA",
                       line=dict(color="#ff7f0e", width=1, dash="dash")),
            1,
        ),
        (
            go.Scatter(x=rolling_sigma.index, y=rolling_sigma, mode="lines",
                       name="24h rolling σ", line=dict(color="#d62728", width=1.5)),
            2,
        ),
    ]
    for trace, panel_row in panels:
        fig.add_trace(trace, row=panel_row, col=1)

    heading = question if len(question) <= 110 else question[:107] + "…"
    fig.update_layout(
        title=heading,
        height=620,
        hovermode="x unified",
        margin=dict(l=40, r=20, t=60, b=40),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        template="plotly_white",
    )
    fig.update_yaxes(title_text="Prob (%)", row=1, col=1, range=[0, 100])
    fig.update_yaxes(title_text="σ", row=2, col=1)
    return fig


# ---------- Gradio glue ----------

# Category name -> market table most recently loaded by ``refresh_markets``.
# ``analyze_selected`` reads it to map a dropdown label back to its market row.
# NOTE(review): module-level state, so presumably shared by every user
# session of the running app — confirm that is intended.
MARKET_CACHE: dict[str, pd.DataFrame] = {}


def refresh_markets(category: str):
    """Reload the market list for *category* and rebuild the market dropdown.

    Up to 30 markets are kept and stored in ``MARKET_CACHE`` under the
    category name. A failed or empty load removes any earlier cache entry so
    a stale list can never be analysed.

    Args:
        category: A key of ``CATEGORIES``.

    Returns:
        A ``(dropdown, status)`` tuple: a ``gr.Dropdown`` carrying the new
        choices (empty on failure) and a status message for the UI.
    """
    cfg = CATEGORIES[category]
    try:
        events = fetch_events(cfg["order"], cfg["ascending"], limit=40)
    except (requests.RequestException, ValueError) as exc:
        # Network, HTTP-status and JSON-decoding failures are reported in the
        # status line instead of surfacing as an unhandled callback error.
        MARKET_CACHE.pop(category, None)
        return gr.Dropdown(choices=[], value=None), f"Could not load markets: {exc}"

    df = flatten_markets(events, hot=cfg.get("hot", False))
    if df.empty:
        MARKET_CACHE.pop(category, None)
        return gr.Dropdown(choices=[], value=None), "No markets found."

    # Positional index 0..n-1 so dropdown order and row positions line up.
    df = df.head(30).reset_index(drop=True)
    MARKET_CACHE[category] = df
    labels = [market_label(r) for _, r in df.iterrows()]
    return gr.Dropdown(choices=labels, value=labels[0]), f"Loaded {len(df)} markets."


def analyze_selected(category: str, label: str, interval: str, fidelity: int):
    """Analyse the market chosen in the dropdown.

    Fetches the price history of the market's first token, plus up to four
    sibling markets of the same event for the correlation-break factor, then
    builds the figure, the markdown summary and the factor table.

    Args:
        category: Category whose cached market list the label belongs to.
        label: Dropdown caption of the selected market.
        interval: History window passed to ``fetch_price_history``.
        fidelity: Minutes per sample; may be ``None`` when the input is empty.

    Returns:
        A ``(figure, summary_markdown, factor_table)`` tuple. On any failure
        the figure is ``None``, the summary holds the reason and the table is
        an empty DataFrame.
    """

    def _fail(message: str):
        return None, message, pd.DataFrame()

    if category not in MARKET_CACHE or not label:
        return _fail("Refresh markets first, then pick one.")

    # A cleared number field arrives as None; reject it instead of crashing.
    try:
        minutes = int(fidelity)
    except (TypeError, ValueError):
        return _fail("Fidelity must be a whole number of minutes (1 or more).")
    if minutes < 1:
        return _fail("Fidelity must be a whole number of minutes (1 or more).")

    df_markets = MARKET_CACHE[category]
    labels = [market_label(r) for _, r in df_markets.iterrows()]
    if label not in labels:
        return _fail("Selection out of sync — refresh markets.")
    row = df_markets.iloc[labels.index(label)]

    token_id = row["token_yes"]
    if not token_id:
        return _fail("No YES token id.")

    # Fetch target history
    try:
        df = fetch_price_history(token_id, interval=interval, fidelity=minutes)
    except (requests.RequestException, ValueError, KeyError) as exc:
        return _fail(f"Could not load price history: {exc}")
    if df.empty:
        return _fail("No price history returned for this market.")

    # Fetch a few siblings from the same event for corr-break
    same_event = df_markets[df_markets["event"] == row["event"]]
    others = same_event[same_event["question"] != row["question"]].head(4)
    siblings = []
    for _, sr in others.iterrows():
        sibling_token = sr["token_yes"]
        if not sibling_token:
            continue
        try:
            sdf = fetch_price_history(sibling_token, interval=interval, fidelity=minutes)
        except Exception:
            # Siblings are optional context: one failing fetch must not abort
            # the analysis of the selected market.
            continue
        if not sdf.empty:
            siblings.append(sdf["p"])

    factors = compute_factors(df, sibling_series=siblings)
    fig = build_plot(df, row["question"], factors)
    summary = _format_summary(row, df, factors)
    tbl = _factor_table(factors)
    return fig, summary, tbl


def _format_summary(row: pd.Series, df: pd.DataFrame, factors: dict) -> str:
    last_p = float(df["p"].iloc[-1]) * 100
    first_p = float(df["p"].iloc[0]) * 100
    end = row.get("end_date") or "?"
    lines = [
        f"### {row['question']}",
        f"**Event:** {row['event']}",
        f"**Current:** YES = {last_p:.1f}%  •  NO = {100 - last_p:.1f}%",
        f"**Period change:** {first_p:.1f}% → {last_p:.1f}%  ({last_p - first_p:+.1f} pp)",
        f"**24h volume:** ${row['vol_24h']:,.0f}  •  **Total volume:** ${row['vol_total']:,.0f}  •  **Liquidity:** ${row['liquidity']:,.0f}",
        f"**Ends:** {end}",
        f"**Samples:** {len(df)}",
    ]
    if "error" not in factors:
        m = factors["momentum"]; mr = factors["mean_reversion"]; v = factors["vol_regime"]; c = factors["corr_break"]
        lines += [
            "",
            "#### Factor signals",
            f"- **Momentum:** {m['label']}  (24h z={m['24h_zscore']:+.2f}, 7d z={m['7d_zscore']:+.2f})",
            f"- **Mean reversion:** {mr['label']}  (Hurst={mr['hurst']:.2f}, ρ₁={mr['lag1_autocorr']:+.2f})",
            f"- **Vol regime:** {v['label']}  (σ₂₄h/σ₇d = {v['ratio']:.2f})",
            f"- **Correlation vs event siblings:** {c['label']}" + (
                f"  (baseline={c['corr_baseline']:+.2f}, recent={c['corr_recent']:+.2f}, Δ={c['delta']:+.2f})"
                if c.get("delta") is not None else ""
            ),
        ]
    return "\n".join(lines)


def _factor_table(factors: dict) -> pd.DataFrame:
    if "error" in factors:
        return pd.DataFrame([{"factor": "—", "value": factors["error"], "signal": ""}])
    m = factors["momentum"]; mr = factors["mean_reversion"]; v = factors["vol_regime"]; c = factors["corr_break"]
    rows = [
        {"factor": "Momentum 24h (z)",  "value": f"{m['24h_zscore']:+.2f}", "signal": m["label"]},
        {"factor": "Momentum 7d (z)",   "value": f"{m['7d_zscore']:+.2f}",  "signal": ""},
        {"factor": "Hurst exponent",    "value": f"{mr['hurst']:.2f}",      "signal": mr["label"]},
        {"factor": "Lag-1 autocorr",    "value": f"{mr['lag1_autocorr']:+.2f}", "signal": ""},
        {"factor": "σ₂₄h / σ₇d",        "value": f"{v['ratio']:.2f}",       "signal": v["label"]},
        {"factor": "Corr vs siblings (Δ)",
         "value": f"{c['delta']:+.2f}" if c.get("delta") is not None else "n/a",
         "signal": c["label"]},
    ]
    return pd.DataFrame(rows)


# ---------- UI ----------

# Layout and event wiring of the app. Components appear in the page in the
# order they are created inside the nested Row/Column contexts.
with gr.Blocks(title="Polymarket Factor Dashboard", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# Polymarket Factor Dashboard\n"
        "Pick a category, load markets, then analyze any market's price behavior "
        "with momentum / mean-reversion / volatility-regime / correlation-break detection."
    )

    # Row 1: category picker, reload button and status line.
    with gr.Row():
        category = gr.Dropdown(
            label="Category",
            choices=list(CATEGORIES.keys()),
            value="Top 24h Volume",
            scale=2,
        )
        load_btn = gr.Button("Load markets", variant="primary", scale=1)
        status = gr.Markdown("", elem_id="status")

    # Filled by refresh_markets with one label per loaded market.
    market = gr.Dropdown(label="Market", choices=[], value=None, interactive=True)

    # Row 2: history query settings and the analyze trigger.
    with gr.Row():
        interval = gr.Dropdown(
            label="History interval",
            choices=["1h", "6h", "1d", "1w", "1m", "max"],
            value="1w",
        )
        fidelity = gr.Number(label="Fidelity (minutes per sample)", value=60, precision=0)
        analyze_btn = gr.Button("Analyze", variant="primary")

    # Row 3: chart on the left, summary and factor table on the right.
    with gr.Row():
        with gr.Column(scale=3):
            plot = gr.Plot(label="Price & Volatility")
        with gr.Column(scale=2):
            summary = gr.Markdown("")
            table = gr.Dataframe(
                headers=["factor", "value", "signal"],
                label="Factor signals",
                interactive=False,
                wrap=True,
            )

    # The market list is reloaded on button click and on category change.
    load_btn.click(
        fn=refresh_markets,
        inputs=[category],
        outputs=[market, status],
    )
    category.change(
        fn=refresh_markets,
        inputs=[category],
        outputs=[market, status],
    )
    # Analysis fills the plot, the summary and the factor table.
    analyze_btn.click(
        fn=analyze_selected,
        inputs=[category, market, interval, fidelity],
        outputs=[plot, summary, table],
    )

    # Auto-load on start
    demo.load(
        fn=refresh_markets,
        inputs=[category],
        outputs=[market, status],
    )


# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()