""" Polymarket Factor Dashboard — Gradio app for Hugging Face Spaces. Uses the public, non-trade Polymarket APIs: - Gamma API https://gamma-api.polymarket.com - CLOB API https://clob.polymarket.com """ from __future__ import annotations import json import math from datetime import datetime, timezone from typing import Any import gradio as gr import numpy as np import pandas as pd import plotly.graph_objects as go import requests from plotly.subplots import make_subplots GAMMA = "https://gamma-api.polymarket.com" CLOB = "https://clob.polymarket.com" SESSION = requests.Session() SESSION.headers.update({"User-Agent": "polymarket-dashboard/1.0"}) TIMEOUT = 20 CATEGORIES = { "Top 24h Volume": {"order": "volume24hr", "ascending": False}, "Top Total Volume": {"order": "volume", "ascending": False}, "Top Liquidity": {"order": "liquidity", "ascending": False}, "Most Competitive": {"order": "competitive", "ascending": False}, "Breaking Hot (24h/total ratio)": {"order": "volume24hr", "ascending": False, "hot": True}, "Ending Soonest": {"order": "end_date", "ascending": True}, } # ---------- API helpers ---------- def fetch_events(order: str, ascending: bool, limit: int = 40) -> list[dict]: r = SESSION.get( f"{GAMMA}/events", params={ "active": "true", "closed": "false", "order": order, "ascending": str(ascending).lower(), "limit": limit, }, timeout=TIMEOUT, ) r.raise_for_status() return r.json() def fetch_price_history(token_id: str, interval: str = "1m", fidelity: int = 60) -> pd.DataFrame: """Return DataFrame with columns ['t', 'p'] where t is a pandas Timestamp.""" r = SESSION.get( f"{CLOB}/prices-history", params={"market": token_id, "interval": interval, "fidelity": fidelity}, timeout=TIMEOUT, ) r.raise_for_status() hist = r.json().get("history", []) if not hist: return pd.DataFrame(columns=["t", "p"]) df = pd.DataFrame(hist) df["t"] = pd.to_datetime(df["t"], unit="s", utc=True) df["p"] = df["p"].astype(float) return df.set_index("t").sort_index() # ---------- Data shaping ---------- def _parse_list(x: Any) -> list: if isinstance(x, list): return x if isinstance(x, str): try: return json.loads(x) except Exception: return [] return [] def flatten_markets(events: list[dict], hot: bool = False) -> pd.DataFrame: """One row per *market*, enriched with parent event info.""" rows = [] for evt in events: evt_title = evt.get("title", "") for m in evt.get("markets", []): vol = float(m.get("volumeNum") or m.get("volume") or 0) vol24 = float(m.get("volume24hr") or 0) liq = float(m.get("liquidityNum") or 0) token_ids = _parse_list(m.get("clobTokenIds")) outcomes = _parse_list(m.get("outcomes")) prices = _parse_list(m.get("outcomePrices")) if not token_ids or not outcomes: continue rows.append({ "event": evt_title, "question": m.get("question", ""), "slug": m.get("slug", ""), "vol_total": vol, "vol_24h": vol24, "liquidity": liq, "hot_ratio": (vol24 / vol) if vol > 0 else 0.0, "token_yes": token_ids[0] if len(token_ids) > 0 else None, "token_no": token_ids[1] if len(token_ids) > 1 else None, "outcomes": outcomes, "prices": [float(p) for p in prices] if prices else [], "end_date": m.get("endDate"), "condition_id": m.get("conditionId"), }) df = pd.DataFrame(rows) if df.empty: return df if hot: df = df[df["vol_total"] > 50_000] # filter noise df = df.sort_values("hot_ratio", ascending=False) return df def market_label(row: pd.Series) -> str: q = row["question"] if len(q) > 85: q = q[:82] + "…" return f'{q} • 24h ${row["vol_24h"]:,.0f} • liq ${row["liquidity"]:,.0f}' # ---------- Factor computations ---------- def hurst_exponent(series: np.ndarray) -> float: """R/S Hurst. H<0.5 mean-reverting, 0.5 random walk, >0.5 trending.""" series = np.asarray(series, dtype=float) n = len(series) if n < 20: return float("nan") lags = np.unique(np.logspace(0.7, np.log10(n // 2), 12).astype(int)) lags = lags[lags >= 2] rs = [] for lag in lags: chunks = n // lag if chunks < 1: continue vals = [] for i in range(chunks): seg = series[i * lag:(i + 1) * lag] mean = seg.mean() dev = seg - mean Z = np.cumsum(dev) R = Z.max() - Z.min() S = seg.std(ddof=0) if S > 0: vals.append(R / S) if vals: rs.append((lag, np.mean(vals))) if len(rs) < 4: return float("nan") lags_arr = np.log([r[0] for r in rs]) rs_arr = np.log([r[1] for r in rs]) slope, _ = np.polyfit(lags_arr, rs_arr, 1) return float(slope) def compute_factors(df: pd.DataFrame, sibling_series: list[pd.Series] | None = None) -> dict: """Momentum, mean-reversion, vol regime, correlation break.""" out: dict = {} if df.empty or len(df) < 20: return {"error": "Not enough history to compute factors."} p = df["p"].astype(float) # log-diff returns; avoid p=0 p_clip = p.clip(lower=1e-6, upper=1 - 1e-6) logit = np.log(p_clip / (1 - p_clip)) ret = logit.diff().dropna() # --- Momentum: 24h and 7d cumulative logit change, normalized now = df.index[-1] def window_change(hours: int) -> float: cutoff = now - pd.Timedelta(hours=hours) seg = logit.loc[logit.index >= cutoff] if len(seg) < 2: return float("nan") return float(seg.iloc[-1] - seg.iloc[0]) mom_24h = window_change(24) mom_7d = window_change(24 * 7) vol_all = float(ret.std()) if len(ret) > 2 else float("nan") mom_z_24h = mom_24h / (vol_all * math.sqrt(24)) if vol_all and not math.isnan(mom_24h) else float("nan") mom_z_7d = mom_7d / (vol_all * math.sqrt(24 * 7)) if vol_all and not math.isnan(mom_7d) else float("nan") out["momentum"] = { "24h_dlogit": mom_24h, "7d_dlogit": mom_7d, "24h_zscore": mom_z_24h, "7d_zscore": mom_z_7d, "label": _momentum_label(mom_z_24h, mom_z_7d), } # --- Mean reversion: Hurst + lag-1 autocorr of returns hurst = hurst_exponent(p.values) lag1 = float(ret.autocorr(lag=1)) if len(ret) > 10 else float("nan") out["mean_reversion"] = { "hurst": hurst, "lag1_autocorr": lag1, "label": _mr_label(hurst, lag1), } # --- Vol regime: recent vs baseline ret_24h = ret.loc[ret.index >= now - pd.Timedelta(hours=24)] ret_7d = ret.loc[ret.index >= now - pd.Timedelta(days=7)] vol_24h = float(ret_24h.std()) if len(ret_24h) > 2 else float("nan") vol_7d = float(ret_7d.std()) if len(ret_7d) > 2 else float("nan") ratio = (vol_24h / vol_7d) if vol_7d and vol_7d > 0 else float("nan") out["vol_regime"] = { "vol_24h": vol_24h, "vol_7d": vol_7d, "ratio": ratio, "label": _vol_label(ratio), } # --- Correlation break: rolling corr between this market and sibling composite out["corr_break"] = _corr_break(p, sibling_series) return out def _momentum_label(z24: float, z7: float) -> str: if any(math.isnan(x) for x in (z24, z7)): return "insufficient" if z24 > 1.5 and z7 > 0.5: return "STRONG UP" if z24 < -1.5 and z7 < -0.5: return "STRONG DOWN" if z24 > 0.7: return "up" if z24 < -0.7: return "down" return "flat" def _mr_label(hurst: float, lag1: float) -> str: if math.isnan(hurst): return "insufficient" if hurst < 0.4 or (not math.isnan(lag1) and lag1 < -0.15): return "MEAN-REVERTING" if hurst > 0.6: return "TRENDING" return "random walk" def _vol_label(ratio: float) -> str: if math.isnan(ratio): return "insufficient" if ratio > 1.6: return "HIGH VOL (regime shift up)" if ratio < 0.6: return "LOW VOL (calm)" return "normal" def _corr_break(p: pd.Series, sibs: list[pd.Series] | None) -> dict: if not sibs: return {"label": "no siblings", "corr_recent": None, "corr_baseline": None, "delta": None} # Align all on common index via forward fill frame = pd.concat([p.rename("self")] + [s.rename(f"sib{i}") for i, s in enumerate(sibs)], axis=1) frame = frame.ffill().dropna() if len(frame) < 48: return {"label": "insufficient", "corr_recent": None, "corr_baseline": None, "delta": None} composite = frame.drop(columns="self").mean(axis=1) merged = pd.concat([frame["self"], composite.rename("comp")], axis=1) baseline = merged.iloc[:-24].corr().iloc[0, 1] recent = merged.iloc[-24:].corr().iloc[0, 1] if pd.isna(baseline) or pd.isna(recent): return {"label": "insufficient", "corr_recent": None, "corr_baseline": None, "delta": None} delta = recent - baseline label = "CORR BREAK" if abs(delta) > 0.35 else "stable" return { "corr_recent": float(recent), "corr_baseline": float(baseline), "delta": float(delta), "label": label, } # ---------- Plotting ---------- def build_plot(df: pd.DataFrame, question: str, factors: dict) -> go.Figure: fig = make_subplots( rows=2, cols=1, shared_xaxes=True, row_heights=[0.65, 0.35], vertical_spacing=0.06, subplot_titles=("Implied Probability (%)", "Rolling Volatility of Logit Returns"), ) p = df["p"] * 100 fig.add_trace( go.Scatter(x=df.index, y=p, mode="lines", name="YES %", line=dict(color="#1f77b4", width=2)), row=1, col=1, ) # 24h rolling mean ma = p.rolling("24h").mean() fig.add_trace( go.Scatter(x=df.index, y=ma, mode="lines", name="24h MA", line=dict(color="#ff7f0e", width=1, dash="dash")), row=1, col=1, ) # Vol panel p_clip = df["p"].clip(1e-6, 1 - 1e-6) logit = np.log(p_clip / (1 - p_clip)) ret = logit.diff() roll_vol = ret.rolling("24h").std() fig.add_trace( go.Scatter(x=roll_vol.index, y=roll_vol, mode="lines", name="24h rolling σ", line=dict(color="#d62728", width=1.5)), row=2, col=1, ) title = question if len(question) <= 110 else question[:107] + "…" fig.update_layout( title=title, height=620, hovermode="x unified", margin=dict(l=40, r=20, t=60, b=40), legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), template="plotly_white", ) fig.update_yaxes(title_text="Prob (%)", row=1, col=1, range=[0, 100]) fig.update_yaxes(title_text="σ", row=2, col=1) return fig # ---------- Gradio glue ---------- MARKET_CACHE: dict[str, pd.DataFrame] = {} def refresh_markets(category: str): cfg = CATEGORIES[category] hot = cfg.get("hot", False) events = fetch_events(cfg["order"], cfg["ascending"], limit=40) df = flatten_markets(events, hot=hot) if df.empty: return gr.Dropdown(choices=[], value=None), "No markets found." df = df.head(30).reset_index(drop=True) MARKET_CACHE[category] = df labels = [market_label(r) for _, r in df.iterrows()] return gr.Dropdown(choices=labels, value=labels[0]), f"Loaded {len(df)} markets." def analyze_selected(category: str, label: str, interval: str, fidelity: int): if category not in MARKET_CACHE or not label: return None, "Refresh markets first, then pick one.", pd.DataFrame() df_markets = MARKET_CACHE[category] labels = [market_label(r) for _, r in df_markets.iterrows()] if label not in labels: return None, "Selection out of sync — refresh markets.", pd.DataFrame() row = df_markets.iloc[labels.index(label)] token_id = row["token_yes"] if not token_id: return None, "No YES token id.", pd.DataFrame() # Fetch target history df = fetch_price_history(token_id, interval=interval, fidelity=int(fidelity)) if df.empty: return None, "No price history returned for this market.", pd.DataFrame() # Fetch a few siblings from the same event for corr-break siblings = [] try: evt_title = row["event"] sibs = df_markets[df_markets["event"] == evt_title] sibs = sibs[sibs["question"] != row["question"]].head(4) for _, sr in sibs.iterrows(): try: sdf = fetch_price_history(sr["token_yes"], interval=interval, fidelity=int(fidelity)) if not sdf.empty: siblings.append(sdf["p"]) except Exception: continue except Exception: pass factors = compute_factors(df, sibling_series=siblings) fig = build_plot(df, row["question"], factors) # Build human summary summary = _format_summary(row, df, factors) # Factor table tbl = _factor_table(factors) return fig, summary, tbl def _format_summary(row: pd.Series, df: pd.DataFrame, factors: dict) -> str: last_p = float(df["p"].iloc[-1]) * 100 first_p = float(df["p"].iloc[0]) * 100 end = row.get("end_date") or "?" lines = [ f"### {row['question']}", f"**Event:** {row['event']}", f"**Current:** YES = {last_p:.1f}% • NO = {100 - last_p:.1f}%", f"**Period change:** {first_p:.1f}% → {last_p:.1f}% ({last_p - first_p:+.1f} pp)", f"**24h volume:** ${row['vol_24h']:,.0f} • **Total volume:** ${row['vol_total']:,.0f} • **Liquidity:** ${row['liquidity']:,.0f}", f"**Ends:** {end}", f"**Samples:** {len(df)}", ] if "error" not in factors: m = factors["momentum"]; mr = factors["mean_reversion"]; v = factors["vol_regime"]; c = factors["corr_break"] lines += [ "", "#### Factor signals", f"- **Momentum:** {m['label']} (24h z={m['24h_zscore']:+.2f}, 7d z={m['7d_zscore']:+.2f})", f"- **Mean reversion:** {mr['label']} (Hurst={mr['hurst']:.2f}, ρ₁={mr['lag1_autocorr']:+.2f})", f"- **Vol regime:** {v['label']} (σ₂₄h/σ₇d = {v['ratio']:.2f})", f"- **Correlation vs event siblings:** {c['label']}" + ( f" (baseline={c['corr_baseline']:+.2f}, recent={c['corr_recent']:+.2f}, Δ={c['delta']:+.2f})" if c.get("delta") is not None else "" ), ] return "\n".join(lines) def _factor_table(factors: dict) -> pd.DataFrame: if "error" in factors: return pd.DataFrame([{"factor": "—", "value": factors["error"], "signal": ""}]) m = factors["momentum"]; mr = factors["mean_reversion"]; v = factors["vol_regime"]; c = factors["corr_break"] rows = [ {"factor": "Momentum 24h (z)", "value": f"{m['24h_zscore']:+.2f}", "signal": m["label"]}, {"factor": "Momentum 7d (z)", "value": f"{m['7d_zscore']:+.2f}", "signal": ""}, {"factor": "Hurst exponent", "value": f"{mr['hurst']:.2f}", "signal": mr["label"]}, {"factor": "Lag-1 autocorr", "value": f"{mr['lag1_autocorr']:+.2f}", "signal": ""}, {"factor": "σ₂₄h / σ₇d", "value": f"{v['ratio']:.2f}", "signal": v["label"]}, {"factor": "Corr vs siblings (Δ)", "value": f"{c['delta']:+.2f}" if c.get("delta") is not None else "n/a", "signal": c["label"]}, ] return pd.DataFrame(rows) # ---------- UI ---------- with gr.Blocks(title="Polymarket Factor Dashboard", theme=gr.themes.Soft()) as demo: gr.Markdown( "# Polymarket Factor Dashboard\n" "Pick a category, load markets, then analyze any market's price behavior " "with momentum / mean-reversion / volatility-regime / correlation-break detection." ) with gr.Row(): category = gr.Dropdown( label="Category", choices=list(CATEGORIES.keys()), value="Top 24h Volume", scale=2, ) load_btn = gr.Button("Load markets", variant="primary", scale=1) status = gr.Markdown("", elem_id="status") market = gr.Dropdown(label="Market", choices=[], value=None, interactive=True) with gr.Row(): interval = gr.Dropdown( label="History interval", choices=["1h", "6h", "1d", "1w", "1m", "max"], value="1w", ) fidelity = gr.Number(label="Fidelity (minutes per sample)", value=60, precision=0) analyze_btn = gr.Button("Analyze", variant="primary") with gr.Row(): with gr.Column(scale=3): plot = gr.Plot(label="Price & Volatility") with gr.Column(scale=2): summary = gr.Markdown("") table = gr.Dataframe( headers=["factor", "value", "signal"], label="Factor signals", interactive=False, wrap=True, ) load_btn.click( fn=refresh_markets, inputs=[category], outputs=[market, status], ) category.change( fn=refresh_markets, inputs=[category], outputs=[market, status], ) analyze_btn.click( fn=analyze_selected, inputs=[category, market, interval, fidelity], outputs=[plot, summary, table], ) # Auto-load on start demo.load( fn=refresh_markets, inputs=[category], outputs=[market, status], ) if __name__ == "__main__": demo.launch()