Spaces:

pentarosarium
/

polystare

Sleeping

App Files Files Community

polystare / app.py

pentarosarium

Upload app.py

265ed23 verified about 2 months ago

raw

history blame contribute delete

18 kB

	"""
	Polymarket Factor Dashboard — Gradio app for Hugging Face Spaces.

	Uses the public, non-trade Polymarket APIs:
	- Gamma API https://gamma-api.polymarket.com
	- CLOB API https://clob.polymarket.com
	"""

	from __future__ import annotations

	import json
	import math
	from datetime import datetime, timezone
	from typing import Any

	import gradio as gr
	import numpy as np
	import pandas as pd
	import plotly.graph_objects as go
	import requests
	from plotly.subplots import make_subplots

	# Base URLs of the two public, non-trade Polymarket services queried by this app.
	GAMMA = "https://gamma-api.polymarket.com"
	CLOB = "https://clob.polymarket.com"

	# Timeout (seconds) passed to every HTTP request issued through SESSION.
	TIMEOUT = 20

	# One shared HTTP session so that all requests carry the same User-Agent header.
	SESSION = requests.Session()
	SESSION.headers["User-Agent"] = "polymarket-dashboard/1.0"

	# Market-list presets offered in the "Category" dropdown.
	# Each entry gives the sort field and direction sent to the events endpoint;
	# an optional "hot" flag asks flatten_markets to re-rank by 24h/total volume.
	CATEGORIES = {
	    "Top 24h Volume": dict(order="volume24hr", ascending=False),
	    "Top Total Volume": dict(order="volume", ascending=False),
	    "Top Liquidity": dict(order="liquidity", ascending=False),
	    "Most Competitive": dict(order="competitive", ascending=False),
	    "Breaking Hot (24h/total ratio)": dict(order="volume24hr", ascending=False, hot=True),
	    "Ending Soonest": dict(order="end_date", ascending=True),
	}


	# ---------- API helpers ----------

	def fetch_events(order: str, ascending: bool, limit: int = 40) -> list[dict]:
	    """Fetch active, non-closed events from the Gamma ``/events`` endpoint.

	    Args:
	        order: Field name forwarded as the ``order`` query parameter.
	        ascending: Sort direction; sent as the lower-cased string form
	            of the value (``"true"`` / ``"false"`` for booleans).
	        limit: Value of the ``limit`` query parameter.

	    Returns:
	        The decoded JSON body of the response.

	    Raises:
	        requests.HTTPError: If the server answers with an error status
	            (raised by ``raise_for_status``).
	    """
	    query = {
	        "active": "true",
	        "closed": "false",
	        "order": order,
	        "ascending": str(ascending).lower(),
	        "limit": limit,
	    }
	    response = SESSION.get(f"{GAMMA}/events", params=query, timeout=TIMEOUT)
	    response.raise_for_status()
	    return response.json()


	def fetch_price_history(token_id: str, interval: str = "1m", fidelity: int = 60) -> pd.DataFrame:
	    """Fetch the price history of one token from the CLOB ``/prices-history`` endpoint.

	    Args:
	        token_id: Sent as the ``market`` query parameter.
	        interval: Sent as the ``interval`` query parameter.
	        fidelity: Sent as the ``fidelity`` query parameter.

	    Returns:
	        A DataFrame indexed by ``t`` (UTC timestamps parsed from epoch
	        seconds, sorted ascending) with a float column ``p``. When the
	        response carries no history, an empty frame with the same layout
	        (``t`` index, ``p`` column) is returned so both paths agree.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	    """
	    response = SESSION.get(
	        f"{CLOB}/prices-history",
	        params={"market": token_id, "interval": interval, "fidelity": fidelity},
	        timeout=TIMEOUT,
	    )
	    response.raise_for_status()
	    # "history" may be missing or null; both mean "no data".
	    history = response.json().get("history") or []
	    if not history:
	        return pd.DataFrame(
	            columns=["p"],
	            index=pd.DatetimeIndex([], tz="UTC", name="t"),
	            dtype=float,
	        )
	    frame = pd.DataFrame(history)
	    frame["t"] = pd.to_datetime(frame["t"], unit="s", utc=True)
	    frame["p"] = frame["p"].astype(float)
	    return frame.set_index("t").sort_index()


	# ---------- Data shaping ----------

	def _parse_list(x: Any) -> list:
	if isinstance(x, list):
	return x
	if isinstance(x, str):
	try:
	return json.loads(x)
	except Exception:
	return []
	return []


	def flatten_markets(events: list[dict], hot: bool = False) -> pd.DataFrame:
	    """Flatten events into one row per market, enriched with parent event info.

	    Markets without CLOB token ids or without outcomes are skipped.

	    Args:
	        events: Event dicts; each may carry a ``markets`` list of market dicts.
	        hot: When true, keep only markets with total volume above 50,000
	            and sort them by ``hot_ratio`` (24h volume / total volume),
	            highest first.

	    Returns:
	        A DataFrame with columns ``event``, ``question``, ``slug``,
	        ``vol_total``, ``vol_24h``, ``liquidity``, ``hot_ratio``,
	        ``token_yes``, ``token_no``, ``outcomes``, ``prices``, ``end_date``
	        and ``condition_id``; an empty DataFrame when no market qualifies.
	    """
	    rows = []
	    for evt in events:
	        evt_title = evt.get("title", "")
	        # "markets" can be absent or explicitly null; treat both as "no markets".
	        for m in evt.get("markets") or []:
	            token_ids = _parse_list(m.get("clobTokenIds"))
	            outcomes = _parse_list(m.get("outcomes"))
	            if not token_ids or not outcomes:
	                continue
	            prices = _parse_list(m.get("outcomePrices"))
	            vol = float(m.get("volumeNum") or m.get("volume") or 0)
	            vol24 = float(m.get("volume24hr") or 0)
	            liq = float(m.get("liquidityNum") or 0)
	            rows.append({
	                "event": evt_title,
	                "question": m.get("question", ""),
	                "slug": m.get("slug", ""),
	                "vol_total": vol,
	                "vol_24h": vol24,
	                "liquidity": liq,
	                "hot_ratio": (vol24 / vol) if vol > 0 else 0.0,
	                # token_ids is non-empty here, so index 0 always exists.
	                "token_yes": token_ids[0],
	                "token_no": token_ids[1] if len(token_ids) > 1 else None,
	                "outcomes": outcomes,
	                "prices": [float(p) for p in prices] if prices else [],
	                "end_date": m.get("endDate"),
	                "condition_id": m.get("conditionId"),
	            })
	    df = pd.DataFrame(rows)
	    if df.empty:
	        return df
	    if hot:
	        df = df[df["vol_total"] > 50_000]  # filter noise
	        df = df.sort_values("hot_ratio", ascending=False)
	    return df


	def market_label(row: pd.Series) -> str:
	    """Build the dropdown label for one market row.

	    The question is cut to 82 characters plus an ellipsis when it is longer
	    than 85 characters, then followed by the 24h volume and the liquidity,
	    both formatted as whole dollars with thousands separators.
	    """
	    question = row["question"]
	    shown = question if len(question) <= 85 else question[:82] + "…"
	    return f'{shown} • 24h ${row["vol_24h"]:,.0f} • liq ${row["liquidity"]:,.0f}'


	# ---------- Factor computations ----------

	def hurst_exponent(series: np.ndarray) -> float:
	    """Estimate the Hurst exponent with rescaled-range (R/S) analysis.

	    Interpretation: H < 0.5 mean-reverting, 0.5 random walk, > 0.5 trending.

	    The series is cut into non-overlapping windows for up to 12
	    log-spaced window widths between about 5 and ``n // 2``; the mean R/S
	    per width is regressed on the width in log-log space and the slope is
	    returned.

	    Returns:
	        The fitted slope, or NaN when the series has fewer than 20 points
	        or fewer than 4 window widths produce a usable R/S value.
	    """
	    values = np.asarray(series, dtype=float)
	    total = len(values)
	    if total < 20:
	        return float("nan")

	    def mean_rescaled_range(width):
	        """Average R/S over all complete windows of ``width``; None if none is usable."""
	        ratios = []
	        for start in range(0, (total // width) * width, width):
	            window = values[start:start + width]
	            walk = np.cumsum(window - window.mean())
	            spread = window.std(ddof=0)
	            # Constant windows have zero spread and carry no R/S information.
	            if spread > 0:
	                ratios.append((walk.max() - walk.min()) / spread)
	        return np.mean(ratios) if ratios else None

	    widths = np.unique(np.logspace(0.7, np.log10(total // 2), 12).astype(int))
	    points = []
	    for width in widths[widths >= 2]:
	        average = mean_rescaled_range(width)
	        if average is not None:
	            points.append((width, average))
	    if len(points) < 4:
	        return float("nan")

	    log_widths = np.log([w for w, _ in points])
	    log_rs = np.log([v for _, v in points])
	    slope, _intercept = np.polyfit(log_widths, log_rs, 1)
	    return float(slope)


	def compute_factors(df: pd.DataFrame, sibling_series: list[pd.Series] \| None = None) -> dict:
	"""Momentum, mean-reversion, vol regime, correlation break."""
	out: dict = {}
	if df.empty or len(df) < 20:
	return {"error": "Not enough history to compute factors."}

	p = df["p"].astype(float)
	# log-diff returns; avoid p=0
	p_clip = p.clip(lower=1e-6, upper=1 - 1e-6)
	logit = np.log(p_clip / (1 - p_clip))
	ret = logit.diff().dropna()

	# --- Momentum: 24h and 7d cumulative logit change, normalized
	now = df.index[-1]
	def window_change(hours: int) -> float:
	cutoff = now - pd.Timedelta(hours=hours)
	seg = logit.loc[logit.index >= cutoff]
	if len(seg) < 2:
	return float("nan")
	return float(seg.iloc[-1] - seg.iloc[0])

	mom_24h = window_change(24)
	mom_7d = window_change(24 * 7)
	vol_all = float(ret.std()) if len(ret) > 2 else float("nan")
	mom_z_24h = mom_24h / (vol_all * math.sqrt(24)) if vol_all and not math.isnan(mom_24h) else float("nan")
	mom_z_7d = mom_7d / (vol_all * math.sqrt(24 * 7)) if vol_all and not math.isnan(mom_7d) else float("nan")

	out["momentum"] = {
	"24h_dlogit": mom_24h,
	"7d_dlogit": mom_7d,
	"24h_zscore": mom_z_24h,
	"7d_zscore": mom_z_7d,
	"label": _momentum_label(mom_z_24h, mom_z_7d),
	}

	# --- Mean reversion: Hurst + lag-1 autocorr of returns
	hurst = hurst_exponent(p.values)
	lag1 = float(ret.autocorr(lag=1)) if len(ret) > 10 else float("nan")
	out["mean_reversion"] = {
	"hurst": hurst,
	"lag1_autocorr": lag1,
	"label": _mr_label(hurst, lag1),
	}

	# --- Vol regime: recent vs baseline
	ret_24h = ret.loc[ret.index >= now - pd.Timedelta(hours=24)]
	ret_7d = ret.loc[ret.index >= now - pd.Timedelta(days=7)]
	vol_24h = float(ret_24h.std()) if len(ret_24h) > 2 else float("nan")
	vol_7d = float(ret_7d.std()) if len(ret_7d) > 2 else float("nan")
	ratio = (vol_24h / vol_7d) if vol_7d and vol_7d > 0 else float("nan")
	out["vol_regime"] = {
	"vol_24h": vol_24h,
	"vol_7d": vol_7d,
	"ratio": ratio,
	"label": _vol_label(ratio),
	}

	# --- Correlation break: rolling corr between this market and sibling composite
	out["corr_break"] = _corr_break(p, sibling_series)

	return out


	def _momentum_label(z24: float, z7: float) -> str:
	if any(math.isnan(x) for x in (z24, z7)):
	return "insufficient"
	if z24 > 1.5 and z7 > 0.5: return "STRONG UP"
	if z24 < -1.5 and z7 < -0.5: return "STRONG DOWN"
	if z24 > 0.7: return "up"
	if z24 < -0.7: return "down"
	return "flat"


	def _mr_label(hurst: float, lag1: float) -> str:
	if math.isnan(hurst): return "insufficient"
	if hurst < 0.4 or (not math.isnan(lag1) and lag1 < -0.15):
	return "MEAN-REVERTING"
	if hurst > 0.6:
	return "TRENDING"
	return "random walk"


	def _vol_label(ratio: float) -> str:
	if math.isnan(ratio): return "insufficient"
	if ratio > 1.6: return "HIGH VOL (regime shift up)"
	if ratio < 0.6: return "LOW VOL (calm)"
	return "normal"


	def _corr_break(p: pd.Series, sibs: list[pd.Series] \| None) -> dict:
	if not sibs:
	return {"label": "no siblings", "corr_recent": None, "corr_baseline": None, "delta": None}
	# Align all on common index via forward fill
	frame = pd.concat([p.rename("self")] + [s.rename(f"sib{i}") for i, s in enumerate(sibs)], axis=1)
	frame = frame.ffill().dropna()
	if len(frame) < 48:
	return {"label": "insufficient", "corr_recent": None, "corr_baseline": None, "delta": None}
	composite = frame.drop(columns="self").mean(axis=1)
	merged = pd.concat([frame["self"], composite.rename("comp")], axis=1)
	baseline = merged.iloc[:-24].corr().iloc[0, 1]
	recent = merged.iloc[-24:].corr().iloc[0, 1]
	if pd.isna(baseline) or pd.isna(recent):
	return {"label": "insufficient", "corr_recent": None, "corr_baseline": None, "delta": None}
	delta = recent - baseline
	label = "CORR BREAK" if abs(delta) > 0.35 else "stable"
	return {
	"corr_recent": float(recent),
	"corr_baseline": float(baseline),
	"delta": float(delta),
	"label": label,
	}


	# ---------- Plotting ----------

	def build_plot(df: pd.DataFrame, question: str, factors: dict) -> go.Figure:
	    """Build the two-panel figure: implied probability on top, rolling volatility below.

	    Args:
	        df: Price history with a column ``p`` and a datetime index (the
	            time-based ``"24h"`` rolling windows require one).
	        question: Market question used as the title, shortened to 107
	            characters plus an ellipsis when longer than 110.
	        factors: Accepted for interface symmetry; not used by this function.

	    Returns:
	        The assembled Plotly figure.
	    """
	    fig = make_subplots(
	        rows=2,
	        cols=1,
	        shared_xaxes=True,
	        row_heights=[0.65, 0.35],
	        vertical_spacing=0.06,
	        subplot_titles=("Implied Probability (%)", "Rolling Volatility of Logit Returns"),
	    )

	    # Top panel: probability in percent plus its 24h rolling mean.
	    pct = df["p"] * 100
	    price_trace = go.Scatter(
	        x=df.index, y=pct, mode="lines", name="YES %",
	        line={"color": "#1f77b4", "width": 2},
	    )
	    fig.add_trace(price_trace, row=1, col=1)

	    average_trace = go.Scatter(
	        x=df.index, y=pct.rolling("24h").mean(), mode="lines", name="24h MA",
	        line={"color": "#ff7f0e", "width": 1, "dash": "dash"},
	    )
	    fig.add_trace(average_trace, row=1, col=1)

	    # Bottom panel: 24h rolling standard deviation of logit returns.
	    clipped = df["p"].clip(1e-6, 1 - 1e-6)
	    log_odds = np.log(clipped / (1 - clipped))
	    rolling_sigma = log_odds.diff().rolling("24h").std()
	    sigma_trace = go.Scatter(
	        x=rolling_sigma.index, y=rolling_sigma, mode="lines", name="24h rolling σ",
	        line={"color": "#d62728", "width": 1.5},
	    )
	    fig.add_trace(sigma_trace, row=2, col=1)

	    if len(question) <= 110:
	        title = question
	    else:
	        title = question[:107] + "…"
	    fig.update_layout(
	        title=title,
	        height=620,
	        hovermode="x unified",
	        margin={"l": 40, "r": 20, "t": 60, "b": 40},
	        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
	        template="plotly_white",
	    )
	    fig.update_yaxes(title_text="Prob (%)", row=1, col=1, range=[0, 100])
	    fig.update_yaxes(title_text="σ", row=2, col=1)
	    return fig


	# ---------- Gradio glue ----------

	# Market table last loaded for each category name: written by
	# refresh_markets and read back by analyze_selected to resolve a label.
	MARKET_CACHE: dict[str, pd.DataFrame] = {}


	def refresh_markets(category: str):
	    """Reload the market list for a category and refresh the market dropdown.

	    Fetches up to 40 events for the category's sort order, flattens them
	    into markets, keeps the first 30 and stores them in ``MARKET_CACHE``.

	    Args:
	        category: A key of ``CATEGORIES``.

	    Returns:
	        A ``(dropdown, status)`` pair: a ``gr.Dropdown`` update holding the
	        market labels (first one selected) and a status message. When the
	        request fails or yields no markets, the dropdown is emptied, the
	        cached table for the category is dropped and the status explains why.
	    """
	    cfg = CATEGORIES[category]
	    try:
	        events = fetch_events(cfg["order"], cfg["ascending"], limit=40)
	    except (requests.RequestException, ValueError) as exc:
	        # Network/HTTP errors and undecodable bodies become a status message
	        # instead of an unhandled exception in the UI callback.
	        MARKET_CACHE.pop(category, None)
	        return gr.Dropdown(choices=[], value=None), f"Failed to load markets: {exc}"

	    # Anything but a list of events (e.g. an error object) means "no events".
	    if not isinstance(events, list):
	        events = []
	    df = flatten_markets(events, hot=cfg.get("hot", False))
	    if df.empty:
	        # Do not leave a stale table behind an emptied dropdown.
	        MARKET_CACHE.pop(category, None)
	        return gr.Dropdown(choices=[], value=None), "No markets found."

	    df = df.head(30).reset_index(drop=True)
	    MARKET_CACHE[category] = df
	    labels = [market_label(r) for _, r in df.iterrows()]
	    return gr.Dropdown(choices=labels, value=labels[0]), f"Loaded {len(df)} markets."


	def analyze_selected(category: str, label: str, interval: str, fidelity: int):
	    """Analyze the market selected in the dropdown.

	    Resolves ``label`` against the cached market table of ``category``,
	    downloads the price history of its first token and of up to four
	    other markets from the same event, computes the factors and renders
	    the outputs.

	    Args:
	        category: Category whose cached market table is searched.
	        label: Dropdown label produced by ``market_label``.
	        interval: History interval forwarded to ``fetch_price_history``.
	        fidelity: Minutes per sample; an empty or invalid value falls back
	            to 60 and values below 1 are raised to 1.

	    Returns:
	        A ``(figure, summary_markdown, factor_table)`` triple. On any
	        failure the figure is ``None``, the summary holds the reason and
	        the table is an empty DataFrame.
	    """
	    if category not in MARKET_CACHE or not label:
	        return None, "Refresh markets first, then pick one.", pd.DataFrame()

	    df_markets = MARKET_CACHE[category]
	    labels = [market_label(r) for _, r in df_markets.iterrows()]
	    if label not in labels:
	        return None, "Selection out of sync — refresh markets.", pd.DataFrame()
	    row = df_markets.iloc[labels.index(label)]

	    token_id = row["token_yes"]
	    if not token_id:
	        return None, "No YES token id.", pd.DataFrame()

	    # The number box yields None when cleared; never pass that to int().
	    try:
	        minutes = max(1, int(fidelity))
	    except (TypeError, ValueError):
	        minutes = 60

	    # Fetch target history
	    try:
	        df = fetch_price_history(token_id, interval=interval, fidelity=minutes)
	    except (requests.RequestException, ValueError, KeyError) as exc:
	        return None, f"Failed to fetch price history: {exc}", pd.DataFrame()
	    if df.empty:
	        return None, "No price history returned for this market.", pd.DataFrame()

	    # Fetch a few siblings from the same event for corr-break
	    same_event = df_markets["event"] == row["event"]
	    other_question = df_markets["question"] != row["question"]
	    sibling_tokens = df_markets.loc[same_event & other_question, "token_yes"].head(4)
	    siblings = []
	    for sibling_token in sibling_tokens:
	        if not sibling_token:
	            continue
	        try:
	            sdf = fetch_price_history(sibling_token, interval=interval, fidelity=minutes)
	        except (requests.RequestException, ValueError, KeyError):
	            # Siblings are best-effort: one failed download must not block
	            # the analysis of the selected market.
	            continue
	        if not sdf.empty:
	            siblings.append(sdf["p"])

	    factors = compute_factors(df, sibling_series=siblings)
	    fig = build_plot(df, row["question"], factors)

	    # Build human summary
	    summary = _format_summary(row, df, factors)

	    # Factor table
	    tbl = _factor_table(factors)

	    return fig, summary, tbl


	def _format_summary(row: pd.Series, df: pd.DataFrame, factors: dict) -> str:
	last_p = float(df["p"].iloc[-1]) * 100
	first_p = float(df["p"].iloc[0]) * 100
	end = row.get("end_date") or "?"
	lines = [
	f"### {row['question']}",
	f"Event: {row['event']}",
	f"Current: YES = {last_p:.1f}% • NO = {100 - last_p:.1f}%",
	f"Period change: {first_p:.1f}% → {last_p:.1f}% ({last_p - first_p:+.1f} pp)",
	f"24h volume: ${row['vol_24h']:,.0f} • Total volume: ${row['vol_total']:,.0f} • Liquidity: ${row['liquidity']:,.0f}",
	f"Ends: {end}",
	f"Samples: {len(df)}",
	]
	if "error" not in factors:
	m = factors["momentum"]; mr = factors["mean_reversion"]; v = factors["vol_regime"]; c = factors["corr_break"]
	lines += [
	"",
	"#### Factor signals",
	f"- Momentum: {m['label']} (24h z={m['24h_zscore']:+.2f}, 7d z={m['7d_zscore']:+.2f})",
	f"- Mean reversion: {mr['label']} (Hurst={mr['hurst']:.2f}, ρ₁={mr['lag1_autocorr']:+.2f})",
	f"- Vol regime: {v['label']} (σ₂₄h/σ₇d = {v['ratio']:.2f})",
	f"- Correlation vs event siblings: {c['label']}" + (
	f" (baseline={c['corr_baseline']:+.2f}, recent={c['corr_recent']:+.2f}, Δ={c['delta']:+.2f})"
	if c.get("delta") is not None else ""
	),
	]
	return "\n".join(lines)


	def _factor_table(factors: dict) -> pd.DataFrame:
	if "error" in factors:
	return pd.DataFrame([{"factor": "—", "value": factors["error"], "signal": ""}])
	m = factors["momentum"]; mr = factors["mean_reversion"]; v = factors["vol_regime"]; c = factors["corr_break"]
	rows = [
	{"factor": "Momentum 24h (z)", "value": f"{m['24h_zscore']:+.2f}", "signal": m["label"]},
	{"factor": "Momentum 7d (z)", "value": f"{m['7d_zscore']:+.2f}", "signal": ""},
	{"factor": "Hurst exponent", "value": f"{mr['hurst']:.2f}", "signal": mr["label"]},
	{"factor": "Lag-1 autocorr", "value": f"{mr['lag1_autocorr']:+.2f}", "signal": ""},
	{"factor": "σ₂₄h / σ₇d", "value": f"{v['ratio']:.2f}", "signal": v["label"]},
	{"factor": "Corr vs siblings (Δ)",
	"value": f"{c['delta']:+.2f}" if c.get("delta") is not None else "n/a",
	"signal": c["label"]},
	]
	return pd.DataFrame(rows)


	# ---------- UI ----------

	# Declarative Gradio layout plus the event wiring of the dashboard.
	# NOTE(review): this copy of the file has lost its indentation, so the nesting
	# of the components under the Blocks/Row/Column contexts is not visible here;
	# restore it from the original before running.
	with gr.Blocks(title="Polymarket Factor Dashboard", theme=gr.themes.Soft()) as demo:
	gr.Markdown(
	"# Polymarket Factor Dashboard\n"
	"Pick a category, load markets, then analyze any market's price behavior "
	"with momentum / mean-reversion / volatility-regime / correlation-break detection."
	)

	# Category picker, the button that reloads the market list, and the status line
	# that receives the message returned by refresh_markets.
	with gr.Row():
	category = gr.Dropdown(
	label="Category",
	choices=list(CATEGORIES.keys()),
	value="Top 24h Volume",
	scale=2,
	)
	load_btn = gr.Button("Load markets", variant="primary", scale=1)
	status = gr.Markdown("", elem_id="status")

	# Market picker; starts empty and is filled by refresh_markets.
	market = gr.Dropdown(label="Market", choices=[], value=None, interactive=True)

	# History parameters passed to analyze_selected, and the button that runs it.
	with gr.Row():
	interval = gr.Dropdown(
	label="History interval",
	choices=["1h", "6h", "1d", "1w", "1m", "max"],
	value="1w",
	)
	fidelity = gr.Number(label="Fidelity (minutes per sample)", value=60, precision=0)
	analyze_btn = gr.Button("Analyze", variant="primary")

	# Output components: chart, Markdown summary and factor table.
	with gr.Row():
	with gr.Column(scale=3):
	plot = gr.Plot(label="Price & Volatility")
	with gr.Column(scale=2):
	summary = gr.Markdown("")
	table = gr.Dataframe(
	headers=["factor", "value", "signal"],
	label="Factor signals",
	interactive=False,
	wrap=True,
	)

	# Both the button and a change of category reload the market list.
	load_btn.click(
	fn=refresh_markets,
	inputs=[category],
	outputs=[market, status],
	)
	category.change(
	fn=refresh_markets,
	inputs=[category],
	outputs=[market, status],
	)
	# Run the analysis for the selected market with the chosen history parameters.
	analyze_btn.click(
	fn=analyze_selected,
	inputs=[category, market, interval, fidelity],
	outputs=[plot, summary, table],
	)

	# Auto-load on start
	demo.load(
	fn=refresh_markets,
	inputs=[category],
	outputs=[market, status],
	)


	# Script entry point: launch the Gradio app only when this file is run directly.
	if __name__ == "__main__":
	    demo.launch()