"""QAgent HuggingFace Space - QAOA tool selection, runs on CPU Basic.
The Space serves *precomputed* QAOA results (p=4, 160 steps, seed 0 - the
documented v0.2 config) for the 50 qagent-mini tasks, so the experience is
instant and needs no torch / pennylane in the image. The pure-Python classical
solvers (brute-force exact, greedy top-k) run *live* on demand so visitors can
confirm the served numbers are real. There is no IBM hardware path and no OAuth:
the Space is fully open.
Design (see README.md): a hero with the QAOA ansatz, a task explorer with a 4x4
tool grid that highlights which tools each solver picks, score cards with
approximation ratios, a score-landscape chart showing where QAOA / greedy /
optimal fall among all size-k subsets, an exploration history, and the
qagent-mini-50 leaderboard.
"""
from __future__ import annotations
import json
import os
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from safety import RateLimiter, default_persist_path
from qagent.oracle.bruteforce import brute_force_best
from qagent.oracle.greedy import greedy_top_k
from qagent.qaoa.encoding import ToolScoring
# Absolute directory of this module; every bundled file is resolved against it.
HERE = Path(__file__).resolve().parents[0]
# Precomputed benchmark payload read by `_load_results`.
RESULTS_PATH = HERE.joinpath("precomputed", "benchmark_results.json")
# Directory that `_read_asset` reads from.
ASSETS = HERE.joinpath("assets")
REPO_URL = "https://github.com/Quantum-Labor/qagent"
# Shared limiter consulted by `verify_live` before it runs the live solvers.
_RATE_LIMITER = RateLimiter(
    window_seconds=3,
    daily_cap=5000,
    persist_path=default_persist_path(),
)
def _load_results() -> dict[str, Any]:
    """Load the precomputed benchmark results bundled with the Space.

    Returns:
        The decoded JSON object stored at ``RESULTS_PATH``. When the file
        cannot be read, cannot be decoded, or does not hold a JSON object, an
        empty skeleton with ``config``, ``summary`` and ``tasks`` keys is
        returned instead so the module can still be imported.
    """
    empty: dict[str, Any] = {"config": {}, "summary": {}, "tasks": []}
    try:
        payload = json.loads(RESULTS_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable path; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return empty
    # Callers use ``RESULTS.get(...)``, so anything but a JSON object is unusable.
    return payload if isinstance(payload, dict) else empty
# Precomputed benchmark payload, read once when the module is imported.
RESULTS = _load_results()
# Task records in file order; empty when the payload has no "tasks" entry.
TASKS: list[dict[str, Any]] = RESULTS.get("tasks", [])
# Lookup from a task's "id" to its record.
TASK_BY_ID: dict[str, dict[str, Any]] = {t["id"]: t for t in TASKS}
# The source dataset (weights + full synergy matrix) is bundled so the live
# brute-force / greedy check reconstructs the exact scoring behind each task.
# The file is read by `_load_dataset`.
_DATASET_PATH = HERE / "precomputed" / "dataset.json"
def _load_dataset() -> dict[str, dict[str, Any]]:
    """Load the bundled source dataset and index its rows by task id.

    Returns:
        A mapping from each row's ``"id"`` to the row itself. An empty mapping
        is returned when the file cannot be read or decoded, or when its
        content is not a sequence of rows that each carry an ``"id"``.
    """
    try:
        rows = json.loads(_DATASET_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable path; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    try:
        return {row["id"]: row for row in rows}
    except (KeyError, TypeError):
        # Unexpected shape (non-iterable root, non-mapping row, missing or
        # unhashable id): treat the dataset as unavailable rather than
        # failing at import time.
        return {}
# Source rows keyed by task id; empty when the bundled dataset could not be loaded.
DATASET_BY_ID = _load_dataset()
def _read_asset(name: str) -> str:
    """Return the UTF-8 text of ``ASSETS / name``, or ``""`` if the file is missing."""
    asset_path = ASSETS / name
    try:
        content = asset_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        content = ""
    return content
def _client_ip(request: gr.Request | None) -> str:
    """Return a best-effort client address for use as the rate-limit key.

    Args:
        request: The incoming request, or ``None`` when none is available.

    Returns:
        The first non-empty entry of the ``x-forwarded-for`` header when
        present, otherwise ``request.client.host``, otherwise ``"unknown"``.
    """
    if request is None:
        return "unknown"
    headers = request.headers
    forwarded = headers.get("x-forwarded-for") if headers else None
    if forwarded:
        first_hop = forwarded.split(",")[0].strip()
        # A header such as ", 10.0.0.1" has an empty first entry; do not hand
        # an empty string back as the key, fall through to the peer address.
        if first_hop:
            return first_hop
    return getattr(request.client, "host", "unknown") or "unknown"
# --- rendering helpers (pure, unit-testable) --------------------------------
def task_label(task: dict[str, Any]) -> str:
    """Build the dropdown label for a task: id, tier and the N/k sizes."""
    task_id = task["id"]
    tier = task["tier"]
    sizes = f"N={task['n_tools']} k={task['k']}"
    return " · ".join((f"{task_id}", f"{tier}", sizes))
def task_choices() -> list[str]:
    """Return the dropdown labels for every loaded task, in ``TASKS`` order."""
    return list(map(task_label, TASKS))
def _label_to_id(label: str) -> str:
    """Extract the task id, i.e. the text before the first space of a label."""
    task_id, _sep, _rest = label.partition(" ")
    return task_id
def render_tool_grid(task: dict[str, Any]) -> str:
"""Render the tool grid for one task as an HTML string.

One card is produced per entry of ``task["tools"]`` (each read for its
``index``, ``weight`` and ``name``), followed by a legend. The ``subset``
lists under ``task["optimal"]``, ``task["qaoa"]`` and ``task["greedy"]``
decide how each card is marked.
"""
# Tool indices picked by each solver, as sets for membership tests.
optimal = set(task["optimal"]["subset"])
qaoa = set(task["qaoa"]["subset"])
greedy = set(task["greedy"]["subset"])
cards: list[str] = []
for tool in task["tools"]:
i = tool["index"]
w = tool["weight"]
# Extra CSS classes are added for optimal and QAOA picks only; greedy
# picks are marked through a tag below.
classes = "qa-tool-card"
if i in optimal:
classes += " sel-optimal"
if i in qaoa:
classes += " sel-qaoa"
# One tag per solver that selected this tool.
tags = []
if i in optimal:
tags.append('optimal')
if i in qaoa:
tags.append('qaoa')
if i in greedy:
tags.append('greedy')
# The tag wrapper is emitted only when at least one solver picked the tool.
tags_html = f'
{"".join(tags)}
' if tags else ""
bar = f'
'
# Card content: tool name, weight to three decimals, bar and tags.
cards.append(
f'{tool["name"]}
'
f'
w = {w:.3f}
{bar}{tags_html}
'
)
# Legend naming the three solver markers.
legend = (
''
'optimal (brute-force)'
'QAOA'
'greedy'
"
"
)
return f'{"".join(cards)}
{legend}'
def render_scores(task: dict[str, Any]) -> str:
"""Render the three score cards (optimal, QAOA, greedy) as an HTML string.

Reads ``score`` from ``task["optimal"]``, ``task["qaoa"]`` and
``task["greedy"]``, ``approx_ratio`` from the QAOA and greedy records,
``exact_match`` from the QAOA record, and ``k`` / ``n_tools`` from the task.
"""
opt = task["optimal"]
qa = task["qaoa"]
gd = task["greedy"]
# Wording shown next to the QAOA ratio.
exact = "exact match" if qa["exact_match"] else "approximate"
# Local helper producing one card; the score is formatted to three decimals.
def card(kind: str, title: str, score: float, sub: str) -> str:
return (
f'{title}
'
f'
{score:.3f}
'
f'
{sub}
'
)
# Cards are concatenated in the order optimal, QAOA, greedy.
return (
''
+ card(
"optimal",
"Optimal (brute-force)",
opt["score"],
f"{task['k']} of {task['n_tools']} tools",
)
+ card("qaoa", "QAOA", qa["score"], f"ratio {qa['approx_ratio']:.3f} · {exact}")
+ card("greedy", "Greedy top-k", gd["score"], f"ratio {gd['approx_ratio']:.3f}")
+ "
"
)
def landscape_figure(task: dict[str, Any]) -> Any:
    """Plot the task's score histogram with one vertical marker per solver.

    Args:
        task: Task record providing ``score_hist`` (with ``bin_edges``,
            ``counts`` and ``n_subsets``) and a ``score`` under each of
            ``greedy``, ``qaoa`` and ``optimal``.

    Returns:
        The matplotlib figure. It has already been removed from pyplot's
        figure registry, so the caller does not need to close it.
    """
    hist = task["score_hist"]
    edges = hist["bin_edges"]
    counts = hist["counts"]
    centers = [(edges[i] + edges[i + 1]) / 2 for i in range(len(counts))]
    # Bars cover 90% of the first bin's width; a fixed width is used when
    # fewer than two edges are available.
    width = (edges[1] - edges[0]) * 0.9 if len(edges) > 1 else 0.1
    plt.style.use("dark_background")
    fig, ax = plt.subplots(figsize=(7.6, 3.3), dpi=110)
    fig.patch.set_facecolor("#0B0B16")
    ax.set_facecolor("#0B0B16")
    ax.bar(
        centers, counts, width=width, color="#3A3460", edgecolor="none", label="all size-k subsets"
    )
    # Drawn in this order so the legend lists greedy, QAOA, optimal.
    for score, color, label in [
        (task["greedy"]["score"], "#F59E0B", "greedy"),
        (task["qaoa"]["score"], "#22D3EE", "QAOA"),
        (task["optimal"]["score"], "#34D399", "optimal"),
    ]:
        ax.axvline(score, color=color, linewidth=2.2, label=label)
    ax.set_xlabel("subset score", color="#9CA3AF", fontsize=10)
    ax.set_ylabel(f"# subsets ({hist['n_subsets']})", color="#9CA3AF", fontsize=10)
    ax.set_title("Score landscape: where each solver lands", color="#E5E7EB", fontsize=12, pad=10)
    ax.tick_params(colors="#6B6788", labelsize=8)
    for spine in ax.spines.values():
        spine.set_color("#2A2440")
    ax.legend(
        facecolor="#15152A", edgecolor="#2A2440", labelcolor="#E5E7EB", fontsize=9, loc="upper left"
    )
    fig.tight_layout()
    # pyplot keeps every figure it creates registered until it is closed.
    # Unregister it here so repeated calls in a long-running process do not
    # accumulate figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig
def history_figure(history: list[dict[str, Any]]) -> Any:
    """Plot the QAOA and greedy approximation ratios of the explored tasks.

    Args:
        history: Entries with ``id``, ``qaoa`` and ``greedy`` keys, in the
            order they should appear on the x axis. May be empty.

    Returns:
        The matplotlib figure. With an empty history it shows a placeholder
        message instead of the lines. The figure has already been removed
        from pyplot's figure registry, so the caller does not need to close it.
    """
    plt.style.use("dark_background")
    fig, ax = plt.subplots(figsize=(7.6, 2.8), dpi=110)
    fig.patch.set_facecolor("#0B0B16")
    ax.set_facecolor("#0B0B16")
    if history:
        xs = list(range(len(history)))
        ax.plot(xs, [h["qaoa"] for h in history], "-o", color="#22D3EE", label="QAOA", linewidth=2)
        ax.plot(
            xs, [h["greedy"] for h in history], "-o", color="#F59E0B", label="greedy", linewidth=2
        )
        # Reference line at ratio 1.0, labelled "optimal".
        ax.axhline(1.0, color="#34D399", linewidth=1.2, linestyle="--", label="optimal")
        ax.set_xticks(xs)
        ax.set_xticklabels([h["id"] for h in history], rotation=45, ha="right", fontsize=7)
        ax.set_ylim(0.5, 1.05)
        ax.legend(
            facecolor="#15152A",
            edgecolor="#2A2440",
            labelcolor="#E5E7EB",
            fontsize=9,
            loc="lower left",
        )
    else:
        # Nothing explored yet: centre a hint in axes coordinates.
        ax.text(
            0.5,
            0.5,
            "Explore tasks to build a history",
            ha="center",
            va="center",
            color="#6B6788",
            fontsize=11,
            transform=ax.transAxes,
        )
    ax.set_ylabel("approx ratio", color="#9CA3AF", fontsize=10)
    ax.set_title("Approximation-ratio history (this session)", color="#E5E7EB", fontsize=12, pad=8)
    ax.tick_params(colors="#6B6788", labelsize=8)
    for spine in ax.spines.values():
        spine.set_color("#2A2440")
    fig.tight_layout()
    # pyplot keeps every figure it creates registered until it is closed.
    # Unregister it here so repeated calls in a long-running process do not
    # accumulate figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig
def _task_description(task: dict[str, Any]) -> str:
    """Return the one-line Markdown summary shown above the tool grid."""
    task_id, tier = task["id"], task["tier"]
    k, n_tools = task["k"], task["n_tools"]
    headline = f"**{task_id}** · {tier} pool · select **{k}** of **{n_tools}** tools."
    return f"{headline} {task['description']}"
def select_task(
    label: str, history: list[dict[str, Any]]
) -> tuple[str, str, str, Any, Any, list[dict[str, Any]]]:
    """Render every task-dependent view for the selected label.

    Args:
        label: Dropdown label; its leading token is the task id.
        history: Session history entries (``id``, ``qaoa``, ``greedy``).

    Returns:
        ``(description, tool grid HTML, score cards HTML, landscape figure,
        history figure, updated history)``. The input list is not modified;
        the updated history holds at most the 12 most recent tasks, with the
        selected task moved to the end.
    """
    task = TASK_BY_ID[_label_to_id(label)]
    task_id = task["id"]
    # Forget any earlier visit to this task before recording the new one.
    updated = [past for past in history if past["id"] != task_id]
    latest = {
        "id": task_id,
        "qaoa": task["qaoa"]["approx_ratio"],
        "greedy": task["greedy"]["approx_ratio"],
    }
    updated.append(latest)
    updated = updated[-12:]

    description = _task_description(task)
    grid_html = render_tool_grid(task)
    scores_html = render_scores(task)
    landscape = landscape_figure(task)
    timeline = history_figure(updated)
    return description, grid_html, scores_html, landscape, timeline, updated
def verify_live(label: str, request: gr.Request | None = None) -> str:
    """Run brute-force + greedy live to prove the served numbers are real.

    Args:
        label: Dropdown label of the task; its leading token is the task id.
        request: Incoming request, used only to derive the rate-limit key.

    Returns:
        Markdown text: the rate limiter's detail when the call is refused, a
        short notice when no matching task or no source scoring is available,
        or the live brute-force / greedy results next to the served numbers.
    """
    verdict = _RATE_LIMITER.check_and_register(ip=_client_ip(request), now=datetime.now(UTC))
    if not verdict.allowed:
        return f"_{verdict.detail}_"
    # The dropdown has no value when no tasks were loaded, and a label may not
    # match any task; answer with a notice instead of raising.
    task = TASK_BY_ID.get(_label_to_id(label)) if label else None
    if task is None:
        return "_No task selected for live verification._"
    entry = DATASET_BY_ID.get(task["id"])
    if entry is None:
        return "_Source scoring unavailable for live verification._"
    sc = ToolScoring(
        weights=tuple(entry["weights"]),
        synergy=tuple(tuple(row) for row in entry["synergy"]),
    )
    k = task["k"]
    bf_sub, bf_score = brute_force_best(sc, k)
    gd_sub, gd_score = greedy_top_k(sc, k)
    opt = task["optimal"]["score"]
    # Compare the live optimum with the served one up to a small tolerance.
    bf_ok = abs(bf_score - opt) < 1e-6
    # Same tolerant lookup as the leaderboard: a payload without "config"
    # must not raise here.
    cfg = RESULTS.get("config", {})
    return (
        f"Live solve of **{task['id']}** (pure-Python, no precomputed lookup):\n\n"
        f"- brute-force optimum: `{sorted(bf_sub)}` score **{bf_score:.4f}** "
        f"{'matches' if bf_ok else 'DIFFERS FROM'} the served optimal `{opt:.4f}`\n"
        f"- greedy top-k: `{sorted(gd_sub)}` score **{gd_score:.4f}**\n\n"
        f"The served QAOA result (score {task['qaoa']['score']:.4f}, ratio "
        f"{task['qaoa']['approx_ratio']:.3f}) was precomputed with p={cfg.get('p')} "
        f"on the PennyLane simulator."
    )
def leaderboard_markdown() -> str:
    """Build the leaderboard as a Markdown table followed by a config note.

    Returns:
        ``"_Leaderboard unavailable._"`` when ``RESULTS`` has no summary;
        otherwise a table with one row per summary group that is present
        (``small``, ``full``, ``all``) and a note describing the metric and
        the QAOA configuration. The brute-force column is always rendered as
        100%.
    """
    summary = RESULTS.get("summary", {})
    if not summary:
        return "_Leaderboard unavailable._"

    def fmt(exact: int, ratio: float, total: int) -> str:
        # Floor-divided exact-match percentage, raw counts, then mean ratio.
        percent = 100 * exact // total
        return f"{percent}% ({exact}/{total}) · {ratio:.3f}"

    table = [
        "| Group | Brute-force | Greedy | QAOA |",
        "| --- | --- | --- | --- |",
    ]
    groups = (
        ("small", "small (N=8, k=3)"),
        ("full", "full (N=16, k=5)"),
        ("all", "all"),
    )
    for key, title in groups:
        if key not in summary:
            continue
        stats = summary[key]
        total = stats["n"]
        greedy_cell = fmt(stats["greedy_exact"], stats["greedy_ratio"], total)
        qaoa_cell = fmt(stats["qaoa_exact"], stats["qaoa_ratio"], total)
        table.append(f"| {title} | 100% ({total}/{total}) | {greedy_cell} | {qaoa_cell} |")

    config = RESULTS.get("config", {})
    footnote = (
        "\n\nMetric: exact-match (and mean approximation ratio). QAOA config: "
        f"p={config.get('p')}, {config.get('steps')} steps, {config.get('shots')} shots, "
        f"seed {config.get('seed')}, x mixer. Brute-force is the oracle (100% by construction)."
    )
    return "\n".join(table) + footnote
# --- UI ---------------------------------------------------------------------
# Base theme: hues and font stacks.
_THEME = gr.themes.Base(
    primary_hue="purple",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
)
# Colour overrides; the same hex values are used for the matplotlib figures.
_THEME = _THEME.set(
    body_background_fill="#0B0B16",
    body_text_color="#E5E7EB",
    background_fill_primary="#15152A",
    background_fill_secondary="#0B0B16",
    border_color_primary="#2A2440",
    button_primary_background_fill="#7C3AED",
    button_primary_background_fill_hover="#8B5CF6",
    button_primary_text_color="#FFFFFF",
    block_background_fill="#15152A",
    block_border_color="#2A2440",
    input_background_fill="#15152A",
)
def _hero_html() -> str:
"""Return the hero section text, embedding ``hero_qaoa.svg`` when it exists."""
hero_svg = _read_asset("hero_qaoa.svg")
# `_read_asset` returns "" for a missing file; the artwork is then left out.
art = (
f'{hero_svg}
'
if hero_svg
else ''
)
return f"""
{art}
QAgent
Quantum-optimized tool selection for LLM agents.
Pick the best subset of k tools from N with QAOA.
Tools interact - some synergise, some are redundant - so the
optimal set is a combinatorial optimization, not a ranking. QAgent encodes
"select k of N" on one qubit per tool and solves it with the Quantum
Approximate Optimization Algorithm.
v0.2.0
GitHub
Project 2 of 3
simulator
Apache-2.0
"""
# Markdown for the "What is this?" column rendered by `build_demo`.
_WHAT = """
### What is this?
An LLM agent with many tools should only be handed a few per task - to fit a
context budget, cut cost, and reduce confusion. Picking the *k* individually most
relevant tools is greedy and often wrong, because tools have **pairwise
interactions**: a retriever plus a summariser synergise; two overlapping search
tools are redundant. The value of a tool set is
```
score(S) = sum of tool relevances + sum of pairwise interactions
```
and the task is to maximise it subject to choosing exactly *k* tools - a quadratic
binary optimization, exactly the structure QAOA targets.
"""
# Markdown for the "How it works" column rendered by `build_demo`.
_HOW = """
### How it works (in 30 seconds)
- **Encode** the task as per-tool relevances and a pairwise interaction matrix.
- **Map** "select k of N" to N qubits (one-hot: qubit i = tool i in/out), folding
the cardinality constraint into the cost Hamiltonian as a penalty.
- **Optimize** a QAOA ansatz (cost layer + mixer) with PyTorch on the PennyLane
simulator; measure and keep the best valid subset.
- **Compare** against an exact brute-force oracle and a greedy top-k baseline.
This Space serves results precomputed with that pipeline; the classical solvers
run live so you can check the numbers.
"""
# Markdown for the closing "About" section; interpolates `REPO_URL`.
_ABOUT = f"""
### About
QAgent is project 2 of 3 in the Quantum Co-Processor program, after
[QVerify](https://github.com/Quantum-Labor/qverify) (quantum-assisted reasoning
verification) and alongside
[QRoute](https://github.com/Quantum-Labor/qroute) (a VQC mixture-of-experts
router). Source and full benchmarks: [{REPO_URL}]({REPO_URL}).
**Honest scope.** Simulator only; no quantum advantage is claimed. On
qagent-mini-50, QAOA matches the optimum on every N=8 task and reaches a 0.915
mean approximation ratio at N=16, but exact-match at N=16 remains hard (the
documented next step is Dicke-initialised XY-QAOA). The point is the architecture
and an honest baseline.
"""
def build_demo() -> gr.Blocks:
"""Assemble the Gradio Blocks UI and wire its event handlers.

Layout, top to bottom: hero, the two explanatory Markdown columns, the task
dropdown with description / tool grid / score cards, the live-verify button
and its output, the two plots, the leaderboard and the About text.

Returns:
    The configured ``gr.Blocks`` instance (not yet launched).
"""
# In Gradio 6.x, theme and css are passed to launch(), not the Blocks
# constructor (where they are silently ignored) -- mirrors the QVerify Space.
with gr.Blocks(title="QAgent - quantum tool selection") as demo:
gr.HTML(_hero_html())
with gr.Row():
with gr.Column(scale=1):
gr.Markdown(_WHAT)
with gr.Column(scale=1):
gr.Markdown(_HOW)
gr.HTML('Try it now
')
# Session state holding the exploration history passed to `select_task`.
history = gr.State([])
# No default selection when no tasks were loaded.
default_label = task_choices()[0] if TASKS else None
task_dd = gr.Dropdown(
choices=task_choices(),
value=default_label,
label="qagent-mini-50 task",
info="Pick a task to see which tools each solver selects.",
)
task_desc = gr.Markdown()
grid = gr.HTML()
scores = gr.HTML()
with gr.Row():
verify_btn = gr.Button("Verify live (brute-force + greedy)", variant="primary")
verify_out = gr.Markdown()
with gr.Row():
landscape = gr.Plot(label="Score landscape")
history_plot = gr.Plot(label="History")
gr.HTML('qagent-mini-50 leaderboard
')
gr.Markdown(leaderboard_markdown())
gr.Markdown(_ABOUT)
# Output components in the same order as the tuple returned by `select_task`.
outputs = [task_desc, grid, scores, landscape, history_plot, history]
task_dd.change(select_task, inputs=[task_dd, history], outputs=outputs)
verify_btn.click(verify_live, inputs=[task_dd], outputs=[verify_out])
# Populate the views for the default task on page load, when there is one.
if default_label is not None:
demo.load(select_task, inputs=[task_dd, history], outputs=outputs)
return demo
# Module-level Blocks instance, built at import; launched below when run as a script.
demo = build_demo()
if __name__ == "__main__":
    # Theme and stylesheet are supplied at launch time (see `build_demo`).
    stylesheet = _read_asset("styles.css")
    # Host and port come from the environment, with 0.0.0.0:7860 as defaults.
    host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    demo.launch(theme=_THEME, css=stylesheet, server_name=host, server_port=port)