sync from GitHub: fallback panel + benchmarks tab + path fix
Browse files
app.py
CHANGED
|
@@ -151,6 +151,48 @@ IBM_INSTANCE_PRESENT = bool(os.environ.get("IBM_QUANTUM_INSTANCE"))
|
|
| 151 |
IBM_AVAILABLE = IBM_TOKEN_PRESENT and IBM_INSTANCE_PRESENT
|
| 152 |
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
def _format_cnf(cnf: CNF) -> str:
|
| 155 |
"""Render a CNF as a readable multi-line string."""
|
| 156 |
if not cnf.clauses:
|
|
@@ -639,10 +681,22 @@ with gr.Blocks(title="QVerify") as demo:
|
|
| 639 |
"### IBM Hardware Jobs\n\n"
|
| 640 |
"Job execution happens on IBM Quantum hardware. After clicking "
|
| 641 |
"**Verify on IBM Heron r2**, copy the Job ID from the Result and "
|
| 642 |
-
"view live status at the
|
|
|
|
| 643 |
"All jobs are publicly verifiable there."
|
| 644 |
)
|
| 645 |
gr.Markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
gr.Markdown(
|
| 647 |
"### About\n\n"
|
| 648 |
"**This Space demos the verifier component of QVerify.** A CNF "
|
|
|
|
| 151 |
IBM_AVAILABLE = IBM_TOKEN_PRESENT and IBM_INSTANCE_PRESENT
|
| 152 |
|
| 153 |
|
| 154 |
+
def _load_benchmark_summaries(
    results_root: "str | os.PathLike[str] | None" = None,
) -> str:
    """Render the checked-in benchmark reports as a Markdown table.

    Reads every ``<results_root>/*/report.json`` and emits one table row per
    report. The function is called once while the Space UI is being built, so
    it is deliberately best-effort: a report that cannot be read, is not valid
    JSON, is not a JSON object, or carries non-numeric metrics is logged and
    skipped instead of aborting startup.

    Args:
        results_root: Directory holding one sub-directory per benchmark run.
            When ``None`` (the default) the directory is located relative to
            this file; see the layout comment in the body.

    Returns:
        A Markdown table (header plus one row per usable report), or an
        italic placeholder line when no usable report exists.
    """
    import json
    import logging
    from pathlib import Path

    log = logging.getLogger(__name__)
    empty_notice = "_No benchmark reports checked in yet._"

    if results_root is None:
        # On the HF Space app.py sits at the repo root, so benchmarks/ is a
        # sibling of app.py. In the local repo app.py lives under space/, so
        # benchmarks/ sits one level above app.py's directory. Try both.
        here = Path(__file__).resolve().parent
        candidates = (
            here / "benchmarks" / "results",
            here.parent / "benchmarks" / "results",
        )
    else:
        candidates = (Path(results_root),)

    # is_dir() rather than exists(): a stray regular file named "results"
    # must not stop the search before the other layout is tried.
    root = next((path for path in candidates if path.is_dir()), None)
    if root is None:
        return empty_notice

    def cell(value: object) -> str:
        # A raw "|" or newline inside a value would break the table layout.
        return str(value).replace("|", "\\|").replace("\n", " ")

    rows: list[str] = []
    for report_path in sorted(root.glob("*/report.json")):
        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            log.warning("Skipping unreadable report %s: %s", report_path, exc)
            continue
        if not isinstance(data, dict):
            log.warning("Skipping %s: top-level JSON is not an object", report_path)
            continue
        try:
            accuracy = float(data.get("accuracy", 0.0))
            avg_seconds = float(data.get("avg_seconds", 0.0))
            p95_seconds = float(data.get("p95_seconds", 0.0))
        except (TypeError, ValueError) as exc:
            log.warning("Skipping %s: non-numeric metric (%s)", report_path, exc)
            continue
        rows.append(
            f"| {cell(data.get('dataset', '?'))} "
            f"| {cell(data.get('backend', '?'))} "
            f"| {cell(data.get('n_examples', 0))} "
            f"| {accuracy * 100:.1f}% "
            f"| {avg_seconds:.3f} s "
            f"| {p95_seconds:.3f} s |"
        )

    if not rows:
        return empty_notice

    header = (
        "| Dataset | Backend | Examples | Accuracy | Avg | P95 |\n"
        "| --- | --- | --- | --- | --- | --- |\n"
    )
    return header + "\n".join(rows)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
def _format_cnf(cnf: CNF) -> str:
|
| 197 |
"""Render a CNF as a readable multi-line string."""
|
| 198 |
if not cnf.clauses:
|
|
|
|
| 681 |
"### IBM Hardware Jobs\n\n"
|
| 682 |
"Job execution happens on IBM Quantum hardware. After clicking "
|
| 683 |
"**Verify on IBM Heron r2**, copy the Job ID from the Result and "
|
| 684 |
+
"view live status at the "
|
| 685 |
+
"[IBM Quantum Workloads dashboard](https://quantum.cloud.ibm.com/workloads). "
|
| 686 |
"All jobs are publicly verifiable there."
|
| 687 |
)
|
| 688 |
gr.Markdown("---")
|
| 689 |
+
gr.Markdown(
|
| 690 |
+
"### Benchmarks\n\n"
|
| 691 |
+
"Verifier accuracy against the PySAT Glucose3 oracle on three "
|
| 692 |
+
"logic-reasoning datasets (ProofWriter, RuleTaker, FOLIO). "
|
| 693 |
+
"Reports are generated by `scripts/run_benchmarks.py` and "
|
| 694 |
+
"checked into `benchmarks/results/`. See "
|
| 695 |
+
"[docs/benchmarks.md]"
|
| 696 |
+
"(https://github.com/Quantum-Labor/qverify/blob/main/docs/benchmarks.md)"
|
| 697 |
+
" for methodology.\n\n" + _load_benchmark_summaries()
|
| 698 |
+
)
|
| 699 |
+
gr.Markdown("---")
|
| 700 |
gr.Markdown(
|
| 701 |
"### About\n\n"
|
| 702 |
"**This Space demos the verifier component of QVerify.** A CNF "
|