Spaces:

Laborator
/

qverify

Running

App Files Files Community

Laborator committed 28 days ago

Commit

c3ae39d

verified ·

1 Parent(s): 7ba2968

sync from GitHub: fallback panel + benchmarks tab + path fix

Browse files

Files changed (1) hide show

app.py +55 -1

app.py CHANGED Viewed

@@ -151,6 +151,48 @@ IBM_INSTANCE_PRESENT = bool(os.environ.get("IBM_QUANTUM_INSTANCE"))
 IBM_AVAILABLE = IBM_TOKEN_PRESENT and IBM_INSTANCE_PRESENT
 def _format_cnf(cnf: CNF) -> str:
     """Render a CNF as a readable multi-line string."""
     if not cnf.clauses:
@@ -639,10 +681,22 @@ with gr.Blocks(title="QVerify") as demo:
         "### IBM Hardware Jobs\n\n"
         "Job execution happens on IBM Quantum hardware. After clicking "
         "**Verify on IBM Heron r2**, copy the Job ID from the Result and "
-        "view live status at the [IBM Quantum Workloads dashboard](https://quantum.cloud.ibm.com/workloads). "
         "All jobs are publicly verifiable there."
     )
     gr.Markdown("---")
     gr.Markdown(
         "### About\n\n"
         "**This Space demos the verifier component of QVerify.** A CNF "

 IBM_AVAILABLE = IBM_TOKEN_PRESENT and IBM_INSTANCE_PRESENT
def _load_benchmark_summaries() -> str:
    """Render the checked-in benchmark reports as a Markdown table.

    Reads every ``benchmarks/results/*/report.json`` found next to (or one
    level above) this file's directory and emits one table row per report,
    in sorted path order. Called once at Space load time, so it must never
    raise: a report that cannot be read, is not valid JSON, is not a JSON
    object, or carries a non-numeric metric is logged and skipped.

    Returns:
        The Markdown table (header plus one row per usable report), or a
        short italic placeholder when no results directory or no usable
        report exists.
    """
    import json
    import logging
    from pathlib import Path

    log = logging.getLogger(__name__)
    empty_notice = "_No benchmark reports checked in yet._"

    # On the HF Space app.py sits at the repo root, so benchmarks/ is a
    # sibling of app.py. In the local repo app.py lives under space/, so
    # benchmarks/ sits one level above app.py's directory. Try both layouts.
    here = Path(__file__).resolve().parent
    candidates = (
        here / "benchmarks" / "results",
        here.parent / "benchmarks" / "results",
    )
    # is_dir() rather than exists(): a stray *file* with that name must not
    # shadow a real results directory in the other layout.
    results_root = next((c for c in candidates if c.is_dir()), None)
    if results_root is None:
        return empty_notice

    rows: list[str] = []
    for report_path in sorted(results_root.glob("*/report.json")):
        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise TypeError("top-level JSON value is not an object")
            # float() normalises ints and rejects None/containers, so the
            # format specs below cannot blow up at import time.
            row = (
                f"| {data.get('dataset', '?')} "
                f"| {data.get('backend', '?')} "
                f"| {data.get('n_examples', 0)} "
                f"| {float(data.get('accuracy', 0.0)) * 100:.1f}% "
                f"| {float(data.get('avg_seconds', 0.0)):.3f} s "
                f"| {float(data.get('p95_seconds', 0.0)):.3f} s |"
            )
        except (OSError, ValueError, TypeError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            log.warning("Skipping benchmark report %s: %s", report_path, exc)
            continue
        rows.append(row)

    if not rows:
        return empty_notice

    header = (
        "| Dataset | Backend | Examples | Accuracy | Avg | P95 |\n"
        "| --- | --- | --- | --- | --- | --- |\n"
    )
    return header + "\n".join(rows)
 def _format_cnf(cnf: CNF) -> str:
     """Render a CNF as a readable multi-line string."""
     if not cnf.clauses:
         "### IBM Hardware Jobs\n\n"
         "Job execution happens on IBM Quantum hardware. After clicking "
         "**Verify on IBM Heron r2**, copy the Job ID from the Result and "
+        "view live status at the "
+        "[IBM Quantum Workloads dashboard](https://quantum.cloud.ibm.com/workloads). "
         "All jobs are publicly verifiable there."
     )
     gr.Markdown("---")
+    gr.Markdown(
+        "### Benchmarks\n\n"
+        "Verifier accuracy against the PySAT Glucose3 oracle on three "
+        "logic-reasoning datasets (ProofWriter, RuleTaker, FOLIO). "
+        "Reports are generated by `scripts/run_benchmarks.py` and "
+        "checked into `benchmarks/results/`. See "
+        "[docs/benchmarks.md]"
+        "(https://github.com/Quantum-Labor/qverify/blob/main/docs/benchmarks.md)"
+        " for methodology.\n\n" + _load_benchmark_summaries()
+    )
+    gr.Markdown("---")
     gr.Markdown(
         "### About\n\n"
         "**This Space demos the verifier component of QVerify.** A CNF "