Laborator committed on
Commit
c3ae39d
·
verified ·
1 Parent(s): 7ba2968

sync from GitHub: fallback panel + benchmarks tab + path fix

Browse files
Files changed (1) hide show
  1. app.py +55 -1
app.py CHANGED
@@ -151,6 +151,48 @@ IBM_INSTANCE_PRESENT = bool(os.environ.get("IBM_QUANTUM_INSTANCE"))
151
  IBM_AVAILABLE = IBM_TOKEN_PRESENT and IBM_INSTANCE_PRESENT
152
 
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  def _format_cnf(cnf: CNF) -> str:
155
  """Render a CNF as a readable multi-line string."""
156
  if not cnf.clauses:
@@ -639,10 +681,22 @@ with gr.Blocks(title="QVerify") as demo:
639
  "### IBM Hardware Jobs\n\n"
640
  "Job execution happens on IBM Quantum hardware. After clicking "
641
  "**Verify on IBM Heron r2**, copy the Job ID from the Result and "
642
- "view live status at the [IBM Quantum Workloads dashboard](https://quantum.cloud.ibm.com/workloads). "
 
643
  "All jobs are publicly verifiable there."
644
  )
645
  gr.Markdown("---")
 
 
 
 
 
 
 
 
 
 
 
646
  gr.Markdown(
647
  "### About\n\n"
648
  "**This Space demos the verifier component of QVerify.** A CNF "
 
151
  IBM_AVAILABLE = IBM_TOKEN_PRESENT and IBM_INSTANCE_PRESENT
152
 
153
 
154
def _load_benchmark_summaries() -> str:
    """Render the checked-in benchmark reports as a Markdown table.

    Scans ``benchmarks/results/*/report.json`` and emits one table row per
    report, ordered by report path. Intended to be called once while the UI
    is being built, so it must never raise: a report that cannot be read,
    is not valid JSON, is not a JSON object, or carries non-numeric metric
    values is skipped (with a logged warning) instead of aborting start-up.

    Returns:
        A Markdown table (header plus one row per usable report), or the
        placeholder ``_No benchmark reports checked in yet._`` when no
        results directory or no usable report is found.
    """
    import json
    import logging
    from pathlib import Path

    log = logging.getLogger(__name__)
    no_reports = "_No benchmark reports checked in yet._"

    # On the HF Space app.py sits at the repo root, so benchmarks/ is a
    # sibling of app.py. In the local repo app.py lives under space/, so
    # benchmarks/ is a sibling of app.py's parent directory. Try both.
    here = Path(__file__).resolve().parent
    for candidate in (
        here / "benchmarks" / "results",
        here.parent / "benchmarks" / "results",
    ):
        if candidate.is_dir():
            results_root = candidate
            break
    else:
        return no_reports

    rows: list[str] = []
    for report_path in sorted(results_root.glob("*/report.json")):
        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise TypeError("report payload is not a JSON object")
            # Formatting stays inside the guarded section on purpose: a
            # null or textual metric raises TypeError/ValueError here.
            row = (
                f"| {data.get('dataset', '?')} "
                f"| {data.get('backend', '?')} "
                f"| {data.get('n_examples', 0)} "
                f"| {data.get('accuracy', 0.0) * 100:.1f}% "
                f"| {data.get('avg_seconds', 0.0):.3f} s "
                f"| {data.get('p95_seconds', 0.0):.3f} s |"
            )
        except (OSError, ValueError, TypeError) as exc:
            # JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            log.warning("Skipping benchmark report %s: %s", report_path, exc)
            continue
        rows.append(row)

    if not rows:
        return no_reports

    header = (
        "| Dataset | Backend | Examples | Accuracy | Avg | P95 |\n"
        "| --- | --- | --- | --- | --- | --- |\n"
    )
    return header + "\n".join(rows)
194
+
195
+
196
  def _format_cnf(cnf: CNF) -> str:
197
  """Render a CNF as a readable multi-line string."""
198
  if not cnf.clauses:
 
681
  "### IBM Hardware Jobs\n\n"
682
  "Job execution happens on IBM Quantum hardware. After clicking "
683
  "**Verify on IBM Heron r2**, copy the Job ID from the Result and "
684
+ "view live status at the "
685
+ "[IBM Quantum Workloads dashboard](https://quantum.cloud.ibm.com/workloads). "
686
  "All jobs are publicly verifiable there."
687
  )
688
  gr.Markdown("---")
689
+ gr.Markdown(
690
+ "### Benchmarks\n\n"
691
+ "Verifier accuracy against the PySAT Glucose3 oracle on three "
692
+ "logic-reasoning datasets (ProofWriter, RuleTaker, FOLIO). "
693
+ "Reports are generated by `scripts/run_benchmarks.py` and "
694
+ "checked into `benchmarks/results/`. See "
695
+ "[docs/benchmarks.md]"
696
+ "(https://github.com/Quantum-Labor/qverify/blob/main/docs/benchmarks.md)"
697
+ " for methodology.\n\n" + _load_benchmark_summaries()
698
+ )
699
+ gr.Markdown("---")
700
  gr.Markdown(
701
  "### About\n\n"
702
  "**This Space demos the verifier component of QVerify.** A CNF "