NathanRoll committed on
Commit
dd41a2d
·
verified ·
1 Parent(s): 7c0c53a

Separate reference rendering tolerance from record metrics

Browse files
Files changed (2) hide show
  1. app.py +6 -45
  2. packing_benchmark/store.py +14 -3
app.py CHANGED
@@ -20,7 +20,7 @@ from packing_benchmark.dates import (
20
  parse_friedman_date_text,
21
  )
22
  from packing_benchmark.hub_sync import maybe_hydrate_from_dataset
23
- from packing_benchmark.store import SolutionStore, is_trivial_record, metric_matches_reference
24
  from packing_benchmark.verifier import (
25
  DEFAULT_TOLERANCE,
26
  load_solution_json,
@@ -96,8 +96,6 @@ CSS = """
96
  --line-soft: #d4cab8;
97
  --green: #b9e7bd;
98
  --green-strong: #2e7d32;
99
- --orange: #f4b36a;
100
- --orange-soft: #ffe1ba;
101
  --link: #174f8a;
102
  --tri: #e69f00;
103
  --cir: #56b4e9;
@@ -444,25 +442,6 @@ footer,
444
  text-decoration: none !important;
445
  }
446
 
447
- .record-card.needs-recovery {
448
- background: var(--orange-soft);
449
- border-color: #a65b0b;
450
- }
451
-
452
- .record-card.needs-recovery .record-case::after {
453
- content: "needs recovery";
454
- display: inline-block;
455
- margin-left: 7px;
456
- padding: 1px 5px;
457
- border: 1px solid #a65b0b;
458
- border-radius: 3px;
459
- color: #5c3100;
460
- font-family: Helvetica, Arial, sans-serif !important;
461
- font-size: 10px;
462
- font-weight: 700;
463
- text-transform: uppercase;
464
- }
465
-
466
  .record-top {
467
  padding: 9px 10px 0;
468
  }
@@ -2169,11 +2148,10 @@ def visual_provenance(record: dict[str, Any], visual_record: dict[str, Any]) ->
2169
  if visual_metric != shown_metric:
2170
  suffix = f" Coordinate JSON evaluator metric: {esc(metric_symbol(visual_record))} = {esc(visual_metric)}; Friedman reported metric: {esc(metric_symbol(record))} = {esc(shown_metric)}."
2171
  if "image-seeded" in notes or "image seeded" in notes:
2172
- status = "within the benchmark recovery band" if not needs_recovery(record, visual_record) else "outside the benchmark recovery band"
2173
- return f"Image-seeded from the Friedman source image and verified ({status})." + suffix
2174
  if visual_record is record or visual_record.get("id") == record.get("id"):
2175
  return "Verified coordinate layout credited under the public attribution policy." + suffix
2176
- return "Generated feasible coordinate rendering, not recovered from the Friedman source image." + suffix
2177
 
2178
  if visual_record is record or visual_record.get("id") == record.get("id"):
2179
  if visual_record.get("frontend_seed"):
@@ -2187,16 +2165,6 @@ def display_metric_record(record: dict[str, Any], visual_record: dict[str, Any])
2187
  return record
2188
 
2189
 
2190
- def needs_recovery(record: dict[str, Any], visual_record: dict[str, Any]) -> bool:
2191
- if record.get("record_type") != "reference":
2192
- return False
2193
- reference = numeric_metric(record)
2194
- visual = numeric_metric(visual_record)
2195
- if reference is None or visual is None:
2196
- return True
2197
- return not metric_matches_reference(visual_record, record)
2198
-
2199
-
2200
  def record_detail_rows(record: dict[str, Any], visual_record: dict[str, Any], source: str, image_name: str, expression: str, analytical: Any) -> list[str]:
2201
  rows: list[str] = []
2202
  rows.append(detail_row("Rendering", esc(visual_provenance(record, visual_record))))
@@ -2292,8 +2260,6 @@ def record_card(record: dict[str, Any], coordinates: dict[str, dict[str, Any]])
2292
  analytical = friedman_reference(record).get("analytical_or_proved")
2293
  visible_author = display_author(record)
2294
  card_class = "verified" if verified else "reference"
2295
- if needs_recovery(record, visual_record):
2296
- card_class += " needs-recovery"
2297
  if is_recent_record(record):
2298
  card_class += " recent-record"
2299
  recent = '<span class="recent-dot" title="New in the last 7 days"></span>' if is_recent_record(record) else ""
@@ -2389,9 +2355,8 @@ def family_description(summary: dict[str, Any]) -> str:
2389
  return (
2390
  f"This page tracks the smallest known container metric for packing n equal {esc(item)} "
2391
  f"inside a {esc(container)}. The number shown on each card is the verified coordinate metric "
2392
- f"for the rendering; click a card to view the Friedman reference value and source information. "
2393
- f"Orange cards need more recovery work because the verified coordinate JSON is still outside "
2394
- f"the benchmark recovery band for the listed reference.{updated}"
2395
  )
2396
 
2397
 
@@ -2681,7 +2646,6 @@ def leaderboard_author_modal(author: str, stats: dict[str, Any], rank: int, moda
2681
  <div class="leaderboard-stat-grid">
2682
  <div class="leaderboard-stat"><strong>{int(stats["total_top"])}</strong><span>total top records</span></div>
2683
  <div class="leaderboard-stat"><strong>{int(stats["verified"])}</strong><span>verified submissions</span></div>
2684
- <div class="leaderboard-stat"><strong>{int(stats["orange"])}</strong><span>needs recovery</span></div>
2685
  <div class="leaderboard-stat"><strong>{family_count}</strong><span>families</span></div>
2686
  </div>
2687
  <div class="leaderboard-class-strip">{class_badges}</div>
@@ -2701,15 +2665,12 @@ def leaderboard_html() -> str:
2701
  author = display_author(record)
2702
  if author.strip().lower() == "trivial":
2703
  continue
2704
- entry = rows.setdefault(author, {"total_top": 0, "verified": 0, "orange": 0, "cases": [], "class_counts": {}})
2705
  entry["total_top"] += 1
2706
  item_code = setup_item_code(record)
2707
  entry["class_counts"][item_code] = int(entry["class_counts"].get(item_code, 0)) + 1
2708
  if record.get("record_type") == "verified":
2709
  entry["verified"] += 1
2710
- visual = visual_record_for(record, coordinates)
2711
- if needs_recovery(record, visual):
2712
- entry["orange"] += 1
2713
  entry["cases"].append(str(record.get("case")))
2714
 
2715
  class_columns = leaderboard_class_columns(rows)
 
20
  parse_friedman_date_text,
21
  )
22
  from packing_benchmark.hub_sync import maybe_hydrate_from_dataset
23
+ from packing_benchmark.store import SolutionStore, is_trivial_record
24
  from packing_benchmark.verifier import (
25
  DEFAULT_TOLERANCE,
26
  load_solution_json,
 
96
  --line-soft: #d4cab8;
97
  --green: #b9e7bd;
98
  --green-strong: #2e7d32;
 
 
99
  --link: #174f8a;
100
  --tri: #e69f00;
101
  --cir: #56b4e9;
 
442
  text-decoration: none !important;
443
  }
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  .record-top {
446
  padding: 9px 10px 0;
447
  }
 
2148
  if visual_metric != shown_metric:
2149
  suffix = f" Coordinate JSON evaluator metric: {esc(metric_symbol(visual_record))} = {esc(visual_metric)}; Friedman reported metric: {esc(metric_symbol(record))} = {esc(shown_metric)}."
2150
  if "image-seeded" in notes or "image seeded" in notes:
2151
+ return "Image-seeded from the Friedman source image and verified for rendering." + suffix
 
2152
  if visual_record is record or visual_record.get("id") == record.get("id"):
2153
  return "Verified coordinate layout credited under the public attribution policy." + suffix
2154
+ return "Verified coordinate rendering for the listed reference." + suffix
2155
 
2156
  if visual_record is record or visual_record.get("id") == record.get("id"):
2157
  if visual_record.get("frontend_seed"):
 
2165
  return record
2166
 
2167
 
 
 
 
 
 
 
 
 
 
 
2168
  def record_detail_rows(record: dict[str, Any], visual_record: dict[str, Any], source: str, image_name: str, expression: str, analytical: Any) -> list[str]:
2169
  rows: list[str] = []
2170
  rows.append(detail_row("Rendering", esc(visual_provenance(record, visual_record))))
 
2260
  analytical = friedman_reference(record).get("analytical_or_proved")
2261
  visible_author = display_author(record)
2262
  card_class = "verified" if verified else "reference"
 
 
2263
  if is_recent_record(record):
2264
  card_class += " recent-record"
2265
  recent = '<span class="recent-dot" title="New in the last 7 days"></span>' if is_recent_record(record) else ""
 
2355
  return (
2356
  f"This page tracks the smallest known container metric for packing n equal {esc(item)} "
2357
  f"inside a {esc(container)}. The number shown on each card is the verified coordinate metric "
2358
+ f"for current coordinate records, or the listed reference metric when that is the better value. "
2359
+ f"Click a card to view the coordinate JSON metric, source information, and previous bests.{updated}"
 
2360
  )
2361
 
2362
 
 
2646
  <div class="leaderboard-stat-grid">
2647
  <div class="leaderboard-stat"><strong>{int(stats["total_top"])}</strong><span>total top records</span></div>
2648
  <div class="leaderboard-stat"><strong>{int(stats["verified"])}</strong><span>verified submissions</span></div>
 
2649
  <div class="leaderboard-stat"><strong>{family_count}</strong><span>families</span></div>
2650
  </div>
2651
  <div class="leaderboard-class-strip">{class_badges}</div>
 
2665
  author = display_author(record)
2666
  if author.strip().lower() == "trivial":
2667
  continue
2668
+ entry = rows.setdefault(author, {"total_top": 0, "verified": 0, "cases": [], "class_counts": {}})
2669
  entry["total_top"] += 1
2670
  item_code = setup_item_code(record)
2671
  entry["class_counts"][item_code] = int(entry["class_counts"].get(item_code, 0)) + 1
2672
  if record.get("record_type") == "verified":
2673
  entry["verified"] += 1
 
 
 
2674
  entry["cases"].append(str(record.get("case")))
2675
 
2676
  class_columns = leaderboard_class_columns(rows)
packing_benchmark/store.py CHANGED
@@ -20,6 +20,7 @@ from .verifier import (
20
 
21
  PUBLIC_RECORD_METRIC_TOLERANCE_ABS = DEFAULT_TOLERANCE
22
  PUBLIC_RECORD_METRIC_TOLERANCE_REL = 1.0e-3
 
23
 
24
 
25
  def metric_float(record: dict[str, Any]) -> float:
@@ -33,6 +34,14 @@ def metric_float(record: dict[str, Any]) -> float:
33
 
34
 
35
  def reference_metric_tolerance(record: dict[str, Any]) -> float:
 
 
 
 
 
 
 
 
36
  try:
37
  absolute = float(record.get("metric_recovery_tolerance_abs", PUBLIC_RECORD_METRIC_TOLERANCE_ABS))
38
  except (TypeError, ValueError):
@@ -44,8 +53,6 @@ def reference_metric_tolerance(record: dict[str, Any]) -> float:
44
  value = metric_float(record)
45
  if value == float("inf"):
46
  return absolute
47
- if record.get("metric_value_source") == "exact_expression" or record.get("friedman_analytical_or_proved"):
48
- return absolute
49
  return max(absolute, abs(value) * max(0.0, relative))
50
 
51
 
@@ -53,6 +60,10 @@ def metric_matches_reference(coordinate: dict[str, Any], reference: dict[str, An
53
  return metric_float(coordinate) <= metric_float(reference) + reference_metric_tolerance(reference)
54
 
55
 
 
 
 
 
56
  def is_trivial_record(record: dict[str, Any]) -> bool:
57
  try:
58
  if int(record.get("n") or 0) == 1:
@@ -175,7 +186,7 @@ class SolutionStore:
175
  if is_trivial_record(reference) and is_trivial_record(coordinate):
176
  best[key] = reference
177
  continue
178
- if metric_matches_reference(coordinate, reference):
179
  best[key] = coordinate
180
  else:
181
  best[key] = reference
 
20
 
21
  PUBLIC_RECORD_METRIC_TOLERANCE_ABS = DEFAULT_TOLERANCE
22
  PUBLIC_RECORD_METRIC_TOLERANCE_REL = 1.0e-3
23
+ STRICT_RECORD_IMPROVEMENT_EPS = 1.0e-12
24
 
25
 
26
  def metric_float(record: dict[str, Any]) -> float:
 
34
 
35
 
36
  def reference_metric_tolerance(record: dict[str, Any]) -> float:
37
+ """Tolerance for using coordinate JSON as a rendering of a reference row.
38
+
39
+ This is deliberately a display/provenance tolerance, not the evaluator's
40
+ geometric feasibility tolerance and not permission to change an exact
41
+ Friedman metric. Exact analytical rows can still use slightly padded
42
+ coordinate JSON for rendering without being marked as unrecovered.
43
+ """
44
+
45
  try:
46
  absolute = float(record.get("metric_recovery_tolerance_abs", PUBLIC_RECORD_METRIC_TOLERANCE_ABS))
47
  except (TypeError, ValueError):
 
53
  value = metric_float(record)
54
  if value == float("inf"):
55
  return absolute
 
 
56
  return max(absolute, abs(value) * max(0.0, relative))
57
 
58
 
 
60
  return metric_float(coordinate) <= metric_float(reference) + reference_metric_tolerance(reference)
61
 
62
 
63
+ def metric_strictly_better(candidate: dict[str, Any], incumbent: dict[str, Any]) -> bool:
64
+ return metric_float(candidate) < metric_float(incumbent) - STRICT_RECORD_IMPROVEMENT_EPS
65
+
66
+
67
  def is_trivial_record(record: dict[str, Any]) -> bool:
68
  try:
69
  if int(record.get("n") or 0) == 1:
 
186
  if is_trivial_record(reference) and is_trivial_record(coordinate):
187
  best[key] = reference
188
  continue
189
+ if metric_strictly_better(coordinate, reference):
190
  best[key] = coordinate
191
  else:
192
  best[key] = reference