Spaces:

NathanRoll
/

packing-benchmark

Sleeping

App Files Files Community

NathanRoll committed 26 days ago

Commit

dd41a2d

verified ·

1 Parent(s): 7c0c53a

Separate reference rendering tolerance from record metrics

Browse files

Files changed (2) hide show

app.py +6 -45
packing_benchmark/store.py +14 -3

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ from packing_benchmark.dates import (
     parse_friedman_date_text,
 )
 from packing_benchmark.hub_sync import maybe_hydrate_from_dataset
-from packing_benchmark.store import SolutionStore, is_trivial_record, metric_matches_reference
 from packing_benchmark.verifier import (
     DEFAULT_TOLERANCE,
     load_solution_json,
@@ -96,8 +96,6 @@ CSS = """
   --line-soft: #d4cab8;
   --green: #b9e7bd;
   --green-strong: #2e7d32;
-  --orange: #f4b36a;
-  --orange-soft: #ffe1ba;
   --link: #174f8a;
   --tri: #e69f00;
   --cir: #56b4e9;
@@ -444,25 +442,6 @@ footer,
   text-decoration: none !important;
 }
-.record-card.needs-recovery {
-  background: var(--orange-soft);
-  border-color: #a65b0b;
-}
-.record-card.needs-recovery .record-case::after {
-  content: "needs recovery";
-  display: inline-block;
-  margin-left: 7px;
-  padding: 1px 5px;
-  border: 1px solid #a65b0b;
-  border-radius: 3px;
-  color: #5c3100;
-  font-family: Helvetica, Arial, sans-serif !important;
-  font-size: 10px;
-  font-weight: 700;
-  text-transform: uppercase;
-}
 .record-top {
   padding: 9px 10px 0;
 }
@@ -2169,11 +2148,10 @@ def visual_provenance(record: dict[str, Any], visual_record: dict[str, Any]) ->
         if visual_metric != shown_metric:
             suffix = f" Coordinate JSON evaluator metric: {esc(metric_symbol(visual_record))} = {esc(visual_metric)}; Friedman reported metric: {esc(metric_symbol(record))} = {esc(shown_metric)}."
         if "image-seeded" in notes or "image seeded" in notes:
-            status = "within the benchmark recovery band" if not needs_recovery(record, visual_record) else "outside the benchmark recovery band"
-            return f"Image-seeded from the Friedman source image and verified ({status})." + suffix
         if visual_record is record or visual_record.get("id") == record.get("id"):
             return "Verified coordinate layout credited under the public attribution policy." + suffix
-        return "Generated feasible coordinate rendering, not recovered from the Friedman source image." + suffix
     if visual_record is record or visual_record.get("id") == record.get("id"):
         if visual_record.get("frontend_seed"):
@@ -2187,16 +2165,6 @@ def display_metric_record(record: dict[str, Any], visual_record: dict[str, Any])
     return record
-def needs_recovery(record: dict[str, Any], visual_record: dict[str, Any]) -> bool:
-    if record.get("record_type") != "reference":
-        return False
-    reference = numeric_metric(record)
-    visual = numeric_metric(visual_record)
-    if reference is None or visual is None:
-        return True
-    return not metric_matches_reference(visual_record, record)
 def record_detail_rows(record: dict[str, Any], visual_record: dict[str, Any], source: str, image_name: str, expression: str, analytical: Any) -> list[str]:
     rows: list[str] = []
     rows.append(detail_row("Rendering", esc(visual_provenance(record, visual_record))))
@@ -2292,8 +2260,6 @@ def record_card(record: dict[str, Any], coordinates: dict[str, dict[str, Any]])
     analytical = friedman_reference(record).get("analytical_or_proved")
     visible_author = display_author(record)
     card_class = "verified" if verified else "reference"
-    if needs_recovery(record, visual_record):
-        card_class += " needs-recovery"
     if is_recent_record(record):
         card_class += " recent-record"
     recent = '<span class="recent-dot" title="New in the last 7 days"></span>' if is_recent_record(record) else ""
@@ -2389,9 +2355,8 @@ def family_description(summary: dict[str, Any]) -> str:
     return (
         f"This page tracks the smallest known container metric for packing n equal {esc(item)} "
         f"inside a {esc(container)}. The number shown on each card is the verified coordinate metric "
-        f"for the rendering; click a card to view the Friedman reference value and source information. "
-        f"Orange cards need more recovery work because the verified coordinate JSON is still outside "
-        f"the benchmark recovery band for the listed reference.{updated}"
     )
@@ -2681,7 +2646,6 @@ def leaderboard_author_modal(author: str, stats: dict[str, Any], rank: int, moda
             <div class="leaderboard-stat-grid">
               <div class="leaderboard-stat"><strong>{int(stats["total_top"])}</strong><span>total top records</span></div>
               <div class="leaderboard-stat"><strong>{int(stats["verified"])}</strong><span>verified submissions</span></div>
-              <div class="leaderboard-stat"><strong>{int(stats["orange"])}</strong><span>needs recovery</span></div>
               <div class="leaderboard-stat"><strong>{family_count}</strong><span>families</span></div>
             </div>
             <div class="leaderboard-class-strip">{class_badges}</div>
@@ -2701,15 +2665,12 @@ def leaderboard_html() -> str:
         author = display_author(record)
         if author.strip().lower() == "trivial":
             continue
-        entry = rows.setdefault(author, {"total_top": 0, "verified": 0, "orange": 0, "cases": [], "class_counts": {}})
         entry["total_top"] += 1
         item_code = setup_item_code(record)
         entry["class_counts"][item_code] = int(entry["class_counts"].get(item_code, 0)) + 1
         if record.get("record_type") == "verified":
             entry["verified"] += 1
-        visual = visual_record_for(record, coordinates)
-        if needs_recovery(record, visual):
-            entry["orange"] += 1
         entry["cases"].append(str(record.get("case")))
     class_columns = leaderboard_class_columns(rows)

     parse_friedman_date_text,
 )
 from packing_benchmark.hub_sync import maybe_hydrate_from_dataset
+from packing_benchmark.store import SolutionStore, is_trivial_record
 from packing_benchmark.verifier import (
     DEFAULT_TOLERANCE,
     load_solution_json,
   --line-soft: #d4cab8;
   --green: #b9e7bd;
   --green-strong: #2e7d32;
   --link: #174f8a;
   --tri: #e69f00;
   --cir: #56b4e9;
   text-decoration: none !important;
 }
 .record-top {
   padding: 9px 10px 0;
 }
         if visual_metric != shown_metric:
             suffix = f" Coordinate JSON evaluator metric: {esc(metric_symbol(visual_record))} = {esc(visual_metric)}; Friedman reported metric: {esc(metric_symbol(record))} = {esc(shown_metric)}."
         if "image-seeded" in notes or "image seeded" in notes:
+            return "Image-seeded from the Friedman source image and verified for rendering." + suffix
         if visual_record is record or visual_record.get("id") == record.get("id"):
             return "Verified coordinate layout credited under the public attribution policy." + suffix
+        return "Verified coordinate rendering for the listed reference." + suffix
     if visual_record is record or visual_record.get("id") == record.get("id"):
         if visual_record.get("frontend_seed"):
     return record
 def record_detail_rows(record: dict[str, Any], visual_record: dict[str, Any], source: str, image_name: str, expression: str, analytical: Any) -> list[str]:
     rows: list[str] = []
     rows.append(detail_row("Rendering", esc(visual_provenance(record, visual_record))))
     analytical = friedman_reference(record).get("analytical_or_proved")
     visible_author = display_author(record)
     card_class = "verified" if verified else "reference"
     if is_recent_record(record):
         card_class += " recent-record"
     recent = '<span class="recent-dot" title="New in the last 7 days"></span>' if is_recent_record(record) else ""
     return (
         f"This page tracks the smallest known container metric for packing n equal {esc(item)} "
         f"inside a {esc(container)}. The number shown on each card is the verified coordinate metric "
+        f"for current coordinate records, or the listed reference metric when that is the better value. "
+        f"Click a card to view the coordinate JSON metric, source information, and previous bests.{updated}"
     )
             <div class="leaderboard-stat-grid">
               <div class="leaderboard-stat"><strong>{int(stats["total_top"])}</strong><span>total top records</span></div>
               <div class="leaderboard-stat"><strong>{int(stats["verified"])}</strong><span>verified submissions</span></div>
               <div class="leaderboard-stat"><strong>{family_count}</strong><span>families</span></div>
             </div>
             <div class="leaderboard-class-strip">{class_badges}</div>
         author = display_author(record)
         if author.strip().lower() == "trivial":
             continue
+        entry = rows.setdefault(author, {"total_top": 0, "verified": 0, "cases": [], "class_counts": {}})
         entry["total_top"] += 1
         item_code = setup_item_code(record)
         entry["class_counts"][item_code] = int(entry["class_counts"].get(item_code, 0)) + 1
         if record.get("record_type") == "verified":
             entry["verified"] += 1
         entry["cases"].append(str(record.get("case")))
     class_columns = leaderboard_class_columns(rows)

packing_benchmark/store.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .verifier import (
 PUBLIC_RECORD_METRIC_TOLERANCE_ABS = DEFAULT_TOLERANCE
 PUBLIC_RECORD_METRIC_TOLERANCE_REL = 1.0e-3
 def metric_float(record: dict[str, Any]) -> float:
@@ -33,6 +34,14 @@ def metric_float(record: dict[str, Any]) -> float:
 def reference_metric_tolerance(record: dict[str, Any]) -> float:
     try:
         absolute = float(record.get("metric_recovery_tolerance_abs", PUBLIC_RECORD_METRIC_TOLERANCE_ABS))
     except (TypeError, ValueError):
@@ -44,8 +53,6 @@ def reference_metric_tolerance(record: dict[str, Any]) -> float:
     value = metric_float(record)
     if value == float("inf"):
         return absolute
-    if record.get("metric_value_source") == "exact_expression" or record.get("friedman_analytical_or_proved"):
-        return absolute
     return max(absolute, abs(value) * max(0.0, relative))
@@ -53,6 +60,10 @@ def metric_matches_reference(coordinate: dict[str, Any], reference: dict[str, An
     return metric_float(coordinate) <= metric_float(reference) + reference_metric_tolerance(reference)
 def is_trivial_record(record: dict[str, Any]) -> bool:
     try:
         if int(record.get("n") or 0) == 1:
@@ -175,7 +186,7 @@ class SolutionStore:
             if is_trivial_record(reference) and is_trivial_record(coordinate):
                 best[key] = reference
                 continue
-            if metric_matches_reference(coordinate, reference):
                 best[key] = coordinate
             else:
                 best[key] = reference

 PUBLIC_RECORD_METRIC_TOLERANCE_ABS = DEFAULT_TOLERANCE
 PUBLIC_RECORD_METRIC_TOLERANCE_REL = 1.0e-3
+STRICT_RECORD_IMPROVEMENT_EPS = 1.0e-12
 def metric_float(record: dict[str, Any]) -> float:
 def reference_metric_tolerance(record: dict[str, Any]) -> float:
+    """Tolerance for using coordinate JSON as a rendering of a reference row.
+    This is deliberately a display/provenance tolerance, not the evaluator's
+    geometric feasibility tolerance and not permission to change an exact
+    Friedman metric.  Exact analytical rows can still use slightly padded
+    coordinate JSON for rendering without being marked as unrecovered.
+    """
     try:
         absolute = float(record.get("metric_recovery_tolerance_abs", PUBLIC_RECORD_METRIC_TOLERANCE_ABS))
     except (TypeError, ValueError):
     value = metric_float(record)
     if value == float("inf"):
         return absolute
     return max(absolute, abs(value) * max(0.0, relative))
     return metric_float(coordinate) <= metric_float(reference) + reference_metric_tolerance(reference)
+def metric_strictly_better(candidate: dict[str, Any], incumbent: dict[str, Any]) -> bool:
+    return metric_float(candidate) < metric_float(incumbent) - STRICT_RECORD_IMPROVEMENT_EPS
 def is_trivial_record(record: dict[str, Any]) -> bool:
     try:
         if int(record.get("n") or 0) == 1:
             if is_trivial_record(reference) and is_trivial_record(coordinate):
                 best[key] = reference
                 continue
+            if metric_strictly_better(coordinate, reference):
                 best[key] = coordinate
             else:
                 best[key] = reference