Spaces:
Sleeping
Sleeping
Separate reference rendering tolerance from record metrics
Browse files
- app.py +6 -45
- packing_benchmark/store.py +14 -3
app.py
CHANGED
|
@@ -20,7 +20,7 @@ from packing_benchmark.dates import (
|
|
| 20 |
parse_friedman_date_text,
|
| 21 |
)
|
| 22 |
from packing_benchmark.hub_sync import maybe_hydrate_from_dataset
|
| 23 |
-
from packing_benchmark.store import SolutionStore, is_trivial_record, metric_matches_reference
|
| 24 |
from packing_benchmark.verifier import (
|
| 25 |
DEFAULT_TOLERANCE,
|
| 26 |
load_solution_json,
|
|
@@ -96,8 +96,6 @@ CSS = """
|
|
| 96 |
--line-soft: #d4cab8;
|
| 97 |
--green: #b9e7bd;
|
| 98 |
--green-strong: #2e7d32;
|
| 99 |
-
--orange: #f4b36a;
|
| 100 |
-
--orange-soft: #ffe1ba;
|
| 101 |
--link: #174f8a;
|
| 102 |
--tri: #e69f00;
|
| 103 |
--cir: #56b4e9;
|
|
@@ -444,25 +442,6 @@ footer,
|
|
| 444 |
text-decoration: none !important;
|
| 445 |
}
|
| 446 |
|
| 447 |
-
.record-card.needs-recovery {
|
| 448 |
-
background: var(--orange-soft);
|
| 449 |
-
border-color: #a65b0b;
|
| 450 |
-
}
|
| 451 |
-
|
| 452 |
-
.record-card.needs-recovery .record-case::after {
|
| 453 |
-
content: "needs recovery";
|
| 454 |
-
display: inline-block;
|
| 455 |
-
margin-left: 7px;
|
| 456 |
-
padding: 1px 5px;
|
| 457 |
-
border: 1px solid #a65b0b;
|
| 458 |
-
border-radius: 3px;
|
| 459 |
-
color: #5c3100;
|
| 460 |
-
font-family: Helvetica, Arial, sans-serif !important;
|
| 461 |
-
font-size: 10px;
|
| 462 |
-
font-weight: 700;
|
| 463 |
-
text-transform: uppercase;
|
| 464 |
-
}
|
| 465 |
-
|
| 466 |
.record-top {
|
| 467 |
padding: 9px 10px 0;
|
| 468 |
}
|
|
@@ -2169,11 +2148,10 @@ def visual_provenance(record: dict[str, Any], visual_record: dict[str, Any]) ->
|
|
| 2169 |
if visual_metric != shown_metric:
|
| 2170 |
suffix = f" Coordinate JSON evaluator metric: {esc(metric_symbol(visual_record))} = {esc(visual_metric)}; Friedman reported metric: {esc(metric_symbol(record))} = {esc(shown_metric)}."
|
| 2171 |
if "image-seeded" in notes or "image seeded" in notes:
|
| 2172 |
-
|
| 2173 |
-
return f"Image-seeded from the Friedman source image and verified ({status})." + suffix
|
| 2174 |
if visual_record is record or visual_record.get("id") == record.get("id"):
|
| 2175 |
return "Verified coordinate layout credited under the public attribution policy." + suffix
|
| 2176 |
-
return "
|
| 2177 |
|
| 2178 |
if visual_record is record or visual_record.get("id") == record.get("id"):
|
| 2179 |
if visual_record.get("frontend_seed"):
|
|
@@ -2187,16 +2165,6 @@ def display_metric_record(record: dict[str, Any], visual_record: dict[str, Any])
|
|
| 2187 |
return record
|
| 2188 |
|
| 2189 |
|
| 2190 |
-
def needs_recovery(record: dict[str, Any], visual_record: dict[str, Any]) -> bool:
|
| 2191 |
-
if record.get("record_type") != "reference":
|
| 2192 |
-
return False
|
| 2193 |
-
reference = numeric_metric(record)
|
| 2194 |
-
visual = numeric_metric(visual_record)
|
| 2195 |
-
if reference is None or visual is None:
|
| 2196 |
-
return True
|
| 2197 |
-
return not metric_matches_reference(visual_record, record)
|
| 2198 |
-
|
| 2199 |
-
|
| 2200 |
def record_detail_rows(record: dict[str, Any], visual_record: dict[str, Any], source: str, image_name: str, expression: str, analytical: Any) -> list[str]:
|
| 2201 |
rows: list[str] = []
|
| 2202 |
rows.append(detail_row("Rendering", esc(visual_provenance(record, visual_record))))
|
|
@@ -2292,8 +2260,6 @@ def record_card(record: dict[str, Any], coordinates: dict[str, dict[str, Any]])
|
|
| 2292 |
analytical = friedman_reference(record).get("analytical_or_proved")
|
| 2293 |
visible_author = display_author(record)
|
| 2294 |
card_class = "verified" if verified else "reference"
|
| 2295 |
-
if needs_recovery(record, visual_record):
|
| 2296 |
-
card_class += " needs-recovery"
|
| 2297 |
if is_recent_record(record):
|
| 2298 |
card_class += " recent-record"
|
| 2299 |
recent = '<span class="recent-dot" title="New in the last 7 days"></span>' if is_recent_record(record) else ""
|
|
@@ -2389,9 +2355,8 @@ def family_description(summary: dict[str, Any]) -> str:
|
|
| 2389 |
return (
|
| 2390 |
f"This page tracks the smallest known container metric for packing n equal {esc(item)} "
|
| 2391 |
f"inside a {esc(container)}. The number shown on each card is the verified coordinate metric "
|
| 2392 |
-
f"for
|
| 2393 |
-
f"
|
| 2394 |
-
f"the benchmark recovery band for the listed reference.{updated}"
|
| 2395 |
)
|
| 2396 |
|
| 2397 |
|
|
@@ -2681,7 +2646,6 @@ def leaderboard_author_modal(author: str, stats: dict[str, Any], rank: int, moda
|
|
| 2681 |
<div class="leaderboard-stat-grid">
|
| 2682 |
<div class="leaderboard-stat"><strong>{int(stats["total_top"])}</strong><span>total top records</span></div>
|
| 2683 |
<div class="leaderboard-stat"><strong>{int(stats["verified"])}</strong><span>verified submissions</span></div>
|
| 2684 |
-
<div class="leaderboard-stat"><strong>{int(stats["orange"])}</strong><span>needs recovery</span></div>
|
| 2685 |
<div class="leaderboard-stat"><strong>{family_count}</strong><span>families</span></div>
|
| 2686 |
</div>
|
| 2687 |
<div class="leaderboard-class-strip">{class_badges}</div>
|
|
@@ -2701,15 +2665,12 @@ def leaderboard_html() -> str:
|
|
| 2701 |
author = display_author(record)
|
| 2702 |
if author.strip().lower() == "trivial":
|
| 2703 |
continue
|
| 2704 |
-
entry = rows.setdefault(author, {"total_top": 0, "verified": 0, "orange": 0, "cases": [], "class_counts": {}})
|
| 2705 |
entry["total_top"] += 1
|
| 2706 |
item_code = setup_item_code(record)
|
| 2707 |
entry["class_counts"][item_code] = int(entry["class_counts"].get(item_code, 0)) + 1
|
| 2708 |
if record.get("record_type") == "verified":
|
| 2709 |
entry["verified"] += 1
|
| 2710 |
-
visual = visual_record_for(record, coordinates)
|
| 2711 |
-
if needs_recovery(record, visual):
|
| 2712 |
-
entry["orange"] += 1
|
| 2713 |
entry["cases"].append(str(record.get("case")))
|
| 2714 |
|
| 2715 |
class_columns = leaderboard_class_columns(rows)
|
|
|
|
| 20 |
parse_friedman_date_text,
|
| 21 |
)
|
| 22 |
from packing_benchmark.hub_sync import maybe_hydrate_from_dataset
|
| 23 |
+
from packing_benchmark.store import SolutionStore, is_trivial_record
|
| 24 |
from packing_benchmark.verifier import (
|
| 25 |
DEFAULT_TOLERANCE,
|
| 26 |
load_solution_json,
|
|
|
|
| 96 |
--line-soft: #d4cab8;
|
| 97 |
--green: #b9e7bd;
|
| 98 |
--green-strong: #2e7d32;
|
|
|
|
|
|
|
| 99 |
--link: #174f8a;
|
| 100 |
--tri: #e69f00;
|
| 101 |
--cir: #56b4e9;
|
|
|
|
| 442 |
text-decoration: none !important;
|
| 443 |
}
|
| 444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
.record-top {
|
| 446 |
padding: 9px 10px 0;
|
| 447 |
}
|
|
|
|
| 2148 |
if visual_metric != shown_metric:
|
| 2149 |
suffix = f" Coordinate JSON evaluator metric: {esc(metric_symbol(visual_record))} = {esc(visual_metric)}; Friedman reported metric: {esc(metric_symbol(record))} = {esc(shown_metric)}."
|
| 2150 |
if "image-seeded" in notes or "image seeded" in notes:
|
| 2151 |
+
return "Image-seeded from the Friedman source image and verified for rendering." + suffix
|
|
|
|
| 2152 |
if visual_record is record or visual_record.get("id") == record.get("id"):
|
| 2153 |
return "Verified coordinate layout credited under the public attribution policy." + suffix
|
| 2154 |
+
return "Verified coordinate rendering for the listed reference." + suffix
|
| 2155 |
|
| 2156 |
if visual_record is record or visual_record.get("id") == record.get("id"):
|
| 2157 |
if visual_record.get("frontend_seed"):
|
|
|
|
| 2165 |
return record
|
| 2166 |
|
| 2167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2168 |
def record_detail_rows(record: dict[str, Any], visual_record: dict[str, Any], source: str, image_name: str, expression: str, analytical: Any) -> list[str]:
|
| 2169 |
rows: list[str] = []
|
| 2170 |
rows.append(detail_row("Rendering", esc(visual_provenance(record, visual_record))))
|
|
|
|
| 2260 |
analytical = friedman_reference(record).get("analytical_or_proved")
|
| 2261 |
visible_author = display_author(record)
|
| 2262 |
card_class = "verified" if verified else "reference"
|
|
|
|
|
|
|
| 2263 |
if is_recent_record(record):
|
| 2264 |
card_class += " recent-record"
|
| 2265 |
recent = '<span class="recent-dot" title="New in the last 7 days"></span>' if is_recent_record(record) else ""
|
|
|
|
| 2355 |
return (
|
| 2356 |
f"This page tracks the smallest known container metric for packing n equal {esc(item)} "
|
| 2357 |
f"inside a {esc(container)}. The number shown on each card is the verified coordinate metric "
|
| 2358 |
+
f"for current coordinate records, or the listed reference metric when that is the better value. "
|
| 2359 |
+
f"Click a card to view the coordinate JSON metric, source information, and previous bests.{updated}"
|
|
|
|
| 2360 |
)
|
| 2361 |
|
| 2362 |
|
|
|
|
| 2646 |
<div class="leaderboard-stat-grid">
|
| 2647 |
<div class="leaderboard-stat"><strong>{int(stats["total_top"])}</strong><span>total top records</span></div>
|
| 2648 |
<div class="leaderboard-stat"><strong>{int(stats["verified"])}</strong><span>verified submissions</span></div>
|
|
|
|
| 2649 |
<div class="leaderboard-stat"><strong>{family_count}</strong><span>families</span></div>
|
| 2650 |
</div>
|
| 2651 |
<div class="leaderboard-class-strip">{class_badges}</div>
|
|
|
|
| 2665 |
author = display_author(record)
|
| 2666 |
if author.strip().lower() == "trivial":
|
| 2667 |
continue
|
| 2668 |
+
entry = rows.setdefault(author, {"total_top": 0, "verified": 0, "cases": [], "class_counts": {}})
|
| 2669 |
entry["total_top"] += 1
|
| 2670 |
item_code = setup_item_code(record)
|
| 2671 |
entry["class_counts"][item_code] = int(entry["class_counts"].get(item_code, 0)) + 1
|
| 2672 |
if record.get("record_type") == "verified":
|
| 2673 |
entry["verified"] += 1
|
|
|
|
|
|
|
|
|
|
| 2674 |
entry["cases"].append(str(record.get("case")))
|
| 2675 |
|
| 2676 |
class_columns = leaderboard_class_columns(rows)
|
packing_benchmark/store.py
CHANGED
|
@@ -20,6 +20,7 @@ from .verifier import (
|
|
| 20 |
|
| 21 |
PUBLIC_RECORD_METRIC_TOLERANCE_ABS = DEFAULT_TOLERANCE
|
| 22 |
PUBLIC_RECORD_METRIC_TOLERANCE_REL = 1.0e-3
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def metric_float(record: dict[str, Any]) -> float:
|
|
@@ -33,6 +34,14 @@ def metric_float(record: dict[str, Any]) -> float:
|
|
| 33 |
|
| 34 |
|
| 35 |
def reference_metric_tolerance(record: dict[str, Any]) -> float:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
try:
|
| 37 |
absolute = float(record.get("metric_recovery_tolerance_abs", PUBLIC_RECORD_METRIC_TOLERANCE_ABS))
|
| 38 |
except (TypeError, ValueError):
|
|
@@ -44,8 +53,6 @@ def reference_metric_tolerance(record: dict[str, Any]) -> float:
|
|
| 44 |
value = metric_float(record)
|
| 45 |
if value == float("inf"):
|
| 46 |
return absolute
|
| 47 |
-
if record.get("metric_value_source") == "exact_expression" or record.get("friedman_analytical_or_proved"):
|
| 48 |
-
return absolute
|
| 49 |
return max(absolute, abs(value) * max(0.0, relative))
|
| 50 |
|
| 51 |
|
|
@@ -53,6 +60,10 @@ def metric_matches_reference(coordinate: dict[str, Any], reference: dict[str, An
|
|
| 53 |
return metric_float(coordinate) <= metric_float(reference) + reference_metric_tolerance(reference)
|
| 54 |
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
def is_trivial_record(record: dict[str, Any]) -> bool:
|
| 57 |
try:
|
| 58 |
if int(record.get("n") or 0) == 1:
|
|
@@ -175,7 +186,7 @@ class SolutionStore:
|
|
| 175 |
if is_trivial_record(reference) and is_trivial_record(coordinate):
|
| 176 |
best[key] = reference
|
| 177 |
continue
|
| 178 |
-
if metric_matches_reference(coordinate, reference):
|
| 179 |
best[key] = coordinate
|
| 180 |
else:
|
| 181 |
best[key] = reference
|
|
|
|
| 20 |
|
| 21 |
PUBLIC_RECORD_METRIC_TOLERANCE_ABS = DEFAULT_TOLERANCE
|
| 22 |
PUBLIC_RECORD_METRIC_TOLERANCE_REL = 1.0e-3
|
| 23 |
+
STRICT_RECORD_IMPROVEMENT_EPS = 1.0e-12
|
| 24 |
|
| 25 |
|
| 26 |
def metric_float(record: dict[str, Any]) -> float:
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
def reference_metric_tolerance(record: dict[str, Any]) -> float:
|
| 37 |
+
"""Tolerance for using coordinate JSON as a rendering of a reference row.
|
| 38 |
+
|
| 39 |
+
This is deliberately a display/provenance tolerance, not the evaluator's
|
| 40 |
+
geometric feasibility tolerance and not permission to change an exact
|
| 41 |
+
Friedman metric. Exact analytical rows can still use slightly padded
|
| 42 |
+
coordinate JSON for rendering without being marked as unrecovered.
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
try:
|
| 46 |
absolute = float(record.get("metric_recovery_tolerance_abs", PUBLIC_RECORD_METRIC_TOLERANCE_ABS))
|
| 47 |
except (TypeError, ValueError):
|
|
|
|
| 53 |
value = metric_float(record)
|
| 54 |
if value == float("inf"):
|
| 55 |
return absolute
|
|
|
|
|
|
|
| 56 |
return max(absolute, abs(value) * max(0.0, relative))
|
| 57 |
|
| 58 |
|
|
|
|
| 60 |
return metric_float(coordinate) <= metric_float(reference) + reference_metric_tolerance(reference)
|
| 61 |
|
| 62 |
|
| 63 |
+
def metric_strictly_better(candidate: dict[str, Any], incumbent: dict[str, Any]) -> bool:
|
| 64 |
+
return metric_float(candidate) < metric_float(incumbent) - STRICT_RECORD_IMPROVEMENT_EPS
|
| 65 |
+
|
| 66 |
+
|
| 67 |
def is_trivial_record(record: dict[str, Any]) -> bool:
|
| 68 |
try:
|
| 69 |
if int(record.get("n") or 0) == 1:
|
|
|
|
| 186 |
if is_trivial_record(reference) and is_trivial_record(coordinate):
|
| 187 |
best[key] = reference
|
| 188 |
continue
|
| 189 |
+
if metric_strictly_better(coordinate, reference):
|
| 190 |
best[key] = coordinate
|
| 191 |
else:
|
| 192 |
best[key] = reference
|