Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/eval_agent_memory/__pycache__/auxiliary_metrics.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/results/auxiliary_metrics_snapshot.py +169 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/results/metrics.json +41 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/results/auxiliary_metrics_snapshot.py +169 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/results/metrics.json +47 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/results/metrics.json +47 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/results/metrics.json +47 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/results/metrics.json +47 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/results/metrics.json +47 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/results/metrics.json +47 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/results/metrics.json +41 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_114/results/auxiliary_metrics_snapshot.py +172 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/results/auxiliary_metrics_snapshot.py +187 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/results/metrics.json +41 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_118/results/auxiliary_metrics_snapshot.py +198 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_118/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_118/results/metrics.json +40 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/results/auxiliary_metrics_snapshot.py +198 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/results/metrics.json +40 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/__pycache__/main.cpython-313.pyc +0 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/results/auxiliary_metrics_snapshot.py +3 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/results/correct.json +4 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/results/metrics.json +19 -0
- tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_121/__pycache__/main.cpython-313.pyc +0 -0
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/eval_agent_memory/__pycache__/auxiliary_metrics.cpython-313.pyc
ADDED
|
Binary file (7.98 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (8.54 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: has_nan_or_inf (NEW)
|
| 57 |
+
try:
|
| 58 |
+
has_nan = np.isnan(centers).any() or np.isnan(radii).any()
|
| 59 |
+
has_inf = np.isinf(centers).any() or np.isinf(radii).any()
|
| 60 |
+
metrics["has_nan_or_inf"] = 1.0 if (has_nan or has_inf) else 0.0
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error calculating has_nan_or_inf: {e}")
|
| 63 |
+
metrics["has_nan_or_inf"] = 0.0 # Default to no error if calculation fails
|
| 64 |
+
|
| 65 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 66 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 67 |
+
if centers.size == 0 or radii.size == 0:
|
| 68 |
+
metrics["mean_radius"] = 0.0
|
| 69 |
+
metrics["radius_std_dev"] = 0.0
|
| 70 |
+
metrics["total_packed_area"] = 0.0
|
| 71 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 72 |
+
metrics["max_overlap_value"] = 0.0
|
| 73 |
+
metrics["num_circles_generated"] = 0
|
| 74 |
+
metrics["min_radius"] = 0.0
|
| 75 |
+
metrics["boundary_contact_count"] = 0
|
| 76 |
+
return metrics
|
| 77 |
+
|
| 78 |
+
# Metric: num_circles_generated (actual count)
|
| 79 |
+
try:
|
| 80 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 81 |
+
except Exception as e:
|
| 82 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 83 |
+
metrics["num_circles_generated"] = 0
|
| 84 |
+
|
| 85 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 86 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 87 |
+
metrics["mean_radius"] = 0.0
|
| 88 |
+
metrics["radius_std_dev"] = 0.0
|
| 89 |
+
metrics["total_packed_area"] = 0.0
|
| 90 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 91 |
+
metrics["max_overlap_value"] = 0.0
|
| 92 |
+
metrics["min_radius"] = 0.0
|
| 93 |
+
metrics["boundary_contact_count"] = 0
|
| 94 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 95 |
+
|
| 96 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 97 |
+
|
| 98 |
+
# Metric 2: mean_radius
|
| 99 |
+
try:
|
| 100 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 101 |
+
except Exception as e:
|
| 102 |
+
print(f"Error calculating mean_radius: {e}")
|
| 103 |
+
metrics["mean_radius"] = 0.0
|
| 104 |
+
|
| 105 |
+
# Metric 3: radius_std_dev
|
| 106 |
+
try:
|
| 107 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 108 |
+
except Exception as e:
|
| 109 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 110 |
+
metrics["radius_std_dev"] = 0.0
|
| 111 |
+
|
| 112 |
+
# Metric 4: total_packed_area
|
| 113 |
+
try:
|
| 114 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 115 |
+
except Exception as e:
|
| 116 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 117 |
+
metrics["total_packed_area"] = 0.0
|
| 118 |
+
|
| 119 |
+
# Metric 5: min_clearance_to_boundary
|
| 120 |
+
try:
|
| 121 |
+
min_clearance = float('inf')
|
| 122 |
+
for i in range(N_EXPECTED):
|
| 123 |
+
x, y = centers[i]
|
| 124 |
+
r = radii[i]
|
| 125 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 126 |
+
min_clearance = min(min_clearance, *clearances)
|
| 127 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 128 |
+
except Exception as e:
|
| 129 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 130 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 131 |
+
|
| 132 |
+
# Metric 6: max_overlap_value
|
| 133 |
+
try:
|
| 134 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 135 |
+
for i in range(N_EXPECTED):
|
| 136 |
+
for j in range(i + 1, N_EXPECTED):
|
| 137 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 138 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 139 |
+
if overlap > max_overlap:
|
| 140 |
+
max_overlap = overlap
|
| 141 |
+
metrics["max_overlap_value"] = max_overlap
|
| 142 |
+
except Exception as e:
|
| 143 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 144 |
+
metrics["max_overlap_value"] = 0.0
|
| 145 |
+
|
| 146 |
+
# Metric 8: min_radius
|
| 147 |
+
try:
|
| 148 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 149 |
+
except Exception as e:
|
| 150 |
+
print(f"Error calculating min_radius: {e}")
|
| 151 |
+
metrics["min_radius"] = 0.0
|
| 152 |
+
|
| 153 |
+
# Metric 10: boundary_contact_count
|
| 154 |
+
try:
|
| 155 |
+
contact_count = 0
|
| 156 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 157 |
+
for i in range(N_EXPECTED):
|
| 158 |
+
x, y = centers[i]
|
| 159 |
+
r = radii[i]
|
| 160 |
+
# Check if touching any of the four boundaries
|
| 161 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 162 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 163 |
+
contact_count += 1
|
| 164 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 165 |
+
except Exception as e:
|
| 166 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 167 |
+
metrics["boundary_contact_count"] = 0
|
| 168 |
+
|
| 169 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": false,
|
| 3 |
+
"error": "ValueError: too many values to unpack (expected 2)"
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_100/results/metrics.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 0.0,
|
| 3 |
+
"correct": false,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 0.0,
|
| 6 |
+
"execution_time_mean": 0.0,
|
| 7 |
+
"execution_time_std": 0.0,
|
| 8 |
+
"num_successful_runs": 0,
|
| 9 |
+
"num_valid_runs": 0,
|
| 10 |
+
"num_invalid_runs": 0,
|
| 11 |
+
"all_validation_errors": [],
|
| 12 |
+
"correct": false,
|
| 13 |
+
"validation_error": "ValueError: too many values to unpack (expected 2)"
|
| 14 |
+
},
|
| 15 |
+
"auxiliary": {
|
| 16 |
+
"is_valid_packing": 0.0,
|
| 17 |
+
"has_nan_or_inf": 0.0,
|
| 18 |
+
"mean_radius": 0.0,
|
| 19 |
+
"radius_std_dev": 0.0,
|
| 20 |
+
"total_packed_area": 0.0,
|
| 21 |
+
"min_clearance_to_boundary": 0.0,
|
| 22 |
+
"max_overlap_value": 0.0,
|
| 23 |
+
"num_circles_generated": 0,
|
| 24 |
+
"min_radius": 0.0,
|
| 25 |
+
"boundary_contact_count": 0
|
| 26 |
+
},
|
| 27 |
+
"auxiliary_descriptions": {
|
| 28 |
+
"mean_radius": "Average radius of all circles.",
|
| 29 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 30 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 31 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 32 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 33 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 34 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 35 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 36 |
+
"has_nan_or_inf": "Binary flag indicating if any center or radius contains NaN or Inf values, a sign of numerical instability.",
|
| 37 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 38 |
+
},
|
| 39 |
+
"timestamp": 1770931407.4188106,
|
| 40 |
+
"generation": 100
|
| 41 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: has_nan_or_inf (NEW)
|
| 57 |
+
try:
|
| 58 |
+
has_nan = np.isnan(centers).any() or np.isnan(radii).any()
|
| 59 |
+
has_inf = np.isinf(centers).any() or np.isinf(radii).any()
|
| 60 |
+
metrics["has_nan_or_inf"] = 1.0 if (has_nan or has_inf) else 0.0
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error calculating has_nan_or_inf: {e}")
|
| 63 |
+
metrics["has_nan_or_inf"] = 0.0 # Default to no error if calculation fails
|
| 64 |
+
|
| 65 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 66 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 67 |
+
if centers.size == 0 or radii.size == 0:
|
| 68 |
+
metrics["mean_radius"] = 0.0
|
| 69 |
+
metrics["radius_std_dev"] = 0.0
|
| 70 |
+
metrics["total_packed_area"] = 0.0
|
| 71 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 72 |
+
metrics["max_overlap_value"] = 0.0
|
| 73 |
+
metrics["num_circles_generated"] = 0
|
| 74 |
+
metrics["min_radius"] = 0.0
|
| 75 |
+
metrics["boundary_contact_count"] = 0
|
| 76 |
+
return metrics
|
| 77 |
+
|
| 78 |
+
# Metric: num_circles_generated (actual count)
|
| 79 |
+
try:
|
| 80 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 81 |
+
except Exception as e:
|
| 82 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 83 |
+
metrics["num_circles_generated"] = 0
|
| 84 |
+
|
| 85 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 86 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 87 |
+
metrics["mean_radius"] = 0.0
|
| 88 |
+
metrics["radius_std_dev"] = 0.0
|
| 89 |
+
metrics["total_packed_area"] = 0.0
|
| 90 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 91 |
+
metrics["max_overlap_value"] = 0.0
|
| 92 |
+
metrics["min_radius"] = 0.0
|
| 93 |
+
metrics["boundary_contact_count"] = 0
|
| 94 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 95 |
+
|
| 96 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 97 |
+
|
| 98 |
+
# Metric 2: mean_radius
|
| 99 |
+
try:
|
| 100 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 101 |
+
except Exception as e:
|
| 102 |
+
print(f"Error calculating mean_radius: {e}")
|
| 103 |
+
metrics["mean_radius"] = 0.0
|
| 104 |
+
|
| 105 |
+
# Metric 3: radius_std_dev
|
| 106 |
+
try:
|
| 107 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 108 |
+
except Exception as e:
|
| 109 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 110 |
+
metrics["radius_std_dev"] = 0.0
|
| 111 |
+
|
| 112 |
+
# Metric 4: total_packed_area
|
| 113 |
+
try:
|
| 114 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 115 |
+
except Exception as e:
|
| 116 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 117 |
+
metrics["total_packed_area"] = 0.0
|
| 118 |
+
|
| 119 |
+
# Metric 5: min_clearance_to_boundary
|
| 120 |
+
try:
|
| 121 |
+
min_clearance = float('inf')
|
| 122 |
+
for i in range(N_EXPECTED):
|
| 123 |
+
x, y = centers[i]
|
| 124 |
+
r = radii[i]
|
| 125 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 126 |
+
min_clearance = min(min_clearance, *clearances)
|
| 127 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 128 |
+
except Exception as e:
|
| 129 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 130 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 131 |
+
|
| 132 |
+
# Metric 6: max_overlap_value
|
| 133 |
+
try:
|
| 134 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 135 |
+
for i in range(N_EXPECTED):
|
| 136 |
+
for j in range(i + 1, N_EXPECTED):
|
| 137 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 138 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 139 |
+
if overlap > max_overlap:
|
| 140 |
+
max_overlap = overlap
|
| 141 |
+
metrics["max_overlap_value"] = max_overlap
|
| 142 |
+
except Exception as e:
|
| 143 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 144 |
+
metrics["max_overlap_value"] = 0.0
|
| 145 |
+
|
| 146 |
+
# Metric 8: min_radius
|
| 147 |
+
try:
|
| 148 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 149 |
+
except Exception as e:
|
| 150 |
+
print(f"Error calculating min_radius: {e}")
|
| 151 |
+
metrics["min_radius"] = 0.0
|
| 152 |
+
|
| 153 |
+
# Metric 10: boundary_contact_count
|
| 154 |
+
try:
|
| 155 |
+
contact_count = 0
|
| 156 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 157 |
+
for i in range(N_EXPECTED):
|
| 158 |
+
x, y = centers[i]
|
| 159 |
+
r = radii[i]
|
| 160 |
+
# Check if touching any of the four boundaries
|
| 161 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 162 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 163 |
+
contact_count += 1
|
| 164 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 165 |
+
except Exception as e:
|
| 166 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 167 |
+
metrics["boundary_contact_count"] = 0
|
| 168 |
+
|
| 169 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_101/results/metrics.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.598251252706035,
|
| 3 |
+
"correct": true,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 2.598251252706035,
|
| 6 |
+
"public": {
|
| 7 |
+
"centers_str": " centers[0] = (0.0820, 0.0813)\n centers[1] = (0.2889, 0.1266)\n centers[2] = (0.4401, 0.2608)\n centers[3] = (0.5191, 0.1022)\n centers[4] = (0.7173, 0.0960)\n centers[5] = (0.9066, 0.0934)\n centers[6] = (0.1040, 0.2653)\n centers[7] = (0.2899, 0.3559)\n centers[8] = (0.4703, 0.4225)\n centers[9] = (0.6313, 0.2919)\n centers[10] = (0.8635, 0.2955)\n centers[11] = (0.1141, 0.4831)\n centers[12] = (0.3106, 0.5516)\n centers[13] = (0.3846, 0.7270)\n centers[14] = (0.6451, 0.5202)\n centers[15] = (0.8779, 0.5305)\n centers[16] = (0.1679, 0.7104)\n centers[17] = (0.4732, 0.5839)\n centers[18] = (0.5768, 0.7158)\n centers[19] = (0.7448, 0.6785)\n centers[20] = (0.9033, 0.7478)\n centers[21] = (0.0921, 0.9079)\n centers[22] = (0.2855, 0.8984)\n centers[23] = (0.4915, 0.8963)\n centers[24] = (0.7253, 0.8769)\n centers[25] = (0.9217, 0.9218)",
|
| 8 |
+
"num_circles": 26
|
| 9 |
+
},
|
| 10 |
+
"private": {
|
| 11 |
+
"reported_sum_of_radii": 2.598251252706035
|
| 12 |
+
},
|
| 13 |
+
"execution_time_mean": 11.946096290834248,
|
| 14 |
+
"execution_time_std": 0.0,
|
| 15 |
+
"num_valid_runs": 1,
|
| 16 |
+
"num_invalid_runs": 0,
|
| 17 |
+
"all_validation_errors": [],
|
| 18 |
+
"correct": true,
|
| 19 |
+
"validation_error": null
|
| 20 |
+
},
|
| 21 |
+
"auxiliary": {
|
| 22 |
+
"is_valid_packing": 1.0,
|
| 23 |
+
"has_nan_or_inf": 0.0,
|
| 24 |
+
"num_circles_generated": 26,
|
| 25 |
+
"mean_radius": 0.09993274048869366,
|
| 26 |
+
"radius_std_dev": 0.015295358419589405,
|
| 27 |
+
"total_packed_area": 0.834824890504465,
|
| 28 |
+
"min_clearance_to_boundary": 0.0,
|
| 29 |
+
"max_overlap_value": 0.0,
|
| 30 |
+
"min_radius": 0.07188900122340332,
|
| 31 |
+
"boundary_contact_count": 9.0
|
| 32 |
+
},
|
| 33 |
+
"auxiliary_descriptions": {
|
| 34 |
+
"mean_radius": "Average radius of all circles.",
|
| 35 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 36 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 37 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 38 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 39 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 40 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 41 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 42 |
+
"has_nan_or_inf": "Binary flag indicating if any center or radius contains NaN or Inf values, a sign of numerical instability.",
|
| 43 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 44 |
+
},
|
| 45 |
+
"timestamp": 1770931544.2339413,
|
| 46 |
+
"generation": 101
|
| 47 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (11.4 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_103/results/metrics.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.58878731265258,
|
| 3 |
+
"correct": true,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 2.58878731265258,
|
| 6 |
+
"public": {
|
| 7 |
+
"centers_str": " centers[0] = (0.1141, 0.1142)\n centers[1] = (0.3221, 0.0949)\n centers[2] = (0.7227, 0.1043)\n centers[3] = (0.9131, 0.0870)\n centers[4] = (0.1012, 0.3292)\n centers[5] = (0.2468, 0.2406)\n centers[6] = (0.5179, 0.1005)\n centers[7] = (0.6130, 0.2872)\n centers[8] = (0.8576, 0.3020)\n centers[9] = (0.4810, 0.4314)\n centers[10] = (0.4633, 0.5761)\n centers[11] = (0.4098, 0.2640)\n centers[12] = (0.6676, 0.5028)\n centers[13] = (0.1057, 0.5348)\n centers[14] = (0.2863, 0.4247)\n centers[15] = (0.5713, 0.6932)\n centers[16] = (0.7491, 0.6766)\n centers[17] = (0.8884, 0.5468)\n centers[18] = (0.0981, 0.7371)\n centers[19] = (0.3000, 0.6520)\n centers[20] = (0.5008, 0.8905)\n centers[21] = (0.9051, 0.7525)\n centers[22] = (0.0827, 0.9172)\n centers[23] = (0.2771, 0.8857)\n centers[24] = (0.7307, 0.8792)\n centers[25] = (0.9231, 0.9232)",
|
| 8 |
+
"num_circles": 26
|
| 9 |
+
},
|
| 10 |
+
"private": {
|
| 11 |
+
"reported_sum_of_radii": 2.58878731265258
|
| 12 |
+
},
|
| 13 |
+
"execution_time_mean": 13.756057593040168,
|
| 14 |
+
"execution_time_std": 0.0,
|
| 15 |
+
"num_valid_runs": 1,
|
| 16 |
+
"num_invalid_runs": 0,
|
| 17 |
+
"all_validation_errors": [],
|
| 18 |
+
"correct": true,
|
| 19 |
+
"validation_error": null
|
| 20 |
+
},
|
| 21 |
+
"auxiliary": {
|
| 22 |
+
"is_valid_packing": 1.0,
|
| 23 |
+
"avg_dist_to_center": 0.3706455082509068,
|
| 24 |
+
"num_circles_generated": 26,
|
| 25 |
+
"mean_radius": 0.09956874279433,
|
| 26 |
+
"radius_std_dev": 0.016847858934533527,
|
| 27 |
+
"total_packed_area": 0.8329694386157515,
|
| 28 |
+
"min_clearance_to_boundary": 0.0,
|
| 29 |
+
"max_overlap_value": 0.0,
|
| 30 |
+
"min_radius": 0.059401842374791416,
|
| 31 |
+
"boundary_contact_count": 9.0
|
| 32 |
+
},
|
| 33 |
+
"auxiliary_descriptions": {
|
| 34 |
+
"mean_radius": "Average radius of all circles.",
|
| 35 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 36 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 37 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 38 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 39 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 40 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 41 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 42 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 43 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 44 |
+
},
|
| 45 |
+
"timestamp": 1770931681.47676,
|
| 46 |
+
"generation": 103
|
| 47 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (11.2 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_105/results/metrics.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.6022372719739915,
|
| 3 |
+
"correct": true,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 2.6022372719739915,
|
| 6 |
+
"public": {
|
| 7 |
+
"centers_str": " centers[0] = (0.8982, 0.2462)\n centers[1] = (0.3016, 0.0736)\n centers[2] = (0.5011, 0.1335)\n centers[3] = (0.4557, 0.3579)\n centers[4] = (0.7498, 0.1039)\n centers[5] = (0.9258, 0.0734)\n centers[6] = (0.1074, 0.3367)\n centers[7] = (0.2945, 0.2470)\n centers[8] = (0.5600, 0.4863)\n centers[9] = (0.6805, 0.3286)\n centers[10] = (0.8916, 0.8929)\n centers[11] = (0.1068, 0.5513)\n centers[12] = (0.2824, 0.4451)\n centers[13] = (0.7052, 0.9199)\n centers[14] = (0.7036, 0.5469)\n centers[15] = (0.8863, 0.4609)\n centers[16] = (0.0993, 0.7571)\n centers[17] = (0.3102, 0.6709)\n centers[18] = (0.5271, 0.6407)\n centers[19] = (0.6939, 0.7376)\n centers[20] = (0.8942, 0.6801)\n centers[21] = (0.0730, 0.9271)\n centers[22] = (0.4302, 0.5176)\n centers[23] = (0.4910, 0.8641)\n centers[24] = (0.2492, 0.8957)\n centers[25] = (0.1168, 0.1148)",
|
| 8 |
+
"num_circles": 26
|
| 9 |
+
},
|
| 10 |
+
"private": {
|
| 11 |
+
"reported_sum_of_radii": 2.6022372719739915
|
| 12 |
+
},
|
| 13 |
+
"execution_time_mean": 12.359428913332522,
|
| 14 |
+
"execution_time_std": 0.0,
|
| 15 |
+
"num_valid_runs": 1,
|
| 16 |
+
"num_invalid_runs": 0,
|
| 17 |
+
"all_validation_errors": [],
|
| 18 |
+
"correct": true,
|
| 19 |
+
"validation_error": null
|
| 20 |
+
},
|
| 21 |
+
"auxiliary": {
|
| 22 |
+
"is_valid_packing": 1.0,
|
| 23 |
+
"avg_dist_to_center": 0.3652892177516893,
|
| 24 |
+
"num_circles_generated": 26,
|
| 25 |
+
"mean_radius": 0.10008604892207659,
|
| 26 |
+
"radius_std_dev": 0.019429407832541483,
|
| 27 |
+
"total_packed_area": 0.8490553003446314,
|
| 28 |
+
"min_clearance_to_boundary": 0.0,
|
| 29 |
+
"max_overlap_value": 0.0,
|
| 30 |
+
"min_radius": 0.06616651730878995,
|
| 31 |
+
"boundary_contact_count": 13.0
|
| 32 |
+
},
|
| 33 |
+
"auxiliary_descriptions": {
|
| 34 |
+
"mean_radius": "Average radius of all circles.",
|
| 35 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 36 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 37 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 38 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 39 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 40 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 41 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 42 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 43 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 44 |
+
},
|
| 45 |
+
"timestamp": 1770931808.6927269,
|
| 46 |
+
"generation": 105
|
| 47 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_106/results/metrics.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.603588180078524,
|
| 3 |
+
"correct": true,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 2.603588180078524,
|
| 6 |
+
"public": {
|
| 7 |
+
"centers_str": " centers[0] = (0.0922, 0.0922)\n centers[1] = (0.2800, 0.0958)\n centers[2] = (0.3694, 0.2724)\n centers[3] = (0.4757, 0.1001)\n centers[4] = (0.6817, 0.1058)\n centers[5] = (0.8937, 0.1060)\n centers[6] = (0.1354, 0.3158)\n centers[7] = (0.4734, 0.4298)\n centers[8] = (0.3081, 0.4437)\n centers[9] = (0.5685, 0.2735)\n centers[10] = (0.6524, 0.4435)\n centers[11] = (0.7574, 0.2897)\n centers[12] = (0.9252, 0.2839)\n centers[13] = (0.1444, 0.5965)\n centers[14] = (0.3963, 0.5862)\n centers[15] = (0.3269, 0.7658)\n centers[16] = (0.5219, 0.7205)\n centers[17] = (0.6980, 0.6324)\n centers[18] = (0.5474, 0.5618)\n centers[19] = (0.8712, 0.4803)\n centers[20] = (0.1250, 0.8748)\n centers[21] = (0.3058, 0.9345)\n centers[22] = (0.4711, 0.9046)\n centers[23] = (0.6981, 0.8668)\n centers[24] = (0.8904, 0.7185)\n centers[25] = (0.9129, 0.9134)",
|
| 8 |
+
"num_circles": 26
|
| 9 |
+
},
|
| 10 |
+
"private": {
|
| 11 |
+
"reported_sum_of_radii": 2.603588180078524
|
| 12 |
+
},
|
| 13 |
+
"execution_time_mean": 15.023704247549176,
|
| 14 |
+
"execution_time_std": 0.0,
|
| 15 |
+
"num_valid_runs": 1,
|
| 16 |
+
"num_invalid_runs": 0,
|
| 17 |
+
"all_validation_errors": [],
|
| 18 |
+
"correct": true,
|
| 19 |
+
"validation_error": null
|
| 20 |
+
},
|
| 21 |
+
"auxiliary": {
|
| 22 |
+
"is_valid_packing": 1.0,
|
| 23 |
+
"avg_dist_to_center": 0.35282780739916286,
|
| 24 |
+
"num_circles_generated": 26,
|
| 25 |
+
"mean_radius": 0.10013800692609708,
|
| 26 |
+
"radius_std_dev": 0.019777605238614774,
|
| 27 |
+
"total_packed_area": 0.8510201484798829,
|
| 28 |
+
"min_clearance_to_boundary": 0.0,
|
| 29 |
+
"max_overlap_value": 0.0,
|
| 30 |
+
"min_radius": 0.06502791892219674,
|
| 31 |
+
"boundary_contact_count": 14.0
|
| 32 |
+
},
|
| 33 |
+
"auxiliary_descriptions": {
|
| 34 |
+
"mean_radius": "Average radius of all circles.",
|
| 35 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 36 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 37 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 38 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 39 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 40 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 41 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 42 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 43 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 44 |
+
},
|
| 45 |
+
"timestamp": 1770931850.5854654,
|
| 46 |
+
"generation": 106
|
| 47 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (9.92 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_108/results/metrics.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.599656357093383,
|
| 3 |
+
"correct": true,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 2.599656357093383,
|
| 6 |
+
"public": {
|
| 7 |
+
"centers_str": " centers[0] = (0.0885, 0.0884)\n centers[1] = (0.2781, 0.1017)\n centers[2] = (0.4987, 0.1196)\n centers[3] = (0.7269, 0.1089)\n centers[4] = (0.9170, 0.0830)\n centers[5] = (0.1340, 0.3060)\n centers[6] = (0.3583, 0.2780)\n centers[7] = (0.4836, 0.4035)\n centers[8] = (0.6316, 0.2936)\n centers[9] = (0.8776, 0.2846)\n centers[10] = (0.1134, 0.5527)\n centers[11] = (0.3074, 0.4633)\n centers[12] = (0.4497, 0.5533)\n centers[13] = (0.5938, 0.5251)\n centers[14] = (0.7413, 0.4459)\n centers[15] = (0.9044, 0.5317)\n centers[16] = (0.0980, 0.7635)\n centers[17] = (0.3109, 0.6907)\n centers[18] = (0.5400, 0.6977)\n centers[19] = (0.7366, 0.6382)\n centers[20] = (0.9118, 0.7153)\n centers[21] = (0.0704, 0.9296)\n centers[22] = (0.2361, 0.9024)\n centers[23] = (0.4446, 0.8886)\n centers[24] = (0.6803, 0.8755)\n centers[25] = (0.9016, 0.9016)",
|
| 8 |
+
"num_circles": 26
|
| 9 |
+
},
|
| 10 |
+
"private": {
|
| 11 |
+
"reported_sum_of_radii": 2.599656357093383
|
| 12 |
+
},
|
| 13 |
+
"execution_time_mean": 23.100646714679897,
|
| 14 |
+
"execution_time_std": 0.0,
|
| 15 |
+
"num_valid_runs": 1,
|
| 16 |
+
"num_invalid_runs": 0,
|
| 17 |
+
"all_validation_errors": [],
|
| 18 |
+
"correct": true,
|
| 19 |
+
"validation_error": null
|
| 20 |
+
},
|
| 21 |
+
"auxiliary": {
|
| 22 |
+
"is_valid_packing": 1.0,
|
| 23 |
+
"avg_dist_to_center": 0.36462229842860683,
|
| 24 |
+
"num_circles_generated": 26,
|
| 25 |
+
"mean_radius": 0.09998678296513011,
|
| 26 |
+
"radius_std_dev": 0.016554252962000086,
|
| 27 |
+
"total_packed_area": 0.8389824291407547,
|
| 28 |
+
"min_clearance_to_boundary": 0.0,
|
| 29 |
+
"max_overlap_value": 0.0,
|
| 30 |
+
"min_radius": 0.06821299107750015,
|
| 31 |
+
"boundary_contact_count": 6.0
|
| 32 |
+
},
|
| 33 |
+
"auxiliary_descriptions": {
|
| 34 |
+
"mean_radius": "Average radius of all circles.",
|
| 35 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 36 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 37 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 38 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 39 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 40 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 41 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 42 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 43 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 44 |
+
},
|
| 45 |
+
"timestamp": 1770931983.354642,
|
| 46 |
+
"generation": 108
|
| 47 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_110/results/metrics.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.5853378579135846,
|
| 3 |
+
"correct": true,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 2.5853378579135846,
|
| 6 |
+
"public": {
|
| 7 |
+
"centers_str": " centers[0] = (0.1206, 0.1207)\n centers[1] = (0.2625, 0.2625)\n centers[2] = (0.3409, 0.1002)\n centers[3] = (0.5405, 0.0992)\n centers[4] = (0.7385, 0.0987)\n centers[5] = (0.9182, 0.0820)\n centers[6] = (0.1070, 0.3663)\n centers[7] = (0.3164, 0.4548)\n centers[8] = (0.4429, 0.2737)\n centers[9] = (0.6405, 0.2679)\n centers[10] = (0.8681, 0.2897)\n centers[11] = (0.1379, 0.5563)\n centers[12] = (0.7028, 0.4540)\n centers[13] = (0.5486, 0.3999)\n centers[14] = (0.4951, 0.5251)\n centers[15] = (0.8947, 0.5253)\n centers[16] = (0.1066, 0.7459)\n centers[17] = (0.3252, 0.6916)\n centers[18] = (0.5453, 0.6928)\n centers[19] = (0.7305, 0.6409)\n centers[20] = (0.8985, 0.7323)\n centers[21] = (0.0749, 0.9247)\n centers[22] = (0.2501, 0.8977)\n centers[23] = (0.4627, 0.8895)\n centers[24] = (0.7054, 0.8661)\n centers[25] = (0.9163, 0.9165)",
|
| 8 |
+
"num_circles": 26
|
| 9 |
+
},
|
| 10 |
+
"private": {
|
| 11 |
+
"reported_sum_of_radii": 2.5853378579135846
|
| 12 |
+
},
|
| 13 |
+
"execution_time_mean": 25.150721285492182,
|
| 14 |
+
"execution_time_std": 0.0,
|
| 15 |
+
"num_valid_runs": 1,
|
| 16 |
+
"num_invalid_runs": 0,
|
| 17 |
+
"all_validation_errors": [],
|
| 18 |
+
"correct": true,
|
| 19 |
+
"validation_error": null
|
| 20 |
+
},
|
| 21 |
+
"auxiliary": {
|
| 22 |
+
"is_valid_packing": 1.0,
|
| 23 |
+
"avg_dist_to_center": 0.3663982520663652,
|
| 24 |
+
"num_circles_generated": 26,
|
| 25 |
+
"mean_radius": 0.09943607145821479,
|
| 26 |
+
"radius_std_dev": 0.01698796410051645,
|
| 27 |
+
"total_packed_area": 0.8312000836243196,
|
| 28 |
+
"min_clearance_to_boundary": 0.0,
|
| 29 |
+
"max_overlap_value": 0.0,
|
| 30 |
+
"min_radius": 0.06393655738555767,
|
| 31 |
+
"boundary_contact_count": 11.0
|
| 32 |
+
},
|
| 33 |
+
"auxiliary_descriptions": {
|
| 34 |
+
"mean_radius": "Average radius of all circles.",
|
| 35 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 36 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 37 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 38 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 39 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 40 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 41 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 42 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 43 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 44 |
+
},
|
| 45 |
+
"timestamp": 1770932121.5753243,
|
| 46 |
+
"generation": 110
|
| 47 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": false,
|
| 3 |
+
"error": "NameError: name 'np' is not defined"
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_111/results/metrics.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 0.0,
|
| 3 |
+
"correct": false,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 0.0,
|
| 6 |
+
"execution_time_mean": 0.0,
|
| 7 |
+
"execution_time_std": 0.0,
|
| 8 |
+
"num_successful_runs": 0,
|
| 9 |
+
"num_valid_runs": 0,
|
| 10 |
+
"num_invalid_runs": 0,
|
| 11 |
+
"all_validation_errors": [],
|
| 12 |
+
"correct": false,
|
| 13 |
+
"validation_error": "NameError: name 'np' is not defined"
|
| 14 |
+
},
|
| 15 |
+
"auxiliary": {
|
| 16 |
+
"is_valid_packing": 0.0,
|
| 17 |
+
"avg_dist_to_center": 0.0,
|
| 18 |
+
"mean_radius": 0.0,
|
| 19 |
+
"radius_std_dev": 0.0,
|
| 20 |
+
"total_packed_area": 0.0,
|
| 21 |
+
"min_clearance_to_boundary": 0.0,
|
| 22 |
+
"max_overlap_value": 0.0,
|
| 23 |
+
"num_circles_generated": 0,
|
| 24 |
+
"min_radius": 0.0,
|
| 25 |
+
"boundary_contact_count": 0
|
| 26 |
+
},
|
| 27 |
+
"auxiliary_descriptions": {
|
| 28 |
+
"mean_radius": "Average radius of all circles.",
|
| 29 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 30 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 31 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 32 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 33 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 34 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 35 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 36 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 37 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 38 |
+
},
|
| 39 |
+
"timestamp": 1770932154.5226638,
|
| 40 |
+
"generation": 111
|
| 41 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_114/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 9 |
+
metrics = {}
|
| 10 |
+
|
| 11 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 12 |
+
centers = np.array([])
|
| 13 |
+
radii = np.array([])
|
| 14 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 15 |
+
|
| 16 |
+
# Try to load the extra.npz file for detailed data
|
| 17 |
+
try:
|
| 18 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 19 |
+
if os.path.exists(extra_data_path):
|
| 20 |
+
with np.load(extra_data_path) as data:
|
| 21 |
+
centers = data["centers"]
|
| 22 |
+
radii = data["radii"]
|
| 23 |
+
else:
|
| 24 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 25 |
+
except Exception as e:
|
| 26 |
+
print(f"Error loading extra.npz: {e}")
|
| 27 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 28 |
+
|
| 29 |
+
# Get primary validation status
|
| 30 |
+
primary_correct_flag = False
|
| 31 |
+
if primary_result and "correct" in primary_result:
|
| 32 |
+
primary_correct_flag = primary_result["correct"]
|
| 33 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 34 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 35 |
+
if os.path.exists(metrics_json_path):
|
| 36 |
+
try:
|
| 37 |
+
with open(metrics_json_path, 'r') as f:
|
| 38 |
+
json_data = json.load(f)
|
| 39 |
+
if "correct" in json_data:
|
| 40 |
+
primary_correct_flag = json_data["correct"]
|
| 41 |
+
elif "public" in json_data and "correct" in json_data["public"]:
|
| 42 |
+
primary_correct_flag = json_data["public"]["correct"]
|
| 43 |
+
elif "private" in json_data and "correct" in json_data["private"]:
|
| 44 |
+
primary_correct_flag = json_data["private"]["correct"]
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Error loading metrics.json for primary_correct_flag: {e}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Metric 1: is_valid_packing (Binary correctness signal)
|
| 50 |
+
try:
|
| 51 |
+
metrics["is_valid_packing"] = 1.0 if primary_correct_flag else 0.0
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error calculating is_valid_packing: {e}")
|
| 54 |
+
metrics["is_valid_packing"] = 0.0
|
| 55 |
+
|
| 56 |
+
# Metric: avg_dist_to_center
|
| 57 |
+
try:
|
| 58 |
+
if centers.shape[0] == N_EXPECTED:
|
| 59 |
+
square_center = np.array([0.5, 0.5])
|
| 60 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 61 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 62 |
+
else:
|
| 63 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 66 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 67 |
+
|
| 68 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 69 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 70 |
+
if centers.size == 0 or radii.size == 0:
|
| 71 |
+
metrics["mean_radius"] = 0.0
|
| 72 |
+
metrics["radius_std_dev"] = 0.0
|
| 73 |
+
metrics["total_packed_area"] = 0.0
|
| 74 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 75 |
+
metrics["max_overlap_value"] = 0.0
|
| 76 |
+
metrics["num_circles_generated"] = 0
|
| 77 |
+
metrics["min_radius"] = 0.0
|
| 78 |
+
metrics["boundary_contact_count"] = 0
|
| 79 |
+
return metrics
|
| 80 |
+
|
| 81 |
+
# Metric: num_circles_generated (actual count)
|
| 82 |
+
try:
|
| 83 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 86 |
+
metrics["num_circles_generated"] = 0
|
| 87 |
+
|
| 88 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 89 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 90 |
+
metrics["mean_radius"] = 0.0
|
| 91 |
+
metrics["radius_std_dev"] = 0.0
|
| 92 |
+
metrics["total_packed_area"] = 0.0
|
| 93 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 94 |
+
metrics["max_overlap_value"] = 0.0
|
| 95 |
+
metrics["min_radius"] = 0.0
|
| 96 |
+
metrics["boundary_contact_count"] = 0
|
| 97 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 98 |
+
|
| 99 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 100 |
+
|
| 101 |
+
# Metric 2: mean_radius
|
| 102 |
+
try:
|
| 103 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Error calculating mean_radius: {e}")
|
| 106 |
+
metrics["mean_radius"] = 0.0
|
| 107 |
+
|
| 108 |
+
# Metric 3: radius_std_dev
|
| 109 |
+
try:
|
| 110 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 111 |
+
except Exception as e:
|
| 112 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 113 |
+
metrics["radius_std_dev"] = 0.0
|
| 114 |
+
|
| 115 |
+
# Metric 4: total_packed_area
|
| 116 |
+
try:
|
| 117 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 118 |
+
except Exception as e:
|
| 119 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 120 |
+
metrics["total_packed_area"] = 0.0
|
| 121 |
+
|
| 122 |
+
# Metric 5: min_clearance_to_boundary
|
| 123 |
+
try:
|
| 124 |
+
min_clearance = float('inf')
|
| 125 |
+
for i in range(N_EXPECTED):
|
| 126 |
+
x, y = centers[i]
|
| 127 |
+
r = radii[i]
|
| 128 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 129 |
+
min_clearance = min(min_clearance, *clearances)
|
| 130 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 131 |
+
except Exception as e:
|
| 132 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 133 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 134 |
+
|
| 135 |
+
# Metric 6: max_overlap_value
|
| 136 |
+
try:
|
| 137 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 138 |
+
for i in range(N_EXPECTED):
|
| 139 |
+
for j in range(i + 1, N_EXPECTED):
|
| 140 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 141 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 142 |
+
if overlap > max_overlap:
|
| 143 |
+
max_overlap = overlap
|
| 144 |
+
metrics["max_overlap_value"] = max_overlap
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 147 |
+
metrics["max_overlap_value"] = 0.0
|
| 148 |
+
|
| 149 |
+
# Metric 8: min_radius
|
| 150 |
+
try:
|
| 151 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 152 |
+
except Exception as e:
|
| 153 |
+
print(f"Error calculating min_radius: {e}")
|
| 154 |
+
metrics["min_radius"] = 0.0
|
| 155 |
+
|
| 156 |
+
# Metric 10: boundary_contact_count
|
| 157 |
+
try:
|
| 158 |
+
contact_count = 0
|
| 159 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 160 |
+
for i in range(N_EXPECTED):
|
| 161 |
+
x, y = centers[i]
|
| 162 |
+
r = radii[i]
|
| 163 |
+
# Check if touching any of the four boundaries
|
| 164 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 165 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 166 |
+
contact_count += 1
|
| 167 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 170 |
+
metrics["boundary_contact_count"] = 0
|
| 171 |
+
|
| 172 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (10.3 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
# Human-readable descriptions keyed by auxiliary metric name.
auxiliary_metric_descriptions = {
    "execution_successful": "Binary flag: 1.0 if the program executed without fundamental runtime errors (e.g., NameError); 0.0 otherwise.",
    "avg_dist_to_center": "Average Euclidean distance of circle centers from the center of the unit square (0.5, 0.5), indicating compactness. Only calculated for valid packings.",
    "mean_radius": "Average radius of all circles. Only calculated for valid packings.",
    "radius_std_dev": "Standard deviation of radii. Only calculated for valid packings.",
    "total_packed_area": "Total area covered by all circles (sum of pi * r^2). Only calculated for valid packings.",
    "min_clearance_to_boundary": "Minimum distance of any circle (edge) to the unit square boundary. A negative value indicates a circle is out of bounds. Only calculated for valid packings.",
    "max_overlap_value": "Maximum overlap between any two circles. 0.0 if no overlap. Only calculated for valid packings.",
    # NOTE(review): evaluate_aux in this file does not emit num_tangent_pairs;
    # confirm whether the metric is still planned or the entry is stale.
    "num_tangent_pairs": "Counts the number of pairs of circles that are tangent (touching without overlapping) within a small tolerance. Indicates efficient packing.",
    # evaluate_aux reports this key, so it needs a description as well.
    "num_circles_generated": "The actual number of circles generated by the solution.",
    "min_radius": "The minimum radius among all circles in the packing. Only calculated for valid packings.",
    "boundary_contact_count": "Number of circles touching any of the unit square boundaries (within a small epsilon). Indicates effective utilization of space. Only calculated for valid packings.",
}
|
| 20 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 21 |
+
metrics = {}
|
| 22 |
+
|
| 23 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 24 |
+
centers = np.array([])
|
| 25 |
+
radii = np.array([])
|
| 26 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 27 |
+
|
| 28 |
+
# Try to load the extra.npz file for detailed data
|
| 29 |
+
try:
|
| 30 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 31 |
+
if os.path.exists(extra_data_path):
|
| 32 |
+
with np.load(extra_data_path) as data:
|
| 33 |
+
centers = data["centers"]
|
| 34 |
+
radii = data["radii"]
|
| 35 |
+
else:
|
| 36 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 37 |
+
except Exception as e:
|
| 38 |
+
print(f"Error loading extra.npz: {e}")
|
| 39 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 40 |
+
|
| 41 |
+
# Get primary validation status
|
| 42 |
+
primary_correct_flag = False
|
| 43 |
+
validation_error_str = None
|
| 44 |
+
if primary_result:
|
| 45 |
+
if "correct" in primary_result:
|
| 46 |
+
primary_correct_flag = primary_result["correct"]
|
| 47 |
+
if "primary" in primary_result and "validation_error" in primary_result["primary"]:
|
| 48 |
+
validation_error_str = primary_result["primary"]["validation_error"]
|
| 49 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 50 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 51 |
+
if os.path.exists(metrics_json_path):
|
| 52 |
+
try:
|
| 53 |
+
with open(metrics_json_path, 'r') as f:
|
| 54 |
+
json_data = json.load(f)
|
| 55 |
+
if "correct" in json_data:
|
| 56 |
+
primary_correct_flag = json_data["correct"]
|
| 57 |
+
elif "primary" in json_data and "correct" in json_data["primary"]:
|
| 58 |
+
primary_correct_flag = json_data["primary"]["correct"]
|
| 59 |
+
elif "primary" in json_data and "validation_error" in json_data["primary"]:
|
| 60 |
+
validation_error_str = json_data["primary"]["validation_error"]
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error loading metrics.json for primary_correct_flag or validation_error: {e}")
|
| 63 |
+
|
| 64 |
+
# Metric: execution_successful (Binary flag for successful program execution)
|
| 65 |
+
try:
|
| 66 |
+
metrics["execution_successful"] = 1.0 if not validation_error_str else 0.0
|
| 67 |
+
except Exception as e:
|
| 68 |
+
print(f"Error calculating execution_successful: {e}")
|
| 69 |
+
metrics["execution_successful"] = 0.0
|
| 70 |
+
|
| 71 |
+
# Metric: avg_dist_to_center
|
| 72 |
+
try:
|
| 73 |
+
if centers.shape[0] == N_EXPECTED:
|
| 74 |
+
square_center = np.array([0.5, 0.5])
|
| 75 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 76 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 77 |
+
else:
|
| 78 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 79 |
+
except Exception as e:
|
| 80 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 81 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 82 |
+
|
| 83 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 84 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 85 |
+
if centers.size == 0 or radii.size == 0:
|
| 86 |
+
metrics["mean_radius"] = 0.0
|
| 87 |
+
metrics["radius_std_dev"] = 0.0
|
| 88 |
+
metrics["total_packed_area"] = 0.0
|
| 89 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 90 |
+
metrics["max_overlap_value"] = 0.0
|
| 91 |
+
metrics["num_circles_generated"] = 0
|
| 92 |
+
metrics["min_radius"] = 0.0
|
| 93 |
+
metrics["boundary_contact_count"] = 0
|
| 94 |
+
return metrics
|
| 95 |
+
|
| 96 |
+
# Metric: num_circles_generated (actual count)
|
| 97 |
+
try:
|
| 98 |
+
metrics["num_circles_generated"] = centers.shape[0]
|
| 99 |
+
except Exception as e:
|
| 100 |
+
print(f"Error calculating num_circles_generated: {e}")
|
| 101 |
+
metrics["num_circles_generated"] = 0
|
| 102 |
+
|
| 103 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 104 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 105 |
+
metrics["mean_radius"] = 0.0
|
| 106 |
+
metrics["radius_std_dev"] = 0.0
|
| 107 |
+
metrics["total_packed_area"] = 0.0
|
| 108 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 109 |
+
metrics["max_overlap_value"] = 0.0
|
| 110 |
+
metrics["min_radius"] = 0.0
|
| 111 |
+
metrics["boundary_contact_count"] = 0
|
| 112 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 113 |
+
|
| 114 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 115 |
+
|
| 116 |
+
# Metric 2: mean_radius
|
| 117 |
+
try:
|
| 118 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 119 |
+
except Exception as e:
|
| 120 |
+
print(f"Error calculating mean_radius: {e}")
|
| 121 |
+
metrics["mean_radius"] = 0.0
|
| 122 |
+
|
| 123 |
+
# Metric 3: radius_std_dev
|
| 124 |
+
try:
|
| 125 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 126 |
+
except Exception as e:
|
| 127 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 128 |
+
metrics["radius_std_dev"] = 0.0
|
| 129 |
+
|
| 130 |
+
# Metric 4: total_packed_area
|
| 131 |
+
try:
|
| 132 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 133 |
+
except Exception as e:
|
| 134 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 135 |
+
metrics["total_packed_area"] = 0.0
|
| 136 |
+
|
| 137 |
+
# Metric 5: min_clearance_to_boundary
|
| 138 |
+
try:
|
| 139 |
+
min_clearance = float('inf')
|
| 140 |
+
for i in range(N_EXPECTED):
|
| 141 |
+
x, y = centers[i]
|
| 142 |
+
r = radii[i]
|
| 143 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 144 |
+
min_clearance = min(min_clearance, *clearances)
|
| 145 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 146 |
+
except Exception as e:
|
| 147 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 148 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 149 |
+
|
| 150 |
+
# Metric 6: max_overlap_value
|
| 151 |
+
try:
|
| 152 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 153 |
+
for i in range(N_EXPECTED):
|
| 154 |
+
for j in range(i + 1, N_EXPECTED):
|
| 155 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 156 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 157 |
+
if overlap > max_overlap:
|
| 158 |
+
max_overlap = overlap
|
| 159 |
+
metrics["max_overlap_value"] = max_overlap
|
| 160 |
+
except Exception as e:
|
| 161 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 162 |
+
metrics["max_overlap_value"] = 0.0
|
| 163 |
+
|
| 164 |
+
# Metric 8: min_radius
|
| 165 |
+
try:
|
| 166 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 167 |
+
except Exception as e:
|
| 168 |
+
print(f"Error calculating min_radius: {e}")
|
| 169 |
+
metrics["min_radius"] = 0.0
|
| 170 |
+
|
| 171 |
+
# Metric 10: boundary_contact_count
|
| 172 |
+
try:
|
| 173 |
+
contact_count = 0
|
| 174 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 175 |
+
for i in range(N_EXPECTED):
|
| 176 |
+
x, y = centers[i]
|
| 177 |
+
r = radii[i]
|
| 178 |
+
# Check if touching any of the four boundaries
|
| 179 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 180 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 181 |
+
contact_count += 1
|
| 182 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 183 |
+
except Exception as e:
|
| 184 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 185 |
+
metrics["boundary_contact_count"] = 0
|
| 186 |
+
|
| 187 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": false,
|
| 3 |
+
"error": "NameError: name 'np' is not defined"
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_116/results/metrics.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 0.0,
|
| 3 |
+
"correct": false,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 0.0,
|
| 6 |
+
"execution_time_mean": 0.0,
|
| 7 |
+
"execution_time_std": 0.0,
|
| 8 |
+
"num_successful_runs": 0,
|
| 9 |
+
"num_valid_runs": 0,
|
| 10 |
+
"num_invalid_runs": 0,
|
| 11 |
+
"all_validation_errors": [],
|
| 12 |
+
"correct": false,
|
| 13 |
+
"validation_error": "NameError: name 'np' is not defined"
|
| 14 |
+
},
|
| 15 |
+
"auxiliary": {
|
| 16 |
+
"execution_successful": 1.0,
|
| 17 |
+
"avg_dist_to_center": 0.0,
|
| 18 |
+
"mean_radius": 0.0,
|
| 19 |
+
"radius_std_dev": 0.0,
|
| 20 |
+
"total_packed_area": 0.0,
|
| 21 |
+
"min_clearance_to_boundary": 0.0,
|
| 22 |
+
"max_overlap_value": 0.0,
|
| 23 |
+
"num_circles_generated": 0,
|
| 24 |
+
"min_radius": 0.0,
|
| 25 |
+
"boundary_contact_count": 0
|
| 26 |
+
},
|
| 27 |
+
"auxiliary_descriptions": {
|
| 28 |
+
"mean_radius": "Average radius of all circles.",
|
| 29 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 30 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 31 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 32 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 33 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 34 |
+
"num_circles_generated": "The actual number of circles generated by the solution.",
|
| 35 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 36 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 37 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space."
|
| 38 |
+
},
|
| 39 |
+
"timestamp": 1770932363.1500704,
|
| 40 |
+
"generation": 116
|
| 41 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_118/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
# Human-readable description for every key that evaluate_aux() can emit.
# The wording reflects what the evaluator actually does: geometry metrics are
# computed for any loaded packing with the expected 26 circles, whether or not
# the primary evaluator judged that packing valid.
auxiliary_metric_descriptions = {
    "execution_successful": "Binary flag: 1.0 if the primary evaluation reported no validation error (e.g., NameError); 0.0 otherwise.",
    "avg_dist_to_center": "Average Euclidean distance of circle centers from the center of the unit square (0.5, 0.5), indicating compactness. Reported as 0 unless exactly 26 centers were loaded.",
    "mean_radius": "Average radius of all circles. Reported as 0 unless exactly 26 circles were loaded.",
    "radius_std_dev": "Standard deviation of radii. Reported as 0 unless exactly 26 circles were loaded.",
    "total_packed_area": "Total area covered by all circles (sum of pi * r^2). Reported as 0 unless exactly 26 circles were loaded.",
    "min_clearance_to_boundary": "Smallest signed gap between any circle's edge and the unit square boundary. A negative value indicates a circle is out of bounds. Reported as 0 unless exactly 26 circles were loaded.",
    "max_overlap_value": "Largest overlap (sum of radii minus center distance) between any two circles; 0.0 if no pair overlaps. Reported as 0 unless exactly 26 circles were loaded.",
    "num_tangent_pairs": "Number of circle pairs whose center distance is within 1e-4 of the sum of their radii (touching or very nearly touching). Indicates efficient packing.",
    "min_radius": "The minimum radius among all circles in the packing. Reported as 0 unless exactly 26 circles were loaded.",
    "boundary_contact_count": "Number of circles whose edge lies within 1e-6 of any unit square boundary. Indicates effective utilization of space. Reported as 0 unless exactly 26 circles were loaded.",
}
|
| 20 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 21 |
+
metrics = {}
|
| 22 |
+
|
| 23 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 24 |
+
centers = np.array([])
|
| 25 |
+
radii = np.array([])
|
| 26 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 27 |
+
|
| 28 |
+
# Try to load the extra.npz file for detailed data
|
| 29 |
+
try:
|
| 30 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 31 |
+
if os.path.exists(extra_data_path):
|
| 32 |
+
with np.load(extra_data_path) as data:
|
| 33 |
+
centers = data["centers"]
|
| 34 |
+
radii = data["radii"]
|
| 35 |
+
else:
|
| 36 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 37 |
+
except Exception as e:
|
| 38 |
+
print(f"Error loading extra.npz: {e}")
|
| 39 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 40 |
+
|
| 41 |
+
# Get primary validation status
|
| 42 |
+
primary_correct_flag = False
|
| 43 |
+
validation_error_str = None
|
| 44 |
+
if primary_result:
|
| 45 |
+
if "correct" in primary_result:
|
| 46 |
+
primary_correct_flag = primary_result["correct"]
|
| 47 |
+
if "primary" in primary_result and "validation_error" in primary_result["primary"]:
|
| 48 |
+
validation_error_str = primary_result["primary"]["validation_error"]
|
| 49 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 50 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 51 |
+
if os.path.exists(metrics_json_path):
|
| 52 |
+
try:
|
| 53 |
+
with open(metrics_json_path, 'r') as f:
|
| 54 |
+
json_data = json.load(f)
|
| 55 |
+
if "correct" in json_data:
|
| 56 |
+
primary_correct_flag = json_data["correct"]
|
| 57 |
+
elif "primary" in json_data and "correct" in json_data["primary"]:
|
| 58 |
+
primary_correct_flag = json_data["primary"]["correct"]
|
| 59 |
+
elif "primary" in json_data and "validation_error" in json_data["primary"]:
|
| 60 |
+
validation_error_str = json_data["primary"]["validation_error"]
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error loading metrics.json for primary_correct_flag or validation_error: {e}")
|
| 63 |
+
|
| 64 |
+
# Metric: execution_successful (Binary flag for successful program execution)
|
| 65 |
+
try:
|
| 66 |
+
metrics["execution_successful"] = 1.0 if not validation_error_str else 0.0
|
| 67 |
+
except Exception as e:
|
| 68 |
+
print(f"Error calculating execution_successful: {e}")
|
| 69 |
+
metrics["execution_successful"] = 0.0
|
| 70 |
+
|
| 71 |
+
# Metric: avg_dist_to_center
|
| 72 |
+
try:
|
| 73 |
+
if centers.shape[0] == N_EXPECTED:
|
| 74 |
+
square_center = np.array([0.5, 0.5])
|
| 75 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 76 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 77 |
+
else:
|
| 78 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 79 |
+
except Exception as e:
|
| 80 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 81 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 82 |
+
|
| 83 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 84 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 85 |
+
if centers.size == 0 or radii.size == 0:
|
| 86 |
+
metrics["mean_radius"] = 0.0
|
| 87 |
+
metrics["radius_std_dev"] = 0.0
|
| 88 |
+
metrics["total_packed_area"] = 0.0
|
| 89 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 90 |
+
metrics["max_overlap_value"] = 0.0
|
| 91 |
+
|
| 92 |
+
metrics["min_radius"] = 0.0
|
| 93 |
+
metrics["boundary_contact_count"] = 0
|
| 94 |
+
return metrics
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 99 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 100 |
+
metrics["mean_radius"] = 0.0
|
| 101 |
+
metrics["radius_std_dev"] = 0.0
|
| 102 |
+
metrics["total_packed_area"] = 0.0
|
| 103 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 104 |
+
metrics["max_overlap_value"] = 0.0
|
| 105 |
+
metrics["min_radius"] = 0.0
|
| 106 |
+
metrics["boundary_contact_count"] = 0
|
| 107 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 108 |
+
|
| 109 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 110 |
+
|
| 111 |
+
# Metric 2: mean_radius
|
| 112 |
+
try:
|
| 113 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 114 |
+
except Exception as e:
|
| 115 |
+
print(f"Error calculating mean_radius: {e}")
|
| 116 |
+
metrics["mean_radius"] = 0.0
|
| 117 |
+
|
| 118 |
+
# Metric 3: radius_std_dev
|
| 119 |
+
try:
|
| 120 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 121 |
+
except Exception as e:
|
| 122 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 123 |
+
metrics["radius_std_dev"] = 0.0
|
| 124 |
+
|
| 125 |
+
# Metric 4: total_packed_area
|
| 126 |
+
try:
|
| 127 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 128 |
+
except Exception as e:
|
| 129 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 130 |
+
metrics["total_packed_area"] = 0.0
|
| 131 |
+
|
| 132 |
+
# Metric 5: min_clearance_to_boundary
|
| 133 |
+
try:
|
| 134 |
+
min_clearance = float('inf')
|
| 135 |
+
for i in range(N_EXPECTED):
|
| 136 |
+
x, y = centers[i]
|
| 137 |
+
r = radii[i]
|
| 138 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 139 |
+
min_clearance = min(min_clearance, *clearances)
|
| 140 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 141 |
+
except Exception as e:
|
| 142 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 143 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 144 |
+
|
| 145 |
+
# Metric 6: max_overlap_value
|
| 146 |
+
try:
|
| 147 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 148 |
+
for i in range(N_EXPECTED):
|
| 149 |
+
for j in range(i + 1, N_EXPECTED):
|
| 150 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 151 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 152 |
+
if overlap > max_overlap:
|
| 153 |
+
max_overlap = overlap
|
| 154 |
+
metrics["max_overlap_value"] = max_overlap
|
| 155 |
+
except Exception as e:
|
| 156 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 157 |
+
metrics["max_overlap_value"] = 0.0
|
| 158 |
+
|
| 159 |
+
# Metric: num_tangent_pairs
|
| 160 |
+
try:
|
| 161 |
+
tangent_count = 0
|
| 162 |
+
epsilon = 1e-4 # Tolerance for tangency
|
| 163 |
+
for i in range(N_EXPECTED):
|
| 164 |
+
for j in range(i + 1, N_EXPECTED):
|
| 165 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 166 |
+
sum_radii = radii[i] + radii[j]
|
| 167 |
+
# Check for tangency: distance is approximately sum of radii
|
| 168 |
+
if abs(dist - sum_radii) < epsilon:
|
| 169 |
+
tangent_count += 1
|
| 170 |
+
metrics["num_tangent_pairs"] = float(tangent_count)
|
| 171 |
+
except Exception as e:
|
| 172 |
+
print(f"Error calculating num_tangent_pairs: {e}")
|
| 173 |
+
metrics["num_tangent_pairs"] = 0.0
|
| 174 |
+
|
| 175 |
+
# Metric 8: min_radius
|
| 176 |
+
try:
|
| 177 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 178 |
+
except Exception as e:
|
| 179 |
+
print(f"Error calculating min_radius: {e}")
|
| 180 |
+
metrics["min_radius"] = 0.0
|
| 181 |
+
|
| 182 |
+
# Metric 10: boundary_contact_count
|
| 183 |
+
try:
|
| 184 |
+
contact_count = 0
|
| 185 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 186 |
+
for i in range(N_EXPECTED):
|
| 187 |
+
x, y = centers[i]
|
| 188 |
+
r = radii[i]
|
| 189 |
+
# Check if touching any of the four boundaries
|
| 190 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 191 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 192 |
+
contact_count += 1
|
| 193 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 194 |
+
except Exception as e:
|
| 195 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 196 |
+
metrics["boundary_contact_count"] = 0
|
| 197 |
+
|
| 198 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_118/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": false,
|
| 3 |
+
"error": "NameError: name 'np' is not defined"
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_118/results/metrics.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 0.0,
|
| 3 |
+
"correct": false,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 0.0,
|
| 6 |
+
"execution_time_mean": 0.0,
|
| 7 |
+
"execution_time_std": 0.0,
|
| 8 |
+
"num_successful_runs": 0,
|
| 9 |
+
"num_valid_runs": 0,
|
| 10 |
+
"num_invalid_runs": 0,
|
| 11 |
+
"all_validation_errors": [],
|
| 12 |
+
"correct": false,
|
| 13 |
+
"validation_error": "NameError: name 'np' is not defined"
|
| 14 |
+
},
|
| 15 |
+
"auxiliary": {
|
| 16 |
+
"execution_successful": 1.0,
|
| 17 |
+
"avg_dist_to_center": 0.0,
|
| 18 |
+
"mean_radius": 0.0,
|
| 19 |
+
"radius_std_dev": 0.0,
|
| 20 |
+
"total_packed_area": 0.0,
|
| 21 |
+
"min_clearance_to_boundary": 0.0,
|
| 22 |
+
"max_overlap_value": 0.0,
|
| 23 |
+
"min_radius": 0.0,
|
| 24 |
+
"boundary_contact_count": 0
|
| 25 |
+
},
|
| 26 |
+
"auxiliary_descriptions": {
|
| 27 |
+
"mean_radius": "Average radius of all circles.",
|
| 28 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 29 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 30 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 31 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 32 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 33 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 34 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 35 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space.",
|
| 36 |
+
"num_tangent_pairs": "Counts the number of pairs of circles that are tangent (touching without overlapping) within a small tolerance. Indicates efficient packing."
|
| 37 |
+
},
|
| 38 |
+
"timestamp": 1770932465.668164,
|
| 39 |
+
"generation": 118
|
| 40 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
from typing import Dict, Any
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
# Human-readable description for every key that evaluate_aux() can emit.
# The wording reflects what the evaluator actually does: geometry metrics are
# computed for any loaded packing with the expected 26 circles, whether or not
# the primary evaluator judged that packing valid.
auxiliary_metric_descriptions = {
    "execution_successful": "Binary flag: 1.0 if the primary evaluation reported no validation error (e.g., NameError); 0.0 otherwise.",
    "avg_dist_to_center": "Average Euclidean distance of circle centers from the center of the unit square (0.5, 0.5), indicating compactness. Reported as 0 unless exactly 26 centers were loaded.",
    "mean_radius": "Average radius of all circles. Reported as 0 unless exactly 26 circles were loaded.",
    "radius_std_dev": "Standard deviation of radii. Reported as 0 unless exactly 26 circles were loaded.",
    "total_packed_area": "Total area covered by all circles (sum of pi * r^2). Reported as 0 unless exactly 26 circles were loaded.",
    "min_clearance_to_boundary": "Smallest signed gap between any circle's edge and the unit square boundary. A negative value indicates a circle is out of bounds. Reported as 0 unless exactly 26 circles were loaded.",
    "max_overlap_value": "Largest overlap (sum of radii minus center distance) between any two circles; 0.0 if no pair overlaps. Reported as 0 unless exactly 26 circles were loaded.",
    "num_tangent_pairs": "Number of circle pairs whose center distance is within 1e-4 of the sum of their radii (touching or very nearly touching). Indicates efficient packing.",
    "min_radius": "The minimum radius among all circles in the packing. Reported as 0 unless exactly 26 circles were loaded.",
    "boundary_contact_count": "Number of circles whose edge lies within 1e-6 of any unit square boundary. Indicates effective utilization of space. Reported as 0 unless exactly 26 circles were loaded.",
}
|
| 20 |
+
def evaluate_aux(results_dir: str, primary_result: Dict[str, Any] | None = None) -> Dict[str, Any]:
|
| 21 |
+
metrics = {}
|
| 22 |
+
|
| 23 |
+
# Initialize centers and radii to empty arrays for robustness
|
| 24 |
+
centers = np.array([])
|
| 25 |
+
radii = np.array([])
|
| 26 |
+
N_EXPECTED = 26 # Define N_EXPECTED early for consistent use
|
| 27 |
+
|
| 28 |
+
# Try to load the extra.npz file for detailed data
|
| 29 |
+
try:
|
| 30 |
+
extra_data_path = os.path.join(results_dir, "extra.npz")
|
| 31 |
+
if os.path.exists(extra_data_path):
|
| 32 |
+
with np.load(extra_data_path) as data:
|
| 33 |
+
centers = data["centers"]
|
| 34 |
+
radii = data["radii"]
|
| 35 |
+
else:
|
| 36 |
+
print(f"Warning: {extra_data_path} not found. Some metrics might be 0.")
|
| 37 |
+
except Exception as e:
|
| 38 |
+
print(f"Error loading extra.npz: {e}")
|
| 39 |
+
# If error, centers and radii remain empty arrays, handled below
|
| 40 |
+
|
| 41 |
+
# Get primary validation status
|
| 42 |
+
primary_correct_flag = False
|
| 43 |
+
validation_error_str = None
|
| 44 |
+
if primary_result:
|
| 45 |
+
if "correct" in primary_result:
|
| 46 |
+
primary_correct_flag = primary_result["correct"]
|
| 47 |
+
if "primary" in primary_result and "validation_error" in primary_result["primary"]:
|
| 48 |
+
validation_error_str = primary_result["primary"]["validation_error"]
|
| 49 |
+
else: # Fallback to metrics.json if primary_result is incomplete
|
| 50 |
+
metrics_json_path = os.path.join(results_dir, "results/metrics.json") # Correct path for metrics.json
|
| 51 |
+
if os.path.exists(metrics_json_path):
|
| 52 |
+
try:
|
| 53 |
+
with open(metrics_json_path, 'r') as f:
|
| 54 |
+
json_data = json.load(f)
|
| 55 |
+
if "correct" in json_data:
|
| 56 |
+
primary_correct_flag = json_data["correct"]
|
| 57 |
+
elif "primary" in json_data and "correct" in json_data["primary"]:
|
| 58 |
+
primary_correct_flag = json_data["primary"]["correct"]
|
| 59 |
+
elif "primary" in json_data and "validation_error" in json_data["primary"]:
|
| 60 |
+
validation_error_str = json_data["primary"]["validation_error"]
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error loading metrics.json for primary_correct_flag or validation_error: {e}")
|
| 63 |
+
|
| 64 |
+
# Metric: execution_successful (Binary flag for successful program execution)
|
| 65 |
+
try:
|
| 66 |
+
metrics["execution_successful"] = 1.0 if not validation_error_str else 0.0
|
| 67 |
+
except Exception as e:
|
| 68 |
+
print(f"Error calculating execution_successful: {e}")
|
| 69 |
+
metrics["execution_successful"] = 0.0
|
| 70 |
+
|
| 71 |
+
# Metric: avg_dist_to_center
|
| 72 |
+
try:
|
| 73 |
+
if centers.shape[0] == N_EXPECTED:
|
| 74 |
+
square_center = np.array([0.5, 0.5])
|
| 75 |
+
distances = np.linalg.norm(centers - square_center, axis=1)
|
| 76 |
+
metrics["avg_dist_to_center"] = float(np.mean(distances))
|
| 77 |
+
else:
|
| 78 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 79 |
+
except Exception as e:
|
| 80 |
+
print(f"Error calculating avg_dist_to_center: {e}")
|
| 81 |
+
metrics["avg_dist_to_center"] = 0.0
|
| 82 |
+
|
| 83 |
+
# If no valid data or incorrect number of circles, set dependent metrics to 0
|
| 84 |
+
# Also set num_circles_generated to actual count even if it's not N_EXPECTED
|
| 85 |
+
if centers.size == 0 or radii.size == 0:
|
| 86 |
+
metrics["mean_radius"] = 0.0
|
| 87 |
+
metrics["radius_std_dev"] = 0.0
|
| 88 |
+
metrics["total_packed_area"] = 0.0
|
| 89 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 90 |
+
metrics["max_overlap_value"] = 0.0
|
| 91 |
+
|
| 92 |
+
metrics["min_radius"] = 0.0
|
| 93 |
+
metrics["boundary_contact_count"] = 0
|
| 94 |
+
return metrics
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# Check if the number of circles matches N_EXPECTED before calculating other metrics
|
| 99 |
+
if centers.shape[0] != N_EXPECTED or radii.shape[0] != N_EXPECTED:
|
| 100 |
+
metrics["mean_radius"] = 0.0
|
| 101 |
+
metrics["radius_std_dev"] = 0.0
|
| 102 |
+
metrics["total_packed_area"] = 0.0
|
| 103 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 104 |
+
metrics["max_overlap_value"] = 0.0
|
| 105 |
+
metrics["min_radius"] = 0.0
|
| 106 |
+
metrics["boundary_contact_count"] = 0
|
| 107 |
+
return metrics # Exit early if counts are wrong, as other metrics might be nonsensical
|
| 108 |
+
|
| 109 |
+
# All subsequent metrics assume N_EXPECTED circles are present and no NaNs/Infs
|
| 110 |
+
|
| 111 |
+
# Metric 2: mean_radius
|
| 112 |
+
try:
|
| 113 |
+
metrics["mean_radius"] = float(np.mean(radii))
|
| 114 |
+
except Exception as e:
|
| 115 |
+
print(f"Error calculating mean_radius: {e}")
|
| 116 |
+
metrics["mean_radius"] = 0.0
|
| 117 |
+
|
| 118 |
+
# Metric 3: radius_std_dev
|
| 119 |
+
try:
|
| 120 |
+
metrics["radius_std_dev"] = float(np.std(radii)) if len(radii) > 1 else 0.0
|
| 121 |
+
except Exception as e:
|
| 122 |
+
print(f"Error calculating radius_std_dev: {e}")
|
| 123 |
+
metrics["radius_std_dev"] = 0.0
|
| 124 |
+
|
| 125 |
+
# Metric 4: total_packed_area
|
| 126 |
+
try:
|
| 127 |
+
metrics["total_packed_area"] = float(np.sum(np.pi * radii**2))
|
| 128 |
+
except Exception as e:
|
| 129 |
+
print(f"Error calculating total_packed_area: {e}")
|
| 130 |
+
metrics["total_packed_area"] = 0.0
|
| 131 |
+
|
| 132 |
+
# Metric 5: min_clearance_to_boundary
|
| 133 |
+
try:
|
| 134 |
+
min_clearance = float('inf')
|
| 135 |
+
for i in range(N_EXPECTED):
|
| 136 |
+
x, y = centers[i]
|
| 137 |
+
r = radii[i]
|
| 138 |
+
clearances = [x - r, 1 - (x + r), y - r, 1 - (y + r)]
|
| 139 |
+
min_clearance = min(min_clearance, *clearances)
|
| 140 |
+
metrics["min_clearance_to_boundary"] = min_clearance if math.isfinite(min_clearance) else 0.0
|
| 141 |
+
except Exception as e:
|
| 142 |
+
print(f"Error calculating min_clearance_to_boundary: {e}")
|
| 143 |
+
metrics["min_clearance_to_boundary"] = 0.0
|
| 144 |
+
|
| 145 |
+
# Metric 6: max_overlap_value
|
| 146 |
+
try:
|
| 147 |
+
max_overlap = 0.0 # Changed from -inf, as overlap should be >= 0 for this metric
|
| 148 |
+
for i in range(N_EXPECTED):
|
| 149 |
+
for j in range(i + 1, N_EXPECTED):
|
| 150 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 151 |
+
overlap = (radii[i] + radii[j]) - dist
|
| 152 |
+
if overlap > max_overlap:
|
| 153 |
+
max_overlap = overlap
|
| 154 |
+
metrics["max_overlap_value"] = max_overlap
|
| 155 |
+
except Exception as e:
|
| 156 |
+
print(f"Error calculating max_overlap_value: {e}")
|
| 157 |
+
metrics["max_overlap_value"] = 0.0
|
| 158 |
+
|
| 159 |
+
# Metric: num_tangent_pairs
|
| 160 |
+
try:
|
| 161 |
+
tangent_count = 0
|
| 162 |
+
epsilon = 1e-4 # Tolerance for tangency
|
| 163 |
+
for i in range(N_EXPECTED):
|
| 164 |
+
for j in range(i + 1, N_EXPECTED):
|
| 165 |
+
dist = np.linalg.norm(centers[i] - centers[j])
|
| 166 |
+
sum_radii = radii[i] + radii[j]
|
| 167 |
+
# Check for tangency: distance is approximately sum of radii
|
| 168 |
+
if abs(dist - sum_radii) < epsilon:
|
| 169 |
+
tangent_count += 1
|
| 170 |
+
metrics["num_tangent_pairs"] = float(tangent_count)
|
| 171 |
+
except Exception as e:
|
| 172 |
+
print(f"Error calculating num_tangent_pairs: {e}")
|
| 173 |
+
metrics["num_tangent_pairs"] = 0.0
|
| 174 |
+
|
| 175 |
+
# Metric 8: min_radius
|
| 176 |
+
try:
|
| 177 |
+
metrics["min_radius"] = float(np.min(radii))
|
| 178 |
+
except Exception as e:
|
| 179 |
+
print(f"Error calculating min_radius: {e}")
|
| 180 |
+
metrics["min_radius"] = 0.0
|
| 181 |
+
|
| 182 |
+
# Metric 10: boundary_contact_count
|
| 183 |
+
try:
|
| 184 |
+
contact_count = 0
|
| 185 |
+
epsilon = 1e-6 # Tolerance for floating point comparisons
|
| 186 |
+
for i in range(N_EXPECTED):
|
| 187 |
+
x, y = centers[i]
|
| 188 |
+
r = radii[i]
|
| 189 |
+
# Check if touching any of the four boundaries
|
| 190 |
+
if abs(x - r) < epsilon or abs(1 - (x + r)) < epsilon or \
|
| 191 |
+
abs(y - r) < epsilon or abs(1 - (y + r)) < epsilon:
|
| 192 |
+
contact_count += 1
|
| 193 |
+
metrics["boundary_contact_count"] = float(contact_count)
|
| 194 |
+
except Exception as e:
|
| 195 |
+
print(f"Error calculating boundary_contact_count: {e}")
|
| 196 |
+
metrics["boundary_contact_count"] = 0
|
| 197 |
+
|
| 198 |
+
return metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": false,
|
| 3 |
+
"error": "NameError: name 'np' is not defined"
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_119/results/metrics.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 0.0,
|
| 3 |
+
"correct": false,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 0.0,
|
| 6 |
+
"execution_time_mean": 0.0,
|
| 7 |
+
"execution_time_std": 0.0,
|
| 8 |
+
"num_successful_runs": 0,
|
| 9 |
+
"num_valid_runs": 0,
|
| 10 |
+
"num_invalid_runs": 0,
|
| 11 |
+
"all_validation_errors": [],
|
| 12 |
+
"correct": false,
|
| 13 |
+
"validation_error": "NameError: name 'np' is not defined"
|
| 14 |
+
},
|
| 15 |
+
"auxiliary": {
|
| 16 |
+
"execution_successful": 1.0,
|
| 17 |
+
"avg_dist_to_center": 0.0,
|
| 18 |
+
"mean_radius": 0.0,
|
| 19 |
+
"radius_std_dev": 0.0,
|
| 20 |
+
"total_packed_area": 0.0,
|
| 21 |
+
"min_clearance_to_boundary": 0.0,
|
| 22 |
+
"max_overlap_value": 0.0,
|
| 23 |
+
"min_radius": 0.0,
|
| 24 |
+
"boundary_contact_count": 0
|
| 25 |
+
},
|
| 26 |
+
"auxiliary_descriptions": {
|
| 27 |
+
"mean_radius": "Average radius of all circles.",
|
| 28 |
+
"radius_std_dev": "Standard deviation of radii.",
|
| 29 |
+
"total_packed_area": "Total area covered by all circles.",
|
| 30 |
+
"min_clearance_to_boundary": "Minimum distance of any circle to the unit square boundary.",
|
| 31 |
+
"max_overlap_value": "Maximum overlap between any two circles.",
|
| 32 |
+
"is_valid_packing": "Binary flag indicating if the packing is valid (no overlaps, in bounds, correct shapes).",
|
| 33 |
+
"min_radius": "The minimum radius among all circles in the packing.",
|
| 34 |
+
"avg_dist_to_center": "Average distance of circle centers to the center of the unit square, indicating compactness.",
|
| 35 |
+
"boundary_contact_count": "Number of circles touching any of the unit square boundaries, indicating effective utilization of space.",
|
| 36 |
+
"num_tangent_pairs": "Counts the number of pairs of circles that are tangent (touching without overlapping) within a small tolerance. Indicates efficient packing."
|
| 37 |
+
},
|
| 38 |
+
"timestamp": 1770932519.1297677,
|
| 39 |
+
"generation": 119
|
| 40 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (5.77 kB). View file
|
|
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/results/auxiliary_metrics_snapshot.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def evaluate_aux(results_dir, primary_result=None):
    """Placeholder auxiliary evaluator that reports no metrics.

    Both arguments are accepted but ignored; the result is always an empty
    dict.
    """
    no_metrics = dict()
    return no_metrics
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/results/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": false,
|
| 3 |
+
"error": "NameError: name 'np' is not defined"
|
| 4 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_12/results/metrics.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 0.0,
|
| 3 |
+
"correct": false,
|
| 4 |
+
"primary": {
|
| 5 |
+
"combined_score": 0.0,
|
| 6 |
+
"execution_time_mean": 0.0,
|
| 7 |
+
"execution_time_std": 0.0,
|
| 8 |
+
"num_successful_runs": 0,
|
| 9 |
+
"num_valid_runs": 0,
|
| 10 |
+
"num_invalid_runs": 0,
|
| 11 |
+
"all_validation_errors": [],
|
| 12 |
+
"correct": false,
|
| 13 |
+
"validation_error": "NameError: name 'np' is not defined"
|
| 14 |
+
},
|
| 15 |
+
"auxiliary": {},
|
| 16 |
+
"auxiliary_descriptions": {},
|
| 17 |
+
"timestamp": 1770924589.2499666,
|
| 18 |
+
"generation": 12
|
| 19 |
+
}
|
tasks/circle_packing/results/results_circle_packing_mmv1_3_gen200_periodic10_20260212_185849/gen_121/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (12.3 kB). View file
|
|
|