# vLAR's picture
# leaderboard
# b4d728d
"""Task: Physical Property Estimation (placeholder, scene-level).
⚠️ Task owner TODO: implement real MAE/RMSE-style metrics.
"""
from __future__ import annotations

import json
import math
import os
from typing import Any, Dict

from src.tasks.base import TaskPlugin
def validate_scene(scene_dir: str) -> None:
    """Check that a scene directory holds a structurally usable prediction.json.

    Only the file's structure is validated: it must exist directly under
    ``scene_dir``, decode as UTF-8, parse as JSON, and have a JSON object at
    the top level. Individual property values are not inspected here.

    Args:
        scene_dir: Directory expected to contain ``prediction.json``
            (presumably the extracted root of the scene ZIP, going by the
            error message below).

    Raises:
        ValueError: If the file is missing, is not UTF-8 text, is not valid
            JSON, is nested too deeply to parse, or its top-level value is
            not an object.
    """
    path = os.path.join(scene_dir, "prediction.json")
    if not os.path.isfile(path):
        raise ValueError("prediction.json is missing from the scene ZIP root")
    try:
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"prediction.json is not valid JSON: {e.msg}") from e
    except UnicodeDecodeError as e:
        # Already a ValueError subclass, but its raw codec message is of
        # little use to a submitter; report it in the same style as above.
        raise ValueError("prediction.json is not valid UTF-8 text") from e
    except RecursionError as e:
        # Pathologically nested input must fail validation, not crash it.
        raise ValueError("prediction.json is nested too deeply") from e
    if not isinstance(payload, dict):
        raise ValueError("prediction.json must have an object at the top level")
def evaluate_scene(scene_dir: str, gt_scene: Any) -> Dict[str, float]:
    """Score one scene's prediction.json against its ground truth.

    Every ground-truth key is looked up in the prediction and both values are
    compared as floats. A key is skipped (it contributes to neither the error
    sum nor the coverage numerator) when the prediction is missing or null,
    when either value cannot be converted to a float, or when the resulting
    error is not finite (NaN or infinity).

    Args:
        scene_dir: Directory containing ``prediction.json``.
        gt_scene: Ground-truth mapping of property name to value. Anything
            that is not a dict is treated as an empty mapping.

    Returns:
        A dict with two entries, each rounded to 6 decimals:
        ``"mae"`` is the mean absolute error over the scored keys, and
        ``"coverage"`` is the fraction of ground-truth keys that were scored.
    """
    with open(os.path.join(scene_dir, "prediction.json"), "r", encoding="utf-8") as f:
        pred = json.load(f)
    gt = gt_scene if isinstance(gt_scene, dict) else {}

    abs_err = []
    for k, true_v in gt.items():
        p = pred.get(k)
        if p is None:
            continue
        try:
            err = abs(float(p) - float(true_v))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: a JSON integer too large to fit in a float.
            continue
        if not math.isfinite(err):
            # json.load accepts NaN/Infinity; one such value would otherwise
            # turn the whole scene's MAE into NaN/inf.
            continue
        abs_err.append(err)

    # NOTE(review): when nothing was scored, "mae" is reported as 0.0, which
    # is the best possible value for a lower-is-better metric; only
    # "coverage" == 0.0 reveals it. Left unchanged pending the real metric.
    n = len(abs_err) or 1
    return {
        "mae": round(sum(abs_err) / n, 6),
        "coverage": round(len(abs_err) / (len(gt) or 1), 6),
    }
def load_gt_scene(scene_id: str, gt_dir: str) -> Any:
    """Read the ground-truth properties file found in ``gt_dir``.

    Args:
        scene_id: Scene identifier. Not used by this loader; the file is
            located from ``gt_dir`` alone.
        gt_dir: Directory containing ``properties.json``.

    Returns:
        The parsed JSON content of ``properties.json``.
    """
    gt_path = os.path.join(gt_dir, "properties.json")
    with open(gt_path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
# Plugin registration for this task: hands the scene validator, per-scene
# evaluator and ground-truth loader defined in this module to TaskPlugin,
# with "mae" as the primary metric (lower is better).
TASK = TaskPlugin(
    name="task_property_estimation",
    display_name="Physical Property Estimation",
    description=(
        "Predict object physical properties such as mass and friction. "
        "Evaluated by MAE (lower is better)."
    ),
    # Fenced snippet describing the expected submission layout.
    expected_scene_layout="\n".join(
        [
            "```",
            "<scene_id>.zip",
            '└── prediction.json # {"mass": float, "friction": float, ...}',
            "```",
        ]
    ),
    validate_scene_fn=validate_scene,
    evaluate_scene_fn=evaluate_scene,
    load_gt_scene_fn=load_gt_scene,
    primary_metric="mae",
    higher_is_better=False,
    leaderboard_columns=["mae", "coverage"],
)