Spaces:

vLAR
/

PhysInOne-Leaderboard

Running

File size: 2,230 Bytes

"""Task: Physical Property Estimation (placeholder, scene-level).

⚠️ Task owner TODO: implement real MAE/RMSE-style metrics.
"""
from __future__ import annotations

import json
import math
import os
from typing import Any, Dict

from src.tasks.base import TaskPlugin


def validate_scene(scene_dir: str) -> None:
    """Check that a scene directory holds a well-formed ``prediction.json``.

    Only the container is validated: the file must exist, decode as UTF-8,
    parse as JSON, and hold an object at the top level. Individual values
    are not inspected here.

    Args:
        scene_dir: Directory that is expected to contain ``prediction.json``.

    Raises:
        ValueError: If ``prediction.json`` is missing, is not UTF-8 text, is
            not valid JSON, is nested too deeply to parse, or does not have
            an object at the top level.
    """
    path = os.path.join(scene_dir, "prediction.json")
    if not os.path.isfile(path):
        raise ValueError("prediction.json is missing from the scene ZIP root")
    try:
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"prediction.json is not valid JSON: {e.msg}") from e
    except UnicodeDecodeError as e:
        # Already a ValueError subclass, but the raw codec message is
        # cryptic for a submitter; say what is actually wrong.
        raise ValueError("prediction.json is not valid UTF-8 text") from e
    except RecursionError as e:
        # Pathologically nested input must be rejected as invalid rather
        # than escaping as a non-ValueError.
        raise ValueError("prediction.json is nested too deeply to parse") from e
    if not isinstance(payload, dict):
        raise ValueError("prediction.json must have an object at the top level")


def evaluate_scene(scene_dir: str, gt_scene: Any) -> Dict[str, float]:
    """Score one scene's ``prediction.json`` against its ground truth.

    Each ground-truth key is compared with the prediction stored under the
    same key. A key is skipped (and so counts against ``coverage`` only)
    when the prediction is absent/``null``, when either value cannot be
    converted to ``float``, or when the resulting error is not finite
    (NaN/Infinity on either side).

    Args:
        scene_dir: Directory containing ``prediction.json``.
        gt_scene: Ground truth for the scene. Anything that is not a dict
            is treated as an empty ground truth.

    Returns:
        A dict with two entries, both rounded to 6 decimals:
        ``"mae"`` is the mean absolute error over the scored keys (0.0 when
        nothing was scored) and ``"coverage"`` is the fraction of
        ground-truth keys that were scored (0.0 for an empty ground truth).
    """
    with open(os.path.join(scene_dir, "prediction.json"), "r", encoding="utf-8") as f:
        pred = json.load(f)
    # Mirror the ground-truth handling below: a non-object payload simply
    # scores nothing instead of crashing on ``.get``.
    if not isinstance(pred, dict):
        pred = {}
    gt = gt_scene if isinstance(gt_scene, dict) else {}

    abs_err = []
    for key, true_v in gt.items():
        p = pred.get(key)
        if p is None:
            continue
        try:
            err = abs(float(p) - float(true_v))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: JSON integers too large for a float.
            continue
        # json.load accepts NaN/Infinity (and float() parses "nan"/"inf");
        # one such value would turn the whole MAE into nan/inf. Skipping is
        # no more lenient than omitting the key, which is already allowed.
        if not math.isfinite(err):
            continue
        abs_err.append(err)

    # NOTE(review): with nothing scored, mae is 0.0 — the best possible value
    # for a lower-is-better metric. Consumers should read it with coverage.
    n = len(abs_err) or 1
    return {
        "mae": round(sum(abs_err) / n, 6),
        "coverage": round(len(abs_err) / (len(gt) or 1), 6),
    }


def load_gt_scene(scene_id: str, gt_dir: str) -> Any:
    """Load the ground-truth properties stored under ``gt_dir``.

    Args:
        scene_id: Not used by this loader; the file is located from
            ``gt_dir`` alone.
        gt_dir: Directory containing ``properties.json``.

    Returns:
        The parsed JSON content of ``properties.json``.
    """
    gt_path = os.path.join(gt_dir, "properties.json")
    with open(gt_path, mode="r", encoding="utf-8") as handle:
        properties = json.load(handle)
    return properties


# Plugin descriptor wiring this module's scene-level callbacks into TaskPlugin.
TASK = TaskPlugin(
    # Identity and user-facing text.
    name="task_property_estimation",
    display_name="Physical Property Estimation",
    description="Predict object physical properties such as mass and friction. Evaluated by MAE (lower is better).",
    # Markdown snippet showing the expected contents of a scene archive.
    expected_scene_layout=(
        "```\n"
        "<scene_id>.zip\n"
        "└── prediction.json   # {\"mass\": float, \"friction\": float, ...}\n"
        "```"
    ),
    # Scene-level callbacks defined in this module.
    load_gt_scene_fn=load_gt_scene,
    validate_scene_fn=validate_scene,
    evaluate_scene_fn=evaluate_scene,
    # Metric configuration; both column names are keys returned by evaluate_scene.
    leaderboard_columns=["mae", "coverage"],
    primary_metric="mae",
    higher_is_better=False,
)