File size: 2,230 Bytes
54708e8
03b0173
b4d728d
03b0173
 
 
 
 
 
 
 
 
 
54708e8
 
03b0173
b4d728d
03b0173
 
 
 
b4d728d
54708e8
b4d728d
03b0173
 
54708e8
 
 
 
 
 
 
03b0173
 
 
54708e8
03b0173
 
54708e8
03b0173
54708e8
 
03b0173
 
 
54708e8
 
 
 
 
03b0173
 
 
b4d728d
54708e8
03b0173
54708e8
 
03b0173
 
54708e8
 
 
03b0173
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Task: Physical Property Estimation (placeholder, scene-level).

⚠️ Task owner TODO: implement real MAE/RMSE-style metrics.
"""
from __future__ import annotations

import json
import math
import os
from typing import Any, Dict

from src.tasks.base import TaskPlugin


def validate_scene(scene_dir: str) -> None:
    """Check that a scene directory holds a well-formed ``prediction.json``.

    The file must exist directly inside ``scene_dir``, decode as UTF-8,
    parse as JSON, and contain a JSON object at the top level. The values
    inside the object are not inspected here.

    Args:
        scene_dir: Directory containing the extracted scene submission.

    Raises:
        ValueError: If the file is missing, is not valid UTF-8, is not
            valid JSON, or its top-level value is not an object.
    """
    path = os.path.join(scene_dir, "prediction.json")
    if not os.path.isfile(path):
        raise ValueError("prediction.json is missing from the scene ZIP root")
    try:
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"prediction.json is not valid JSON: {e.msg}") from e
    except UnicodeDecodeError as e:
        # Without this branch the raw codec error escapes; it is still a
        # ValueError, but its message never mentions which file is broken.
        raise ValueError(
            f"prediction.json is not valid UTF-8 text: {e.reason}"
        ) from e
    if not isinstance(payload, dict):
        raise ValueError("prediction.json must have an object at the top level")


def evaluate_scene(scene_dir: str, gt_scene: Any) -> Dict[str, float]:
    """Score one scene's ``prediction.json`` against its ground truth.

    For every key in the ground truth, the predicted value under the same
    key is compared as a float. A key is skipped (and does not count
    towards coverage) when the prediction is absent or ``None``, when
    either side cannot be converted to a float, or when the resulting
    error is not finite (e.g. a ``NaN``/``Infinity`` literal in the JSON).

    Args:
        scene_dir: Directory containing the submission's ``prediction.json``.
        gt_scene: Ground truth for the scene; anything that is not a dict
            is treated as an empty ground truth.

    Returns:
        A dict with:
          * ``"mae"`` - mean absolute error over the scored keys, rounded
            to 6 decimals; ``0.0`` when no key could be scored.
          * ``"coverage"`` - fraction of ground-truth keys that were
            scored, rounded to 6 decimals; ``0.0`` for empty ground truth.

    NOTE(review): ``mae`` is 0.0 when nothing was scored, so a submission
    with zero coverage reports the best possible MAE. Consumers should
    read ``mae`` together with ``coverage``.
    """
    with open(os.path.join(scene_dir, "prediction.json"), "r", encoding="utf-8") as f:
        pred = json.load(f)
    # A non-object payload has no keys to look up; treat it as "no
    # predictions" instead of failing with AttributeError on ``.get``.
    if not isinstance(pred, dict):
        pred = {}
    gt = gt_scene if isinstance(gt_scene, dict) else {}
    abs_err = []
    for k, true_v in gt.items():
        p = pred.get(k)
        if p is None:
            continue
        try:
            err = abs(float(p) - float(true_v))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an integer literal too large for a float.
            continue
        # Python's json accepts NaN/Infinity literals; one such value would
        # otherwise turn the whole scene's MAE into nan/inf.
        if not math.isfinite(err):
            continue
        abs_err.append(err)
    n = len(abs_err) or 1
    return {
        "mae": round(sum(abs_err) / n, 6),
        "coverage": round(len(abs_err) / (len(gt) or 1), 6),
    }


def load_gt_scene(scene_id: str, gt_dir: str) -> Any:
    """Read the ground-truth ``properties.json`` stored in ``gt_dir``.

    Args:
        scene_id: Identifier of the scene; not consulted by this loader.
        gt_dir: Directory that contains ``properties.json``.

    Returns:
        The parsed JSON content of ``properties.json``.
    """
    gt_path = os.path.join(gt_dir, "properties.json")
    with open(gt_path, mode="r", encoding="utf-8") as handle:
        properties = json.load(handle)
    return properties


# Plugin descriptor for this task: wires the scene-level validate / evaluate /
# ground-truth-loading callables defined in this module into a TaskPlugin.
TASK = TaskPlugin(
    # Identity and human-readable text.
    name="task_property_estimation",
    display_name="Physical Property Estimation",
    description=(
        "Predict object physical properties such as mass and friction. "
        "Evaluated by MAE (lower is better)."
    ),
    # Fenced text block showing the expected contents of a scene ZIP.
    expected_scene_layout="\n".join(
        [
            "```",
            "<scene_id>.zip",
            '└── prediction.json   # {"mass": float, "friction": float, ...}',
            "```",
        ]
    ),
    # Per-scene callables.
    validate_scene_fn=validate_scene,
    evaluate_scene_fn=evaluate_scene,
    load_gt_scene_fn=load_gt_scene,
    # Ranking: "mae" is the primary metric and smaller values are better.
    primary_metric="mae",
    higher_is_better=False,
    leaderboard_columns=["mae", "coverage"],
)