Ray0202 committed on
Commit
004530b
·
1 Parent(s): 4527eaf

Update space

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/results.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_name": "demo-model-1",
4
+ "agent_name": "TemporalAgent-A",
5
+ "agent_type": "single-LLM",
6
+ "base_model": "demo-base-1",
7
+ "T1_acc": 71.2,
8
+ "T2_acc": 64.5,
9
+ "T3_acc": 69.8,
10
+ "T4_acc": 62.3,
11
+ "T2_MAE": 0.41,
12
+ "T4_sMAPE": 0.22,
13
+ "Retail_T3_acc": 70.1
14
+ },
15
+ {
16
+ "model_name": "demo-model-2",
17
+ "agent_name": "TemporalAgent-B",
18
+ "agent_type": "general agent",
19
+ "base_model": "demo-base-2",
20
+ "T1_acc": 75.4,
21
+ "T2_acc": 66.7,
22
+ "T3_acc": 72.9,
23
+ "T4_acc": 65.8,
24
+ "T2_MAE": 0.38,
25
+ "T4_sMAPE": 0.20,
26
+ "MIMIC_T3_acc": 71.6
27
+ },
28
+ {
29
+ "model_name": "demo-model-3",
30
+ "agent_name": "TemporalAgent-C",
31
+ "agent_type": "time-series-specific agent",
32
+ "base_model": "demo-base-3",
33
+ "T1_acc": 69.9,
34
+ "T2_acc": 63.2,
35
+ "T3_acc": 68.4,
36
+ "T4_acc": 61.7,
37
+ "T2_MAE": 0.44,
38
+ "T4_sMAPE": 0.24,
39
+ "PSML_T3_acc": 67.9
40
+ }
41
+ ]
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/leaderboard/load_results.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ import os
6
+ from typing import Iterable
7
+
8
+ import pandas as pd
9
+
10
+ from src.leaderboard.schema import SCHEMA
11
+
12
+
13
class ResultsValidationError(ValueError):
    """Raised when a results file cannot be loaded or fails schema validation."""
15
+
16
+
17
+ def _is_number(value) -> bool:
18
+ if not isinstance(value, (int, float)) or isinstance(value, bool):
19
+ return False
20
+ return math.isfinite(float(value))
21
+
22
+
23
+ def _load_json_records(path: str) -> list[dict]:
24
+ with open(path, "r") as fp:
25
+ data = json.load(fp)
26
+
27
+ if isinstance(data, list):
28
+ return data
29
+ if isinstance(data, dict) and "records" in data and isinstance(data["records"], list):
30
+ return data["records"]
31
+ raise ResultsValidationError(
32
+ "JSON must be a list of records or an object with a 'records' list."
33
+ )
34
+
35
+
36
+ def _load_csv_records(path: str) -> list[dict]:
37
+ df = pd.read_csv(path)
38
+ return df.to_dict(orient="records")
39
+
40
+
41
def load_records(path: str) -> list[dict]:
    """Load leaderboard records from a ``.json`` or ``.csv`` file.

    Raises:
        ResultsValidationError: if the file does not exist or has an
            unsupported extension.
    """
    if not os.path.exists(path):
        raise ResultsValidationError(f"Results file not found: {path}")

    loaders = {
        ".json": _load_json_records,
        ".csv": _load_csv_records,
    }
    extension = os.path.splitext(path)[1].lower()
    loader = loaders.get(extension)
    if loader is None:
        raise ResultsValidationError("Unsupported file type. Use .json or .csv")
    return loader(path)
53
+
54
+
55
def validate_records(records: Iterable[dict]) -> None:
    """Check every record against SCHEMA, raising on the first violation.

    Each record must be a dict, carry all identity fields as strings,
    include every required metric, and hold a finite numeric value for
    every non-identity key.

    Raises:
        ResultsValidationError: if the input is empty or any record
            violates the schema.
    """
    records = list(records)
    if not records:
        raise ResultsValidationError("Results file is empty.")

    identity = set(SCHEMA.identity_fields)
    for idx, record in enumerate(records):
        if not isinstance(record, dict):
            raise ResultsValidationError(f"Record {idx} is not an object.")

        missing = [f for f in SCHEMA.identity_fields if f not in record]
        if missing:
            raise ResultsValidationError(f"Record {idx} is missing fields: {missing}")

        # First identity field whose value is not a string, if any.
        bad = next(
            (f for f in SCHEMA.identity_fields if not isinstance(record[f], str)),
            None,
        )
        if bad is not None:
            raise ResultsValidationError(
                f"Record {idx} field '{bad}' must be a string."
            )

        missing_metrics = [m for m in SCHEMA.required_metrics if m not in record]
        if missing_metrics:
            raise ResultsValidationError(
                f"Record {idx} is missing required metrics: {missing_metrics}"
            )

        # Everything that is not an identity field is a metric and must
        # be a finite number.
        for key in record:
            if key in identity or _is_number(record[key]):
                continue
            raise ResultsValidationError(
                f"Record {idx} metric '{key}' must be numeric."
            )
87
+
88
+
89
def infer_metric_columns(records: Iterable[dict]) -> list[str]:
    """Return the metric column names present across *records*, in display order.

    Order: required metrics (schema order), then optional metrics
    (schema order), then any remaining metric keys alphabetically.
    Identity fields are never included.  Empty input yields [].
    """
    records = list(records)
    if not records:
        return []

    present = set()
    for record in records:
        present.update(record)
    present -= set(SCHEMA.identity_fields)

    ordered = [key for key in SCHEMA.required_metrics if key in present]
    ordered += [key for key in SCHEMA.optional_metrics if key in present]
    ordered += sorted(present.difference(ordered))
    return ordered
110
+
111
+
112
def build_dataframe(records: list[dict]) -> tuple[pd.DataFrame, list[str]]:
    """Validate *records* and build the leaderboard DataFrame.

    Returns:
        The DataFrame with columns ordered as identity fields followed
        by the inferred metric columns, plus that column order.

    Raises:
        ResultsValidationError: propagated from validate_records.
    """
    validate_records(records)
    columns = list(SCHEMA.identity_fields) + infer_metric_columns(records)
    frame = pd.DataFrame.from_records(records)
    return frame[columns], columns
src/leaderboard/schema.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class TemporalBenchSchema:
    """Column schema for the TemporalBench leaderboard results files."""

    # String columns identifying a submission.
    identity_fields: tuple[str, ...] = ("model_name", "agent_name", "agent_type", "base_model")
    # Accuracy metrics every record must report.
    required_metrics: tuple[str, ...] = ("T1_acc", "T2_acc", "T3_acc", "T4_acc")
    # Error metrics a record may additionally report.
    optional_metrics: tuple[str, ...] = ("T2_MAE", "T4_sMAPE")


# Shared singleton used by the loader/validator modules.
SCHEMA = TemporalBenchSchema()