Solves 500 Errors For Some Users

#1
by Tonic - opened
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +1 -1
  2. .gitignore +13 -1
  3. .pre-commit-config.yaml +53 -0
  4. .streamlit/config.toml +0 -2
  5. CLAUDE.md +0 -82
  6. Dockerfile +0 -21
  7. Makefile +13 -0
  8. README.md +36 -12
  9. pages/chronos_bench_ii.py → app.py +70 -152
  10. fev-leaderboard-app.py +0 -9
  11. pages/about.py +0 -19
  12. pages/fev_bench.py +0 -219
  13. pyproject.toml +13 -12
  14. requirements.txt +8 -4
  15. save_tables.py +0 -212
  16. src/about.py +50 -0
  17. src/colors.py +0 -6
  18. src/custom_html_js.py +99 -0
  19. src/formatting.py +31 -0
  20. src/streamlit_app.py +0 -9
  21. src/strings.py +0 -114
  22. src/task_groups.py +0 -209
  23. src/utils.py +0 -374
  24. tables/domain_cloud/leaderboard_MASE.csv +0 -16
  25. tables/domain_cloud/leaderboard_SQL.csv +0 -16
  26. tables/domain_cloud/leaderboard_WAPE.csv +0 -16
  27. tables/domain_cloud/leaderboard_WQL.csv +0 -16
  28. tables/domain_cloud/pairwise_MASE.csv +0 -226
  29. tables/domain_cloud/pairwise_SQL.csv +0 -226
  30. tables/domain_cloud/pairwise_WAPE.csv +0 -226
  31. tables/domain_cloud/pairwise_WQL.csv +0 -226
  32. tables/domain_econ/leaderboard_MASE.csv +0 -16
  33. tables/domain_econ/leaderboard_SQL.csv +0 -16
  34. tables/domain_econ/leaderboard_WAPE.csv +0 -16
  35. tables/domain_econ/leaderboard_WQL.csv +0 -16
  36. tables/domain_econ/pairwise_MASE.csv +0 -226
  37. tables/domain_econ/pairwise_SQL.csv +0 -226
  38. tables/domain_econ/pairwise_WAPE.csv +0 -226
  39. tables/domain_econ/pairwise_WQL.csv +0 -226
  40. tables/domain_energy/leaderboard_MASE.csv +0 -16
  41. tables/domain_energy/leaderboard_SQL.csv +0 -16
  42. tables/domain_energy/leaderboard_WAPE.csv +0 -16
  43. tables/domain_energy/leaderboard_WQL.csv +0 -16
  44. tables/domain_energy/pairwise_MASE.csv +0 -226
  45. tables/domain_energy/pairwise_SQL.csv +0 -226
  46. tables/domain_energy/pairwise_WAPE.csv +0 -226
  47. tables/domain_energy/pairwise_WQL.csv +0 -226
  48. tables/domain_health/leaderboard_MASE.csv +0 -16
  49. tables/domain_health/leaderboard_SQL.csv +0 -16
  50. tables/domain_health/leaderboard_WAPE.csv +0 -16
.gitattributes CHANGED
@@ -25,7 +25,6 @@
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
29
  *.tgz filter=lfs diff=lfs merge=lfs -text
30
  *.wasm filter=lfs diff=lfs merge=lfs -text
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1 +1,13 @@
1
- __pycache__
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_evals/
2
+ venv/
3
+ __pycache__/
4
+ .env
5
+ .ipynb_checkpoints
6
+ *ipynb
7
+ .vscode/
8
+
9
+ eval-queue/
10
+ eval-results/
11
+ eval-queue-bk/
12
+ eval-results-bk/
13
+ logs/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ default_language_version:
16
+ python: python3
17
+
18
+ ci:
19
+ autofix_prs: true
20
+ autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
21
+ autoupdate_schedule: quarterly
22
+
23
+ repos:
24
+ - repo: https://github.com/pre-commit/pre-commit-hooks
25
+ rev: v4.3.0
26
+ hooks:
27
+ - id: check-yaml
28
+ - id: check-case-conflict
29
+ - id: detect-private-key
30
+ - id: check-added-large-files
31
+ args: ['--maxkb=1000']
32
+ - id: requirements-txt-fixer
33
+ - id: end-of-file-fixer
34
+ - id: trailing-whitespace
35
+
36
+ - repo: https://github.com/PyCQA/isort
37
+ rev: 5.12.0
38
+ hooks:
39
+ - id: isort
40
+ name: Format imports
41
+
42
+ - repo: https://github.com/psf/black
43
+ rev: 22.12.0
44
+ hooks:
45
+ - id: black
46
+ name: Format code
47
+ additional_dependencies: ['click==8.0.2']
48
+
49
+ - repo: https://github.com/charliermarsh/ruff-pre-commit
50
+ # Ruff version.
51
+ rev: 'v0.0.267'
52
+ hooks:
53
+ - id: ruff
.streamlit/config.toml DELETED
@@ -1,2 +0,0 @@
1
- [theme]
2
- base = "light"
 
 
 
CLAUDE.md DELETED
@@ -1,82 +0,0 @@
1
- # CLAUDE.md
2
-
3
- This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
-
5
- ## Project Overview
6
-
7
- fev-bench Leaderboard is a Streamlit web application displaying time series forecasting model evaluation results from the fev-bench benchmark. It evaluates 30+ forecasting models using multiple metrics (SQL, MASE, WQL, WAPE) across 100 benchmark tasks.
8
-
9
- ## Common Commands
10
-
11
- ```bash
12
- # Run the Streamlit app locally
13
- uv run streamlit run fev-leaderboard-app.py --server.port=8501 --server.address=0.0.0.0
14
-
15
- # Regenerate leaderboard tables from autogluon/fev repo (defaults to main branch)
16
- uv run python save_tables.py [commit] # e.g., uv run python save_tables.py abc123
17
-
18
- # Docker build and run
19
- docker build -t fev-leaderboard .
20
- docker run -p 8501:8501 fev-leaderboard
21
- ```
22
-
23
- Note: Use `uv run` prefix for all Python commands in this project.
24
-
25
- No test or lint frameworks are configured.
26
-
27
- ## Architecture
28
-
29
- ```
30
- fev-leaderboard-app.py # Main entry point (Streamlit multi-page router)
31
- save_tables.py # Generates pre-computed CSV tables from raw summaries
32
- pages/
33
- ├── fev_bench.py # Main leaderboard (100 tasks, loads from tables/)
34
- ├── chronos_bench_ii.py # Alternative leaderboard (27 tasks, fetches from GitHub)
35
- └── about.py # Help page with links
36
- src/
37
- ├── utils.py # Visualization, formatting, MODEL_CONFIG, color palette
38
- ├── strings.py # UI text, metric descriptions, paper citations
39
- └── task_groups.py # Task groupings by frequency and domain
40
- tables/ # Pre-generated CSVs
41
- ├── pivot_*.csv # Full pivot tables (filtered in app by task group)
42
- ├── summaries.csv # Raw evaluation summaries
43
- └── {group}/ # Subdirectories for each task group (full, mini, frequency_*, domain_*)
44
- ├── leaderboard_*.csv # Leaderboard tables per metric
45
- └── pairwise_*.csv # Pairwise comparison tables per metric
46
- ```
47
-
48
- **Data flow**: GitHub (autogluon/fev) → `save_tables.py` → pre-computed tables → `fev_bench.py` visualization
49
-
50
- ## Key Modules
51
-
52
- **`src/utils.py`**: Core module containing:
53
- - `MODEL_CONFIG`: Dict mapping model names to (huggingface_url, organization, is_zero_shot, model_type)
54
- - `ALL_METRICS`: Dict with SQL, MASE, WQL, WAPE definitions
55
- - `format_leaderboard()`, `construct_bar_chart()`, `construct_pairwise_chart()`, `construct_pivot_table()`: Styling functions
56
- - `COLORS`: Custom palette (purple, gold, silver, bronze)
57
-
58
- **`src/strings.py`**: Documentation strings for metric formulas, win rate/skill score calculations, imputation strategies
59
-
60
- ## Metrics
61
-
62
- | Metric | Type | Description |
63
- |--------|------|-------------|
64
- | SQL | Probabilistic | Scaled Quantile Loss (scale-invariant) |
65
- | MASE | Point | Mean Absolute Scaled Error (scale-invariant) |
66
- | WQL | Probabilistic | Weighted Quantile Loss (scale-dependent) |
67
- | WAPE | Point | Weighted Absolute Percentage Error (scale-dependent) |
68
-
69
- ## Model Types
70
-
71
- Models are categorized as DL (deep learning) or ST (statistical) in `MODEL_CONFIG`. This affects color-coding in visualizations (blue vs. orange).
72
-
73
- ## Imputation Strategy
74
-
75
- - **Failed tasks**: Replaced with Seasonal Naive scores
76
- - **Leaky tasks** (training corpus overlap for zero-shot models): Replaced with Chronos-Bolt scores
77
-
78
- ## External References
79
-
80
- - fev-bench paper: https://arxiv.org/abs/2509.26468
81
- - fev library docs: https://autogluon.github.io/fev/latest/
82
- - GitHub: https://github.com/autogluon/fev
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile DELETED
@@ -1,21 +0,0 @@
1
- FROM python:3.13.5-slim
2
-
3
- RUN useradd -m -u 1000 user
4
- WORKDIR /app
5
-
6
- RUN apt-get update && apt-get install -y \
7
- build-essential \
8
- curl \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- COPY --chown=user ./requirements.txt requirements.txt
13
- COPY --chown=user . /app
14
-
15
- RUN pip3 install -r requirements.txt
16
-
17
- EXPOSE 8501
18
-
19
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
-
21
- ENTRYPOINT ["streamlit", "run", "fev-leaderboard-app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Makefile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: style format
2
+
3
+
4
+ style:
5
+ python -m black --line-length 119 .
6
+ python -m isort .
7
+ ruff check --fix .
8
+
9
+
10
+ quality:
11
+ python -m black --check --line-length 119 .
12
+ python -m isort --check-only .
13
+ ruff check .
README.md CHANGED
@@ -1,20 +1,44 @@
1
  ---
2
- title: fev-bench
3
- emoji: 🏆
4
  colorFrom: green
5
  colorTo: indigo
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Forecast evaluation benchmark
12
  license: apache-2.0
13
  ---
14
 
15
- # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
 
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Fev Leaderboard
3
+ emoji: 🥇
4
  colorFrom: green
5
  colorTo: indigo
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: true
 
 
 
9
  license: apache-2.0
10
  ---
11
 
12
+ # Start the configuration
13
 
14
+ Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
15
 
16
+ Results files should have the following format and be stored as json files:
17
+ ```json
18
+ {
19
+ "config": {
20
+ "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
21
+ "model_name": "path of the model on the hub: org/model",
22
+ "model_sha": "revision on the hub",
23
+ },
24
+ "results": {
25
+ "task_name": {
26
+ "metric_name": score,
27
+ },
28
+ "task_name2": {
29
+ "metric_name": score,
30
+ }
31
+ }
32
+ }
33
+ ```
34
+
35
+ Request files are created automatically by this tool.
36
+
37
+ If you encounter a problem on the space, don't hesitate to restart it to remove the created eval-queue, eval-queue-bk, eval-results and eval-results-bk folders.
38
+
39
+ # Code logic for more complex edits
40
+
41
+ You'll find
42
+ - the main table's column names and properties in `src/display/utils.py`
43
+ - the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
44
+ - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
pages/chronos_bench_ii.py → app.py RENAMED
@@ -1,41 +1,18 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- sys.path.append(str(Path(__file__).parent.parent))
5
-
6
  import fev
 
7
  import pandas as pd
8
- import streamlit as st
9
- from streamlit.elements.lib.column_types import ColumnConfig
10
-
11
- from src.strings import (
12
- CHRONOS_BENCHMARK_BASIC_INFO,
13
- CHRONOS_BENCHMARK_DETAILS,
14
- CITATION_CHRONOS,
15
- CITATION_FEV,
16
- CITATION_HEADER,
17
- PAIRWISE_BENCHMARK_DETAILS,
18
- get_pivot_legend,
19
- )
20
- from src.utils import (
21
- construct_bar_chart,
22
- construct_pairwise_chart,
23
- construct_pivot_table,
24
- format_leaderboard,
25
- format_metric_name,
26
- get_metric_description,
27
  )
28
 
29
- st.set_page_config(layout="wide", page_title="FEV Benchmark Leaderboard", page_icon=":material/trophy:")
30
 
31
- TITLE = "<h1 style='text-align: center; font-size: 350%;'>Chronos Benchmark II</h1>"
32
- BASELINE_MODEL = "seasonal_naive"
33
- LEAKAGE_IMPUTATION_MODEL = "chronos_bolt_base"
34
- SORT_COL = "win_rate"
35
- N_RESAMPLES_FOR_CI = 1000
36
- TOP_K_MODELS_TO_PLOT = 15
37
- AVAILABLE_METRICS = ["WQL", "MASE"]
38
- SUMMARY_URLS = [
39
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_arima.csv",
40
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_ets.csv",
41
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_theta.csv",
@@ -58,122 +35,63 @@ SUMMARY_URLS = [
58
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/tirex.csv",
59
  ]
60
 
61
-
62
- @st.cache_data()
63
- def load_summaries():
64
- summaries = []
65
- for url in SUMMARY_URLS:
66
- df = pd.read_csv(url)
67
- summaries.append(df)
68
- return pd.concat(summaries, ignore_index=True)
69
-
70
-
71
- @st.cache_data()
72
- def get_leaderboard(metric_name: str) -> pd.DataFrame:
73
- summaries = load_summaries()
74
- lb = fev.analysis.leaderboard(
75
- summaries=summaries,
76
- metric_column=metric_name,
77
- missing_strategy="impute",
78
- baseline_model=BASELINE_MODEL,
79
- leakage_imputation_model=LEAKAGE_IMPUTATION_MODEL,
80
- )
81
- lb = lb.astype("float64").reset_index()
82
-
83
- lb["skill_score"] = lb["skill_score"] * 100
84
- lb["win_rate"] = lb["win_rate"] * 100
85
- lb["num_failures"] = lb["num_failures"] / summaries["task_name"].nunique() * 100
86
- return lb
87
-
88
-
89
- @st.cache_data()
90
- def get_pairwise(metric_name: str, included_models: list[str]) -> pd.DataFrame:
91
- if BASELINE_MODEL not in included_models:
92
- included_models = included_models + [BASELINE_MODEL]
93
- summaries = load_summaries()
94
- return (
95
- fev.analysis.pairwise_comparison(
96
- summaries,
97
- included_models=included_models,
98
- metric_column=metric_name,
99
- baseline_model=BASELINE_MODEL,
100
- missing_strategy="impute",
101
- n_resamples=N_RESAMPLES_FOR_CI,
102
- leakage_imputation_model=LEAKAGE_IMPUTATION_MODEL,
103
- )
104
- .round(3)
105
- .reset_index()
106
- )
107
-
108
-
109
- with st.sidebar:
110
- selected_metric = st.selectbox("Evaluation Metric", options=AVAILABLE_METRICS, format_func=format_metric_name)
111
- st.caption(get_metric_description(selected_metric))
112
-
113
- cols = st.columns(spec=[0.025, 0.95, 0.025])
114
-
115
- with cols[1] as main_container:
116
- st.markdown(TITLE, unsafe_allow_html=True)
117
-
118
- metric_df = get_leaderboard(selected_metric).sort_values(by=SORT_COL, ascending=False)
119
- top_k_models = metric_df.head(TOP_K_MODELS_TO_PLOT)["model_name"].tolist()
120
- pairwise_df = get_pairwise(selected_metric, included_models=top_k_models)
121
-
122
- st.markdown("## :material/trophy: Leaderboard", unsafe_allow_html=True)
123
- st.markdown(CHRONOS_BENCHMARK_BASIC_INFO, unsafe_allow_html=True)
124
- df_styled = format_leaderboard(metric_df)
125
- st.dataframe(
126
- df_styled,
127
- use_container_width=True,
128
- hide_index=True,
129
- column_config={
130
- "model_name": ColumnConfig(label="Model Name", alignment="left"),
131
- "win_rate": st.column_config.NumberColumn(label="Avg. win rate (%)", format="%.1f"),
132
- "skill_score": st.column_config.NumberColumn(label="Skill score (%)", format="%.1f"),
133
- "median_inference_time_s": st.column_config.NumberColumn(label="Median runtime (s)", format="%.1f"),
134
- "training_corpus_overlap": st.column_config.NumberColumn(label="Leakage (%)", format="%d"),
135
- "num_failures": st.column_config.NumberColumn(label="Failed tasks (%)", format="%.0f"),
136
- "zero_shot": ColumnConfig(label="Zero-shot", alignment="center"),
137
- "org": ColumnConfig(label="Organization", alignment="left"),
138
- "link": st.column_config.LinkColumn(label="Link", display_text=":material/open_in_new:"),
139
- },
140
- )
141
-
142
- with st.expander("See details"):
143
- st.markdown(CHRONOS_BENCHMARK_DETAILS, unsafe_allow_html=True)
144
-
145
- st.markdown("## :material/bar_chart: Pairwise comparison", unsafe_allow_html=True)
146
- chart_col_1, _, chart_col_2 = st.columns(spec=[0.45, 0.1, 0.45])
147
-
148
- with chart_col_1:
149
- st.altair_chart(
150
- construct_pairwise_chart(pairwise_df, col="win_rate", metric_name=selected_metric),
151
- use_container_width=True,
152
- )
153
-
154
- with chart_col_2:
155
- st.altair_chart(
156
- construct_pairwise_chart(pairwise_df, col="skill_score", metric_name=selected_metric),
157
- use_container_width=True,
158
- )
159
-
160
- with st.expander("See details"):
161
- st.markdown(PAIRWISE_BENCHMARK_DETAILS, unsafe_allow_html=True)
162
-
163
- st.markdown("## :material/table_chart: Results for individual tasks", unsafe_allow_html=True)
164
- with st.expander("Show detailed results"):
165
- st.markdown(get_pivot_legend(BASELINE_MODEL, LEAKAGE_IMPUTATION_MODEL), unsafe_allow_html=True)
166
- st.dataframe(
167
- construct_pivot_table(
168
- summaries=load_summaries(),
169
- metric_name=selected_metric,
170
- baseline_model=BASELINE_MODEL,
171
- leakage_imputation_model=LEAKAGE_IMPUTATION_MODEL,
172
- )
173
- )
174
-
175
- st.divider()
176
- st.markdown("### :material/format_quote: Citation", unsafe_allow_html=True)
177
- st.markdown(CITATION_HEADER)
178
- st.markdown(CITATION_FEV)
179
- st.markdown(CITATION_CHRONOS)
 
 
 
 
 
 
1
  import fev
2
+ import gradio as gr
3
  import pandas as pd
4
+
5
+ from src import about
6
+ from src.custom_html_js import custom_css
7
+ from src.formatting import make_clickable_model
8
+
9
+ # Load the CSV data into a pandas DataFrame
10
+ df = pd.read_csv(
11
+ "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv"
 
 
 
 
 
 
 
 
 
 
 
12
  )
13
 
 
14
 
15
+ summary_urls = [
 
 
 
 
 
 
 
16
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_arima.csv",
17
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_ets.csv",
18
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_theta.csv",
 
35
  "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/tirex.csv",
36
  ]
37
 
38
+ rename_cols = {
39
+ "gmean_relative_error": "Average relative error",
40
+ "avg_rank": "Average rank",
41
+ "median_inference_time_s": "Median inference time (s)",
42
+ "training_corpus_overlap": "Training corpus overlap (%)",
43
+ }
44
+ selected_cols = list(rename_cols.keys())
45
+
46
+
47
+ def highlight_zeroshot(styler):
48
+ """Highlight training overlap for zero-shot models with bold green."""
49
+
50
+ def style_func(val):
51
+ if val == 0:
52
+ return "color: green; font-weight: bold"
53
+ else:
54
+ return "color: black"
55
+
56
+ return styler.map(style_func, subset=["Training corpus overlap (%)"])
57
+
58
+
59
+ leaderboards = {}
60
+ for metric in ["WQL", "MASE"]:
61
+ lb = fev.leaderboard(summary_urls, metric_column=metric)[selected_cols].rename(columns=rename_cols)
62
+ lb = lb.astype("float64").round(3).reset_index()
63
+ lb["Training corpus overlap (%)"] = (lb["Training corpus overlap (%)"] * 100).round(1)
64
+ lb["model_name"] = lb["model_name"].apply(make_clickable_model)
65
+ leaderboards[metric] = highlight_zeroshot(lb.style).format(precision=3)
66
+
67
+
68
+ with gr.Blocks(css=custom_css) as demo:
69
+ gr.HTML(about.TITLE)
70
+ gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")
71
+
72
+ with gr.Tabs(elem_classes="tab-buttons"):
73
+ with gr.Tab("🏅 Chronos Benchmark II", id=0):
74
+ with gr.Column():
75
+ gr.Markdown(about.CHRONOS_BENCHMARK, elem_classes="markdown-text")
76
+ with gr.Tabs():
77
+ with gr.Tab("📊 Probabilistic forecast (WQL)"):
78
+ gr.Markdown("""Forecast accuracy measured by Weighted Quantile Loss.""")
79
+ gr.Dataframe(
80
+ value=leaderboards["WQL"],
81
+ datatype=["markdown", "number", "number", "number"],
82
+ interactive=False,
83
+ )
84
+
85
+ with gr.Tab("📈 Point forecast (MASE)"):
86
+ gr.Markdown("""Forecast accuracy measured by Mean Absolute Scaled Error.""")
87
+ gr.Dataframe(
88
+ value=leaderboards["MASE"],
89
+ datatype=["markdown", "number", "number", "number"],
90
+ interactive=False,
91
+ )
92
+
93
+ with gr.Tab("📝 About", id=1):
94
+ gr.Markdown(about.ABOUT_LEADERBOARD)
95
+
96
+ if __name__ == "__main__":
97
+ demo.launch(ssr_mode=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fev-leaderboard-app.py DELETED
@@ -1,9 +0,0 @@
1
- import streamlit as st
2
-
3
- pages = [
4
- st.Page("pages/fev_bench.py", title="fev-bench", icon=":material/trophy:"),
5
- st.Page("pages/about.py", title="About", icon=":material/info:"),
6
- ]
7
-
8
- page = st.navigation(pages)
9
- page.run()
 
 
 
 
 
 
 
 
 
 
pages/about.py DELETED
@@ -1,19 +0,0 @@
1
- import streamlit as st
2
-
3
- ABOUT_LEADERBOARD = """
4
- ## About
5
-
6
- [**fev**](https://github.com/autogluon/fev) is a lightweight wrapper around the 🤗 [datasets](https://huggingface.co/docs/datasets/en/index) library designed to streamline
7
- benchmarking of time series forecasting models.
8
-
9
- ### 📚 Resources
10
- - **Documentation**: [Official docs](https://autogluon.github.io/fev/latest/)
11
- - **Publication**: ["fev-bench: A Realistic Benchmark for Time Series Forecasting"](https://arxiv.org/abs/2509.26468)
12
- - **Source Code**: [GitHub repository](https://github.com/autogluon/fev)
13
- - **Issues & Questions**: [GitHub Issues](https://github.com/autogluon/fev/issues)
14
-
15
- ### 🚀 Submit Your Model
16
- Ready to add your model to the leaderboard? Follow this [tutorial](https://autogluon.github.io/fev/latest/tutorials/05-add-your-model/) to evaluate your model with fev and contribute your results.
17
- """
18
- st.set_page_config(layout="wide", page_title="About FEV", page_icon=":material/info:")
19
- st.markdown(ABOUT_LEADERBOARD)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/fev_bench.py DELETED
@@ -1,219 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- sys.path.append(str(Path(__file__).parent))
5
-
6
- import fev
7
- import pandas as pd
8
- import streamlit as st
9
- from streamlit.elements.lib.column_types import ColumnConfig
10
-
11
- from src.strings import (
12
- CITATION_FEV,
13
- CITATION_HEADER,
14
- FEV_BENCHMARK_DETAILS,
15
- PAIRWISE_BENCHMARK_DETAILS,
16
- get_pivot_legend,
17
- )
18
- from src.task_groups import (
19
- ALL_TASKS,
20
- DOMAIN_GROUPS,
21
- FREQUENCY_GROUPS,
22
- MINI_TASKS,
23
- get_task_group,
24
- )
25
- from src.utils import (
26
- COLORS,
27
- construct_pairwise_chart,
28
- format_leaderboard,
29
- format_metric_name,
30
- get_metric_description,
31
- )
32
-
33
- st.set_page_config(layout="wide", page_title="fev leaderboard", page_icon=":material/trophy:")
34
-
35
- TITLE = "<h1 style='text-align: center; font-size: 350%;'>fev-bench</h1>"
36
- SORT_COL = "win_rate"
37
- AVAILABLE_METRICS = ["SQL", "MASE", "WQL", "WAPE"]
38
-
39
- # Group type options
40
- GROUP_TYPES = ["Full (100 tasks)", "Mini (20 tasks)", "By frequency", "By domain"]
41
- FREQUENCY_OPTIONS = list(FREQUENCY_GROUPS.keys())
42
- DOMAIN_OPTIONS = list(DOMAIN_GROUPS.keys())
43
-
44
- def get_subset_description(group_type: str, subgroup: str | None, num_tasks: int) -> str:
45
- """Generate a description of the current subset."""
46
- base = f"Results for various forecasting models on **{num_tasks} tasks**"
47
- if group_type == "Full (100 tasks)":
48
- subset_desc = "from the full **fev-bench** benchmark"
49
- elif group_type == "Mini (20 tasks)":
50
- subset_desc = "from the **fev-bench-mini** subset"
51
- elif group_type == "By frequency":
52
- subset_desc = f"with **{subgroup.lower()}** frequency"
53
- else: # By domain
54
- subset_desc = f"from the **{subgroup}** domain"
55
- paper_link = "[fev-bench: A Realistic Benchmark for Time Series Forecasting](https://arxiv.org/abs/2509.26468)"
56
- return f"{base} {subset_desc}, as described in the paper {paper_link}."
57
-
58
-
59
- # Mapping from UI selections to table directory names
60
- GROUP_DIR_MAPPING = {
61
- "Full (100 tasks)": "full",
62
- "Mini (20 tasks)": "mini",
63
- "Sub-hourly": "frequency_sub_hourly",
64
- "Hourly": "frequency_hourly",
65
- "Daily": "frequency_daily",
66
- "Weekly": "frequency_weekly",
67
- "Monthly+": "frequency_monthly_plus",
68
- "Energy": "domain_energy",
69
- "Nature": "domain_nature",
70
- "Cloud": "domain_cloud",
71
- "Mobility": "domain_mobility",
72
- "Econ": "domain_econ",
73
- "Health": "domain_health",
74
- "Retail": "domain_retail",
75
- }
76
-
77
-
78
- @st.cache_data()
79
- def get_leaderboard(metric_name: str, group_dir: str) -> pd.DataFrame:
80
- return pd.read_csv(f"tables/{group_dir}/leaderboard_{metric_name}.csv")
81
-
82
-
83
- @st.cache_data()
84
- def get_pairwise(metric_name: str, group_dir: str) -> pd.DataFrame:
85
- return pd.read_csv(f"tables/{group_dir}/pairwise_{metric_name}.csv")
86
-
87
-
88
- @st.cache_data()
89
- def get_pivot_table(metric_name: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
90
- pivot_df = pd.read_csv(f"tables/pivot_{metric_name}.csv")
91
- baseline_imputed = pd.read_csv(f"tables/pivot_{metric_name}_baseline_imputed.csv")
92
- leakage_imputed = pd.read_csv(f"tables/pivot_{metric_name}_leakage_imputed.csv")
93
- return pivot_df, baseline_imputed, leakage_imputed
94
-
95
-
96
- with st.sidebar:
97
- # Task group selection
98
- selected_group_type = st.selectbox("Subset", options=GROUP_TYPES)
99
-
100
- # Conditional sub-selection for frequency/domain
101
- selected_subgroup = None
102
- if selected_group_type == "By frequency":
103
- selected_subgroup = st.selectbox("Frequency", options=FREQUENCY_OPTIONS)
104
- elif selected_group_type == "By domain":
105
- selected_subgroup = st.selectbox("Domain", options=DOMAIN_OPTIONS)
106
-
107
- # Determine the directory to load tables from
108
- if selected_group_type in ["Full (100 tasks)", "Mini (20 tasks)"]:
109
- group_dir = GROUP_DIR_MAPPING[selected_group_type]
110
- task_list = ALL_TASKS if selected_group_type == "Full (100 tasks)" else MINI_TASKS
111
- else:
112
- group_dir = GROUP_DIR_MAPPING[selected_subgroup]
113
- if selected_group_type == "By frequency":
114
- task_list = FREQUENCY_GROUPS[selected_subgroup]
115
- else:
116
- task_list = DOMAIN_GROUPS[selected_subgroup]
117
-
118
- st.caption(f"{len(task_list)} tasks")
119
-
120
- st.divider()
121
-
122
- selected_metric = st.selectbox("Evaluation Metric", options=AVAILABLE_METRICS, format_func=format_metric_name)
123
- st.caption(get_metric_description(selected_metric))
124
-
125
- cols = st.columns(spec=[0.025, 0.95, 0.025])
126
-
127
- with cols[1] as main_container:
128
- st.markdown(TITLE, unsafe_allow_html=True)
129
-
130
- metric_df = get_leaderboard(selected_metric, group_dir).sort_values(by=SORT_COL, ascending=False)
131
- pairwise_df = get_pairwise(selected_metric, group_dir)
132
-
133
- st.markdown("## :material/trophy: Leaderboard", unsafe_allow_html=True)
134
- st.markdown(get_subset_description(selected_group_type, selected_subgroup, len(task_list)), unsafe_allow_html=True)
135
- df_styled = format_leaderboard(metric_df)
136
- st.dataframe(
137
- df_styled,
138
- width="stretch",
139
- hide_index=True,
140
- column_config={
141
- "model_name": ColumnConfig(label="Model Name", alignment="left"),
142
- "win_rate": st.column_config.NumberColumn(label="Avg. win rate (%)", format="%.1f"),
143
- "skill_score": st.column_config.NumberColumn(label="Skill score (%)", format="%.1f"),
144
- "median_inference_time_s_per100": st.column_config.NumberColumn(label="Median runtime (s / 100 series)", format="%.1f"),
145
- "training_corpus_overlap": st.column_config.NumberColumn(label="Leakage (%)", format="%d"),
146
- "num_failures": st.column_config.NumberColumn(label="Failed tasks (%)", format="%.0f"),
147
- "zero_shot": ColumnConfig(label="Zero-shot", alignment="center"),
148
- "org": ColumnConfig(label="Organization", alignment="left"),
149
- "link": st.column_config.LinkColumn(label="Link", display_text="🔗"),
150
- },
151
- )
152
-
153
- with st.expander("See details"):
154
- st.markdown(FEV_BENCHMARK_DETAILS, unsafe_allow_html=True)
155
-
156
- st.markdown("## :material/bar_chart: Pairwise comparison", unsafe_allow_html=True)
157
- chart_col_1, _, chart_col_2 = st.columns(spec=[0.45, 0.1, 0.45])
158
-
159
- with chart_col_1:
160
- st.altair_chart(
161
- construct_pairwise_chart(pairwise_df, col="win_rate", metric_name=selected_metric),
162
- use_container_width=True,
163
- )
164
-
165
- with chart_col_2:
166
- st.altair_chart(
167
- construct_pairwise_chart(pairwise_df, col="skill_score", metric_name=selected_metric),
168
- use_container_width=True,
169
- )
170
-
171
- with st.expander("See details"):
172
- st.markdown(PAIRWISE_BENCHMARK_DETAILS, unsafe_allow_html=True)
173
-
174
- st.markdown("## :material/table_chart: Results for individual tasks", unsafe_allow_html=True)
175
- with st.expander("Show detailed results"):
176
- st.markdown(get_pivot_legend("Seasonal Naive", "Chronos-Bolt"), unsafe_allow_html=True)
177
- pivot_df, baseline_imputed, leakage_imputed = get_pivot_table(selected_metric)
178
- pivot_df = pivot_df.set_index("Task name")
179
- baseline_imputed = baseline_imputed.set_index("Task name")
180
- leakage_imputed = leakage_imputed.set_index("Task name")
181
-
182
- # Filter pivot table to only show tasks in the selected group
183
- available_tasks = [t for t in task_list if t in pivot_df.index]
184
- pivot_df = pivot_df.loc[available_tasks]
185
- baseline_imputed = baseline_imputed.loc[available_tasks]
186
- leakage_imputed = leakage_imputed.loc[available_tasks]
187
-
188
- def style_pivot_table(errors, is_baseline_imputed, is_leakage_imputed):
189
- rank_colors = {1: COLORS["gold"], 2: COLORS["silver"], 3: COLORS["bronze"]}
190
-
191
- def highlight_by_position(styler):
192
- for row_idx in errors.index:
193
- row_ranks = errors.loc[row_idx].rank(method="min")
194
- for col_idx in errors.columns:
195
- rank = row_ranks[col_idx]
196
- style_parts = []
197
- if rank <= 3:
198
- style_parts.append(f"background-color: {rank_colors[rank]}")
199
- if is_leakage_imputed.loc[row_idx, col_idx]:
200
- style_parts.append(f"color: {COLORS['leakage_impute']}")
201
- elif is_baseline_imputed.loc[row_idx, col_idx]:
202
- style_parts.append(f"color: {COLORS['failure_impute']}")
203
- elif not style_parts:
204
- style_parts.append(f"color: {COLORS['text_default']}")
205
- if style_parts:
206
- styler = styler.map(
207
- lambda x, s="; ".join(style_parts): s,
208
- subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx],
209
- )
210
- return styler
211
-
212
- return highlight_by_position(errors.style).format(precision=3)
213
-
214
- st.dataframe(style_pivot_table(pivot_df, baseline_imputed, leakage_imputed))
215
-
216
- st.divider()
217
- st.markdown("### :material/format_quote: Citation", unsafe_allow_html=True)
218
- st.markdown(CITATION_HEADER)
219
- st.markdown(CITATION_FEV)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml CHANGED
@@ -1,12 +1,13 @@
1
- [project]
2
- name = "hf-leaderboard"
3
- version = "0.1.0"
4
- requires-python = ">=3.11"
5
- dependencies = [
6
- "altair>=6.0.0",
7
- "fev>=0.7.0",
8
- "numpy<2.2",
9
- "pyarrow<21",
10
- "scipy<1.15",
11
- "streamlit>=1.53.1",
12
- ]
 
 
1
+ [tool.ruff]
2
+ # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
3
+ select = ["E", "F"]
4
+ ignore = ["E501"] # line too long (black is taking care of this)
5
+ line-length = 119
6
+ fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
7
+
8
+ [tool.isort]
9
+ profile = "black"
10
+ line_length = 119
11
+
12
+ [tool.black]
13
+ line-length = 119
requirements.txt CHANGED
@@ -1,7 +1,11 @@
 
 
 
 
 
 
 
1
  matplotlib
2
  numpy
3
  pandas
4
- requests
5
- streamlit==1.49.1
6
- fev>=0.6.0
7
- altair>=5.5.0
 
1
+ APScheduler
2
+ black
3
+ datasets
4
+ gradio
5
+ gradio[oauth]
6
+ gradio_client
7
+ huggingface-hub>=0.18.0
8
  matplotlib
9
  numpy
10
  pandas
11
+ fev==0.4.0
 
 
 
save_tables.py DELETED
@@ -1,212 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- import argparse
4
- import io
5
- import sys
6
- from pathlib import Path
7
-
8
- import requests
9
-
10
- sys.path.append(str(Path(__file__).parent))
11
-
12
- import fev
13
- import pandas as pd
14
-
15
- from src.task_groups import ALL_TASKS, DOMAIN_GROUPS, FREQUENCY_GROUPS, MINI_TASKS
16
- from src.utils import format_leaderboard
17
-
18
- GITHUB_REPO = "autogluon/fev"
19
- RESULTS_PATH = "benchmarks/fev_bench/results"
20
-
21
- # Constants from the main app
22
- BASELINE_MODEL = "Seasonal Naive"
23
- LEAKAGE_IMPUTATION_MODEL = "Chronos-Bolt"
24
- SORT_COL = "win_rate"
25
- N_RESAMPLES_FOR_CI = 1000
26
- TOP_K_MODELS_TO_PLOT = 15
27
- AVAILABLE_METRICS = ["SQL", "MASE", "WQL", "WAPE"]
28
-
29
- # All task groups to generate tables for
30
- TASK_GROUPS = {
31
- "full": ALL_TASKS,
32
- "mini": MINI_TASKS,
33
- "frequency_sub_hourly": FREQUENCY_GROUPS["Sub-hourly"],
34
- "frequency_hourly": FREQUENCY_GROUPS["Hourly"],
35
- "frequency_daily": FREQUENCY_GROUPS["Daily"],
36
- "frequency_weekly": FREQUENCY_GROUPS["Weekly"],
37
- "frequency_monthly_plus": FREQUENCY_GROUPS["Monthly+"],
38
- "domain_energy": DOMAIN_GROUPS["Energy"],
39
- "domain_nature": DOMAIN_GROUPS["Nature"],
40
- "domain_cloud": DOMAIN_GROUPS["Cloud"],
41
- "domain_mobility": DOMAIN_GROUPS["Mobility"],
42
- "domain_econ": DOMAIN_GROUPS["Econ"],
43
- "domain_health": DOMAIN_GROUPS["Health"],
44
- "domain_retail": DOMAIN_GROUPS["Retail"],
45
- }
46
-
47
-
48
- def get_csv_files_from_github(commit: str) -> list[str]:
49
- """Get list of CSV file paths from the GitHub repo at a specific commit."""
50
- api_url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{RESULTS_PATH}?ref={commit}"
51
- response = requests.get(api_url)
52
- response.raise_for_status()
53
-
54
- files = response.json()
55
- csv_files = [f["path"] for f in files if f["name"].endswith(".csv")]
56
-
57
- if not csv_files:
58
- raise FileNotFoundError(f"No CSV files found in {RESULTS_PATH} at commit {commit}")
59
-
60
- return csv_files
61
-
62
-
63
- def load_summaries_from_github(commit: str) -> pd.DataFrame:
64
- """Load and concatenate all CSV summaries from the GitHub repo at a specific commit."""
65
- csv_files = get_csv_files_from_github(commit)
66
- print(f"Found {len(csv_files)} CSV files")
67
-
68
- dfs = []
69
- for file_path in csv_files:
70
- raw_url = f"https://raw.githubusercontent.com/{GITHUB_REPO}/{commit}/{file_path}"
71
- response = requests.get(raw_url)
72
- response.raise_for_status()
73
- df = pd.read_csv(io.StringIO(response.text))
74
- dfs.append(df)
75
- print(f" Loaded: {Path(file_path).name}")
76
-
77
- return pd.concat(dfs, ignore_index=True)
78
-
79
-
80
- def compute_leaderboard(summaries: pd.DataFrame, metric_name: str) -> pd.DataFrame:
81
- lb = fev.analysis.leaderboard(
82
- summaries=summaries,
83
- metric_column=metric_name,
84
- missing_strategy="impute",
85
- baseline_model=BASELINE_MODEL,
86
- leakage_imputation_model=LEAKAGE_IMPUTATION_MODEL,
87
- normalize_time_per_n_forecasts=100,
88
- )
89
- lb = lb.astype("float64").reset_index()
90
-
91
- lb["skill_score"] = lb["skill_score"] * 100
92
- lb["win_rate"] = lb["win_rate"] * 100
93
- lb["num_failures"] = lb["num_failures"] / summaries["task_name"].nunique() * 100
94
- return lb
95
-
96
-
97
- def compute_pairwise(summaries: pd.DataFrame, metric_name: str, included_models: list[str]) -> pd.DataFrame:
98
- if BASELINE_MODEL not in included_models:
99
- included_models = included_models + [BASELINE_MODEL]
100
-
101
- return (
102
- fev.analysis.pairwise_comparison(
103
- summaries,
104
- included_models=included_models,
105
- metric_column=metric_name,
106
- baseline_model=BASELINE_MODEL,
107
- missing_strategy="impute",
108
- n_resamples=N_RESAMPLES_FOR_CI,
109
- leakage_imputation_model=LEAKAGE_IMPUTATION_MODEL,
110
- )
111
- .round(3)
112
- .reset_index()
113
- )
114
-
115
-
116
- def compute_pivot_table(summaries: pd.DataFrame, metric_name: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
117
- errors = fev.pivot_table(summaries=summaries, metric_column=metric_name, task_columns=["task_name"])
118
- train_overlap = (
119
- fev.pivot_table(summaries=summaries, metric_column="trained_on_this_dataset", task_columns=["task_name"])
120
- .fillna(False)
121
- .astype(bool)
122
- )
123
-
124
- is_imputed_baseline = errors.isna()
125
- is_leakage_imputed = train_overlap
126
-
127
- # Handle imputations
128
- errors = errors.mask(train_overlap, errors[LEAKAGE_IMPUTATION_MODEL], axis=0)
129
- for col in errors.columns:
130
- if col != BASELINE_MODEL:
131
- errors[col] = errors[col].fillna(errors[BASELINE_MODEL])
132
-
133
- errors = errors[errors.rank(axis=1).mean().sort_values().index]
134
- is_imputed_baseline = is_imputed_baseline[errors.columns]
135
- is_leakage_imputed = is_leakage_imputed[errors.columns]
136
-
137
- errors.index.rename("Task name", inplace=True)
138
- is_imputed_baseline.index.rename("Task name", inplace=True)
139
- is_leakage_imputed.index.rename("Task name", inplace=True)
140
-
141
- return errors.reset_index(), is_imputed_baseline.reset_index(), is_leakage_imputed.reset_index()
142
-
143
-
144
- def main():
145
- parser = argparse.ArgumentParser(description="Generate leaderboard tables from CSV summaries in the fev repo")
146
- parser.add_argument(
147
- "commit",
148
- nargs="?",
149
- default="main",
150
- help=f"Git commit SHA or branch name in the {GITHUB_REPO} repository (default: main)",
151
- )
152
- args = parser.parse_args()
153
-
154
- # Create tables directory
155
- tables_dir = Path("tables")
156
- tables_dir.mkdir(exist_ok=True)
157
-
158
- print(f"Loading summaries from {GITHUB_REPO} at commit {args.commit}...")
159
- summaries = load_summaries_from_github(args.commit)
160
-
161
- # Save raw summaries for on-the-fly subset computation
162
- summaries.to_csv(tables_dir / "summaries.csv", index=False)
163
- print("Saved: summaries.csv")
164
-
165
- # Generate pivot tables (full version only, at root level)
166
- for metric in AVAILABLE_METRICS:
167
- print(f"Processing pivot table for {metric}...")
168
- pivot_df, baseline_imputed, leakage_imputed = compute_pivot_table(summaries, metric)
169
- pivot_df.to_csv(tables_dir / f"pivot_{metric}.csv", index=False)
170
- baseline_imputed.to_csv(tables_dir / f"pivot_{metric}_baseline_imputed.csv", index=False)
171
- leakage_imputed.to_csv(tables_dir / f"pivot_{metric}_leakage_imputed.csv", index=False)
172
- print(f" Saved: pivot_{metric}.csv")
173
-
174
- # Generate leaderboard and pairwise tables for each task group
175
- for group_name, task_list in TASK_GROUPS.items():
176
- print(f"\nProcessing group: {group_name} ({len(task_list)} tasks)...")
177
-
178
- # Create subdirectory for this group
179
- group_dir = tables_dir / group_name
180
- group_dir.mkdir(exist_ok=True)
181
-
182
- # Filter summaries to only include tasks in this group
183
- group_summaries = summaries[summaries["task_name"].isin(task_list)]
184
-
185
- if group_summaries.empty:
186
- print(f" WARNING: No matching tasks found for group {group_name}")
187
- continue
188
-
189
- actual_tasks = group_summaries["task_name"].nunique()
190
- print(f" Found {actual_tasks} tasks in summaries")
191
-
192
- for metric in AVAILABLE_METRICS:
193
- # Compute leaderboard for this group
194
- leaderboard_df = compute_leaderboard(group_summaries, metric)
195
- leaderboard_df.to_csv(group_dir / f"leaderboard_{metric}.csv", index=False)
196
-
197
- # Get top models for pairwise comparison
198
- top_k_models = (
199
- leaderboard_df.sort_values(by=SORT_COL, ascending=False).head(TOP_K_MODELS_TO_PLOT)["model_name"].tolist()
200
- )
201
-
202
- # Compute pairwise comparison
203
- pairwise_df = compute_pairwise(group_summaries, metric, top_k_models)
204
- pairwise_df.to_csv(group_dir / f"pairwise_{metric}.csv", index=False)
205
-
206
- print(f" Saved: {group_name}/leaderboard_{metric}.csv, {group_name}/pairwise_{metric}.csv")
207
-
208
- print(f"\nAll tables saved to {tables_dir}/")
209
-
210
-
211
- if __name__ == "__main__":
212
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/about.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TITLE = """<h1 align="center" id="space-title">Forecast evaluation leaderboard</h1>"""
2
+
3
+ # What does your leaderboard evaluate?
4
+ INTRODUCTION_TEXT = """
5
+ This space hosts evaluation results for time series forecasting models.
6
+
7
+ The results are obtained using [fev](https://github.com/autogluon/fev) - a lightweight library for evaluating time series forecasting models.
8
+ """
9
+
10
+ ABOUT_LEADERBOARD = """
11
+ ## What is `fev`?
12
+
13
+ [`fev`](https://github.com/autogluon/fev) is a lightweight wrapper around the 🤗 [`datasets`](https://huggingface.co/docs/datasets/en/index) library that makes it easy to benchmark time series forecasting models.
14
+
15
+ For more information about `fev`, please check out [github.com/autogluon/fev](https://github.com/autogluon/fev).
16
+
17
+ Currently, the results in this space are a minimal proof of concept. We plan to add new benchmark datasets and tasks in the future.
18
+
19
+ ## How is `fev` different from other benchmarking tools?
20
+ Existing forecasting benchmarks usually fall into one of two categories:
21
+
22
+ - Standalone datasets without any supporting infrastructure. These provide no guarantees that the results obtained by different users are comparable. For example, changing the start date or duration of the forecast horizon totally changes the meaning of the scores.
23
+ - Bespoke end-to-end systems that combine models, datasets and forecasting tasks. Such packages usually come with lots of dependencies and assumptions, which makes extending or integrating these libraries into existing systems difficult.
24
+
25
+ `fev` aims for the middle ground - it provides the core benchmarking functionality without introducing unnecessary constraints or bloated dependencies. The library supports point & probabilistic forecasting, different types of covariates, as well as all popular forecasting metrics.
26
+
27
+
28
+ ## Submitting your model
29
+ For instructions on how to evaluate your model using `fev` and contribute your results to the leaderboard, please follow the [instructions in the GitHub repo](https://github.com/autogluon/fev/blob/main/docs/04-models.ipynb).
30
+ """
31
+
32
+ CHRONOS_BENCHMARK = """
33
+ ## Chronos Benchmark II results
34
+
35
+ This tab contains results for various forecasting models on the 27 datasets used in Benchmark II in the publication [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815).
36
+
37
+ These datasets were used for zero-shot evaluation of Chronos models (i.e., Chronos models were not trained on these datasets), but some other models did include certain datasets in their training corpus.
38
+
39
+ Each table contains the following information:
40
+
41
+ * **Average relative error**: Geometric mean of the relative errors for each task. The relative error for each task is computed as `model_error / baseline_error`.
42
+ * **Average rank**: Arithmetic mean of the ranks achieved by each model on each task.
43
+ * **Median inference time (s)**: Median of the times required to make predictions for the entire dataset (in seconds).
44
+ * **Training corpus overlap (%)**: Percentage of the datasets used in the benchmark that were included in the model's training corpus. Zero-shot models are highlighted in <span style="color:green; font-weight:bold;">green</span>.
45
+
46
+ Lower values are better for all of the above metrics.
47
+
48
+ Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/chronos_zeroshot). More information about the datasets is available in [Table 3 of the paper](https://arxiv.org/abs/2403.07815).
49
+
50
+ """
src/colors.py DELETED
@@ -1,6 +0,0 @@
1
- # Legacy colors - kept for backward compatibility if needed elsewhere
2
- VERY_PALE_PURPLE = "#e8d9f3"
3
- VERY_PALE_GREEN = "#cffdbc"
4
- VERY_PALE_BLUE = "#d6fffe"
5
- DEEP_LAVENDER = "#8d5eb7"
6
- GRASS_GREEN = "#3f9b0b"
 
 
 
 
 
 
 
src/custom_html_js.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_css = """
2
+
3
+ .markdown-text {
4
+ font-size: 20px !important;
5
+ }
6
+
7
+ """
8
+
9
+
10
+ # .tab-buttons button {
11
+ # font-size: 20px;
12
+ # }
13
+
14
+ # #citation-button span {
15
+ # font-size: 16px !important;
16
+ # }
17
+
18
+ # #citation-button textarea {
19
+ # font-size: 16px !important;
20
+ # }
21
+
22
+ # #citation-button > label > button {
23
+ # margin: 6px;
24
+ # transform: scale(1.3);
25
+ # }
26
+
27
+
28
+ # #leaderboard-table-lite {
29
+ # margin-top: 15px
30
+ # }
31
+
32
+ # #search-bar-table-box > div:first-child {
33
+ # background: none;
34
+ # border: none;
35
+ # }
36
+
37
+ # #search-bar {
38
+ # padding: 0px;
39
+ # }
40
+
41
+ # /* Hides the final AutoEvalColumn */
42
+ # #llm-benchmark-tab-table table td:last-child,
43
+ # #llm-benchmark-tab-table table th:last-child {
44
+ # display: none;
45
+ # }
46
+
47
+ # /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
48
+ # table td:first-child,
49
+ # table th:first-child {
50
+ # max-width: 400px;
51
+ # overflow: auto;
52
+ # white-space: nowrap;
53
+ # }
54
+
55
+
56
+ # #scale-logo {
57
+ # border-style: none !important;
58
+ # box-shadow: none;
59
+ # display: block;
60
+ # margin-left: auto;
61
+ # margin-right: auto;
62
+ # max-width: 600px;
63
+ # }
64
+
65
+ # #scale-logo .download {
66
+ # display: none;
67
+ # }
68
+ # #filter_type{
69
+ # border: 0;
70
+ # padding-left: 0;
71
+ # padding-top: 0;
72
+ # }
73
+ # #filter_type label {
74
+ # display: flex;
75
+ # }
76
+ # #filter_type label > span{
77
+ # margin-top: var(--spacing-lg);
78
+ # margin-right: 0.5em;
79
+ # }
80
+ # #filter_type label > .wrap{
81
+ # width: 103px;
82
+ # }
83
+ # #filter_type label > .wrap .wrap-inner{
84
+ # padding: 2px;
85
+ # }
86
+ # #filter_type label > .wrap .wrap-inner input{
87
+ # width: 1px
88
+ # }
89
+ # #filter-columns-type{
90
+ # border:0;
91
+ # padding:0.5;
92
+ # }
93
+ # #filter-columns-size{
94
+ # border:0;
95
+ # padding:0.5;
96
+ # }
97
+ # #box-filter > .form{
98
+ # border: 0
99
+ # }
src/formatting.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def model_hyperlink(link, model_name):
2
+ return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
3
+
4
+
5
+ MODEL_URLS = {
6
+ "chronos_tiny": "amazon/chronos-t5-tiny",
7
+ "chronos_mini": "amazon/chronos-t5-mini",
8
+ "chronos_small": "amazon/chronos-t5-small",
9
+ "chronos_base": "amazon/chronos-t5-base",
10
+ "chronos_large": "amazon/chronos-t5-large",
11
+ "chronos_bolt_tiny": "amazon/chronos-bolt-tiny",
12
+ "chronos_bolt_mini": "amazon/chronos-bolt-mini",
13
+ "chronos_bolt_small": "amazon/chronos-bolt-small",
14
+ "chronos_bolt_base": "amazon/chronos-bolt-base",
15
+ "moirai_large": "Salesforce/moirai-1.1-R-large",
16
+ "moirai_base": "Salesforce/moirai-1.1-R-base",
17
+ "moirai_small": "Salesforce/moirai-1.1-R-small",
18
+ "timesfm": "google/timesfm-1.0-200m-pytorch",
19
+ "timesfm-2.0": "google/timesfm-2.0-500m-pytorch",
20
+ "ttm-r2": "ibm-granite/granite-timeseries-ttm-r2",
21
+ "tirex": "NX-AI/TiRex",
22
+ }
23
+
24
+
25
+ def make_clickable_model(model_name):
26
+ if model_name in MODEL_URLS:
27
+ model_path = MODEL_URLS.get(model_name)
28
+ link = f"https://huggingface.co/{model_path}"
29
+ return model_hyperlink(link, model_name)
30
+ else:
31
+ return model_name
src/streamlit_app.py DELETED
@@ -1,9 +0,0 @@
1
- import streamlit as st
2
-
3
- pages = [
4
- st.Page("../pages/fev_bench.py", title="fev-bench", icon=":material/trophy:"),
5
- st.Page("../pages/about.py", title="About", icon=":material/info:"),
6
- ]
7
-
8
- page = st.navigation(pages)
9
- page.run()
 
 
 
 
 
 
 
 
 
 
src/strings.py DELETED
@@ -1,114 +0,0 @@
1
- from src.utils import COLORS
2
-
3
- INTRODUCTION_TEXT = """
4
- This space hosts evaluation results for time series forecasting models. The results are obtained using [fev](https://github.com/autogluon/fev) - a lightweight library for evaluating time series forecasting models.
5
- """
6
-
7
- LEGEND = """
8
- """
9
-
10
- TABLE_INFO = f"""
11
- The leaderboard summarizes the performance of all models evaluated on the 100 tasks comprising **fev-bench**. More details available in the [paper](https://arxiv.org/abs/2509.26468).
12
-
13
- Model names are colored by type: <span style='color: {COLORS["dl_text"]}; font-weight: bold;'>Deep Learning</span> and <span style='color: {COLORS["st_text"]}; font-weight: bold;'>Statistical</span>.
14
-
15
- The full matrix $E_{{rj}}$ with the error of each model $j$ on task $r$ is available at the bottom of the page.
16
-
17
- * **Avg. win rate (%)**: Fraction of all possible model pairs and tasks where this model achieves lower error than the competing model. For model $j$, defined as $W_j = \\frac{{1}}{{R(M-1)}} \\sum_{{r=1}}^{{R}} \\sum_{{k \\neq j}} (\\mathbf{{1}}(E_{{rj}} < E_{{rk}}) + 0.5 \\cdot \\mathbf{{1}}(E_{{rj}} = E_{{rk}}))$ where $R$ is the number of tasks and $M$ is the number of models. Ties count as half-wins.
18
-
19
- Ranges from 0% (worst) to 100% (best). Higher values are better. This value changes as new models are added to the benchmark.
20
-
21
- * **Skill score (%)**: Measures how much the model reduces forecasting error compared to the Seasonal Naive baseline. Computed as $S_j = 100 \\times (1 - \\sqrt[R]{{\\prod_{{r=1}}^{{R}} E_{{rj}}/E_{{r\\beta}}}})$, where $E_{{r\\beta}}$ is baseline error on task $r$. Relative errors are clipped between 0.01 and 100 before aggregation to avoid extreme outliers. Positive values indicate better-than-baseline performance, negative values indicate worse-than-baseline performance.
22
-
23
- Higher values are better. This value does not change as new models are added to the benchmark.
24
-
25
- * **Median runtime (s)**: Median end-to-end time (training + prediction across all evaluation windows) in seconds. Note that inference times depend on hardware, batch sizes, and implementation details, so these serve as a rough guide rather than definitive performance benchmarks.
26
-
27
- * **Leakage (%)**: For zero-shot models, percentage of benchmark datasets included in the model's training corpus. Results for tasks with reported overlap are replaced with Chronos-Bolt (Base) performance to prevent data leakage.
28
-
29
- * **Failed tasks (%)**: Percentage of tasks where the model failed to produce a forecast. Results for failed tasks are replaced with Seasonal Naive performance.
30
-
31
- * **Zero-shot**: Indicates whether the model can make predictions without task-specific training (✓ = zero-shot, × = task-specific).
32
- """
33
-
34
- CHRONOS_BENCHMARK_BASIC_INFO = f"""
35
- **Chronos Benchmark II** contains results for various forecasting models on the 27 datasets used in Benchmark II in the paper [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815). {LEGEND}
36
- """
37
-
38
- CHRONOS_BENCHMARK_DETAILS = f"""
39
- {TABLE_INFO}
40
-
41
- Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/chronos_zeroshot). More information about the datasets is available in [Table 3 of the paper](https://arxiv.org/abs/2403.07815).
42
- """
43
-
44
- FEV_BENCHMARK_BASIC_INFO = f"""
45
- Results for various forecasting models on 100 tasks of the **fev-bench** benchmark, as described in the paper [fev-bench: A Realistic Benchmark for Time Series Forecasting](https://arxiv.org/abs/2509.26468). {LEGEND}
46
- """
47
-
48
- FEV_BENCHMARK_DETAILS = f"""
49
- {TABLE_INFO}
50
-
51
- Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/). Datasets used for evaluation are available on [Hugging Face](https://huggingface.co/datasets/autogluon/fev_datasets).
52
- """
53
-
54
- CITATION_HEADER = """
55
- If you find this leaderboard useful for your research, please consider citing the associated paper(s):
56
-
57
- """
58
- CITATION_FEV = """
59
- ```
60
- @article{shchur2025fev,
61
- title={{fev-bench}: A Realistic Benchmark for Time Series Forecasting},
62
- author={Shchur, Oleksandr and Ansari, Abdul Fatir and Turkmen, Caner and Stella, Lorenzo and Erickson, Nick and Guerron, Pablo and Bohlke-Schneider, Michael and Wang, Yuyang},
63
- year={2025},
64
- eprint={2509.26468},
65
- archivePrefix={arXiv},
66
- primaryClass={cs.LG}
67
- }
68
- ```
69
- """
70
-
71
-
72
- def get_pivot_legend(baseline_model: str, leakage_imputation_model: str) -> str:
73
- return f"""
74
- Task definitions and raw results in CSV format are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/fev_bench).
75
-
76
- Best results for each task are marked with
77
- <span style='background: {COLORS["gold"]}; color: {COLORS["text_default"]}; padding: 3px; border-radius: 5px;'>🥇 1st</span>
78
- <span style='background: {COLORS["silver"]}; color: {COLORS["text_default"]}; padding: 3px; border-radius: 5px;'>🥈 2nd</span>
79
- <span style='background: {COLORS["bronze"]}; color: {COLORS["text_default"]}; padding: 3px; border-radius: 5px;'>🥉 3rd</span>
80
- <br><br>
81
- **Imputation:**
82
- - <span style='color: {COLORS["failure_impute"]}; font-weight: bold;'>Failed tasks</span> imputed by {baseline_model}
83
- - <span style='color: {COLORS["leakage_impute"]}; font-weight: bold;'>Leaky tasks</span> imputed by {leakage_imputation_model}
84
- """
85
-
86
-
87
- PAIRWISE_BENCHMARK_DETAILS = """
88
- The pairwise charts show head-to-head results between models:
89
-
90
- * **Win rate**: Percentage of tasks where Model 1 achieves lower error than Model 2 (ties count as half-wins).
91
- A value above 50% means Model 1 is more accurate than Model 2 on average.
92
-
93
- * **Skill score**: Average relative error reduction of Model 1 with respect to Model 2.
94
- A positive value means Model 1 reduces forecasting error compared to Model 2 on average.
95
-
96
- **Confidence Intervals**: 95% intervals are estimated using 1000 bootstrap samples over tasks.
97
- For each bootstrap sample, tasks are resampled with replacement and the pairwise win rate / skill score are recomputed.
98
- The intervals correspond to the 2.5th and 97.5th percentiles of these bootstrap distributions,
99
- capturing how model comparisons vary under alternative benchmark compositions.
100
- """
101
-
102
-
103
- CITATION_CHRONOS = """
104
- ```
105
- @article{ansari2024chronos,
106
- title={Chronos: Learning the Language of Time Series},
107
- author={Ansari, Abdul Fatir and Stella, Lorenzo and Turkmen, Caner and Zhang, Xiyuan and Mercado, Pedro and Shen, Huibin and Shchur, Oleksandr and Rangapuram, Syama Sundar and Pineda Arango, Sebastian and Kapoor, Shubham and Zschiegner, Jasper and Maddix, Danielle C. and Wang, Hao and Mahoney, Michael W. and Torkkola, Kari and Gordon Wilson, Andrew and Bohlke-Schneider, Michael and Wang, Yuyang},
108
- journal={Transactions on Machine Learning Research},
109
- issn={2835-8856},
110
- year={2024},
111
- url={https://openreview.net/forum?id=gerNCVqqtR}
112
- }
113
- ```
114
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/task_groups.py DELETED
@@ -1,209 +0,0 @@
1
- """Task groupings for filtering the leaderboard by subsets."""
2
-
3
- # All tasks in the benchmark (100 tasks)
4
- ALL_TASKS = [
5
- "ETT_15T", "ETT_1D", "ETT_1H", "ETT_1W",
6
- "LOOP_SEATTLE_1D", "LOOP_SEATTLE_1H", "LOOP_SEATTLE_5T",
7
- "M_DENSE_1D", "M_DENSE_1H",
8
- "SZ_TAXI_15T", "SZ_TAXI_1H",
9
- "australian_tourism",
10
- "bizitobs_l2c_1H", "bizitobs_l2c_5T",
11
- "boomlet_1062", "boomlet_1209", "boomlet_1225", "boomlet_1230", "boomlet_1282",
12
- "boomlet_1487", "boomlet_1631", "boomlet_1676", "boomlet_1855", "boomlet_1975",
13
- "boomlet_2187", "boomlet_285", "boomlet_619", "boomlet_772", "boomlet_963",
14
- "ecdc_ili",
15
- "entsoe_15T", "entsoe_1H", "entsoe_30T",
16
- "epf_be", "epf_de", "epf_fr", "epf_np", "epf_pjm",
17
- "ercot_1D", "ercot_1H", "ercot_1M", "ercot_1W",
18
- "favorita_stores_1D", "favorita_stores_1M", "favorita_stores_1W",
19
- "favorita_transactions_1D", "favorita_transactions_1M", "favorita_transactions_1W",
20
- "fred_md_2025/cee", "fred_md_2025/macro",
21
- "fred_qd_2025/cee", "fred_qd_2025/macro",
22
- "gvar",
23
- "hermes",
24
- "hierarchical_sales_1D", "hierarchical_sales_1W",
25
- "hospital", "hospital_admissions_1D", "hospital_admissions_1W",
26
- "jena_weather_10T", "jena_weather_1D", "jena_weather_1H",
27
- "kdd_cup_2022_10T", "kdd_cup_2022_1D", "kdd_cup_2022_30T",
28
- "m5_1D", "m5_1M", "m5_1W",
29
- "proenfo_gfc12", "proenfo_gfc14", "proenfo_gfc17",
30
- "redset_15T", "redset_1H", "redset_5T",
31
- "restaurant",
32
- "rohlik_orders_1D", "rohlik_orders_1W", "rohlik_sales_1D", "rohlik_sales_1W",
33
- "rossmann_1D", "rossmann_1W",
34
- "solar_1D", "solar_1W", "solar_with_weather_15T", "solar_with_weather_1H",
35
- "uci_air_quality_1D", "uci_air_quality_1H",
36
- "uk_covid_nation_1D/cumulative", "uk_covid_nation_1D/new",
37
- "uk_covid_nation_1W/cumulative", "uk_covid_nation_1W/new",
38
- "uk_covid_utla_1D/new", "uk_covid_utla_1W/cumulative",
39
- "us_consumption_1M", "us_consumption_1Q", "us_consumption_1Y",
40
- "walmart",
41
- "world_co2_emissions", "world_life_expectancy", "world_tourism",
42
- ]
43
-
44
- # Mini benchmark - representative subset (20 tasks)
45
- MINI_TASKS = [
46
- "jena_weather_1H",
47
- "M_DENSE_1D",
48
- "bizitobs_l2c_5T",
49
- "rohlik_orders_1D",
50
- "boomlet_1282",
51
- "rossmann_1D",
52
- "rossmann_1W",
53
- "boomlet_1676",
54
- "solar_with_weather_1H",
55
- "boomlet_619",
56
- "uci_air_quality_1H",
57
- "uk_covid_nation_1D/cumulative",
58
- "us_consumption_1Y",
59
- "epf_np",
60
- "world_co2_emissions",
61
- "ETT_15T",
62
- "ETT_1H",
63
- "proenfo_gfc14",
64
- "hospital_admissions_1D",
65
- "hospital_admissions_1W",
66
- ]
67
-
68
- # Frequency-based groupings
69
- FREQUENCY_GROUPS = {
70
- "Sub-hourly": [
71
- # T (1 minute)
72
- "boomlet_1225", "boomlet_1282", "boomlet_285", "boomlet_619", "boomlet_772", "boomlet_963",
73
- # 5T (5 minutes)
74
- "LOOP_SEATTLE_5T", "bizitobs_l2c_5T", "redset_5T",
75
- "boomlet_1062", "boomlet_1209", "boomlet_1230", "boomlet_1487",
76
- # 10T (10 minutes)
77
- "jena_weather_10T", "kdd_cup_2022_10T",
78
- # 15T (15 minutes)
79
- "ETT_15T", "SZ_TAXI_15T", "entsoe_15T", "redset_15T", "solar_with_weather_15T",
80
- # 30T (30 minutes)
81
- "entsoe_30T", "kdd_cup_2022_30T", "boomlet_1631", "boomlet_1676",
82
- ],
83
- "Hourly": [
84
- "ETT_1H", "LOOP_SEATTLE_1H", "M_DENSE_1H", "SZ_TAXI_1H",
85
- "bizitobs_l2c_1H", "entsoe_1H", "ercot_1H",
86
- "epf_be", "epf_de", "epf_fr", "epf_np", "epf_pjm",
87
- "jena_weather_1H",
88
- "proenfo_gfc12", "proenfo_gfc14", "proenfo_gfc17",
89
- "redset_1H", "solar_with_weather_1H", "uci_air_quality_1H",
90
- "boomlet_1855", "boomlet_1975", "boomlet_2187",
91
- ],
92
- "Daily": [
93
- "ETT_1D", "LOOP_SEATTLE_1D", "M_DENSE_1D",
94
- "ercot_1D", "kdd_cup_2022_1D", "solar_1D",
95
- "favorita_stores_1D", "favorita_transactions_1D",
96
- "hierarchical_sales_1D", "m5_1D",
97
- "restaurant",
98
- "rohlik_orders_1D", "rohlik_sales_1D", "rossmann_1D",
99
- "jena_weather_1D", "uci_air_quality_1D",
100
- "hospital_admissions_1D",
101
- "uk_covid_nation_1D/cumulative", "uk_covid_nation_1D/new", "uk_covid_utla_1D/new",
102
- ],
103
- "Weekly": [
104
- "ETT_1W", "ercot_1W", "solar_1W",
105
- "favorita_stores_1W", "favorita_transactions_1W",
106
- "hierarchical_sales_1W", "m5_1W",
107
- "hermes", "walmart",
108
- "rohlik_orders_1W", "rohlik_sales_1W", "rossmann_1W",
109
- "ecdc_ili",
110
- "hospital_admissions_1W",
111
- "uk_covid_nation_1W/cumulative", "uk_covid_nation_1W/new", "uk_covid_utla_1W/cumulative",
112
- ],
113
- "Monthly+": [
114
- # Monthly
115
- "ercot_1M",
116
- "favorita_stores_1M", "favorita_transactions_1M", "m5_1M",
117
- "fred_md_2025/cee", "fred_md_2025/macro",
118
- "hospital",
119
- "us_consumption_1M",
120
- # Quarterly
121
- "australian_tourism", "gvar",
122
- "fred_qd_2025/cee", "fred_qd_2025/macro",
123
- "us_consumption_1Q",
124
- # Yearly
125
- "us_consumption_1Y",
126
- "world_co2_emissions", "world_life_expectancy", "world_tourism",
127
- ],
128
- }
129
-
130
- # Domain-based groupings
131
- DOMAIN_GROUPS = {
132
- "Energy": [
133
- "ETT_15T", "ETT_1D", "ETT_1H", "ETT_1W",
134
- "entsoe_15T", "entsoe_1H", "entsoe_30T",
135
- "epf_be", "epf_de", "epf_fr", "epf_np", "epf_pjm",
136
- "ercot_1D", "ercot_1H", "ercot_1M", "ercot_1W",
137
- "kdd_cup_2022_10T", "kdd_cup_2022_1D", "kdd_cup_2022_30T",
138
- "proenfo_gfc12", "proenfo_gfc14", "proenfo_gfc17",
139
- "solar_1D", "solar_1W", "solar_with_weather_15T", "solar_with_weather_1H",
140
- ],
141
- "Retail": [
142
- "favorita_stores_1D", "favorita_stores_1M", "favorita_stores_1W",
143
- "favorita_transactions_1D", "favorita_transactions_1M", "favorita_transactions_1W",
144
- "hermes",
145
- "hierarchical_sales_1D", "hierarchical_sales_1W",
146
- "m5_1D", "m5_1M", "m5_1W",
147
- "restaurant",
148
- "rohlik_orders_1D", "rohlik_orders_1W", "rohlik_sales_1D", "rohlik_sales_1W",
149
- "rossmann_1D", "rossmann_1W",
150
- "walmart",
151
- ],
152
- "Nature": [
153
- "jena_weather_10T", "jena_weather_1D", "jena_weather_1H",
154
- "uci_air_quality_1D", "uci_air_quality_1H",
155
- ],
156
- "Cloud": [
157
- "bizitobs_l2c_1H", "bizitobs_l2c_5T",
158
- "boomlet_1062", "boomlet_1209", "boomlet_1225", "boomlet_1230", "boomlet_1282",
159
- "boomlet_1487", "boomlet_1631", "boomlet_1676", "boomlet_1855", "boomlet_1975",
160
- "boomlet_2187", "boomlet_285", "boomlet_619", "boomlet_772", "boomlet_963",
161
- "redset_15T", "redset_1H", "redset_5T",
162
- ],
163
- "Health": [
164
- "ecdc_ili",
165
- "hospital", "hospital_admissions_1D", "hospital_admissions_1W",
166
- "uk_covid_nation_1D/cumulative", "uk_covid_nation_1D/new",
167
- "uk_covid_nation_1W/cumulative", "uk_covid_nation_1W/new",
168
- "uk_covid_utla_1D/new", "uk_covid_utla_1W/cumulative",
169
- ],
170
- "Econ": [
171
- "australian_tourism",
172
- "fred_md_2025/cee", "fred_md_2025/macro",
173
- "fred_qd_2025/cee", "fred_qd_2025/macro",
174
- "gvar",
175
- "us_consumption_1M", "us_consumption_1Q", "us_consumption_1Y",
176
- "world_co2_emissions", "world_life_expectancy", "world_tourism",
177
- ],
178
- "Mobility": [
179
- "LOOP_SEATTLE_1D", "LOOP_SEATTLE_1H", "LOOP_SEATTLE_5T",
180
- "M_DENSE_1D", "M_DENSE_1H",
181
- "SZ_TAXI_15T", "SZ_TAXI_1H",
182
- ],
183
- }
184
-
185
-
186
- def get_task_group(group_type: str, group_value: str | None = None) -> list[str]:
187
- """Get the list of tasks for a given group type and value.
188
-
189
- Args:
190
- group_type: One of "full", "mini", "frequency", "domain"
191
- group_value: Required for "frequency" and "domain" types
192
-
193
- Returns:
194
- List of task names belonging to the group
195
- """
196
- if group_type == "full":
197
- return ALL_TASKS
198
- elif group_type == "mini":
199
- return MINI_TASKS
200
- elif group_type == "frequency":
201
- if group_value is None:
202
- raise ValueError("group_value required for frequency grouping")
203
- return FREQUENCY_GROUPS[group_value]
204
- elif group_type == "domain":
205
- if group_value is None:
206
- raise ValueError("group_value required for domain grouping")
207
- return DOMAIN_GROUPS[group_value]
208
- else:
209
- raise ValueError(f"Unknown group_type: {group_type}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/utils.py DELETED
@@ -1,374 +0,0 @@
1
- import altair as alt
2
- import fev
3
- import pandas as pd
4
- import pandas.io.formats.style
5
-
6
- # Color constants - all colors defined in one place
7
-
8
- COLORS = {
9
- "dl_text": "#5A7FA5",
10
- "st_text": "#A5795A",
11
- # "st_text": "#666666",
12
- "bar_fill": "#8d5eb7",
13
- "error_bar": "#222222",
14
- "point": "#111111",
15
- "text_white": "white",
16
- "text_black": "black",
17
- "text_default": "#111",
18
- "gold": "#F7D36B",
19
- "silver": "#E5E7EB",
20
- "bronze": "#E6B089",
21
- "leakage_impute": "#3B82A0",
22
- "failure_impute": "#E07B39",
23
- }
24
- HEATMAP_COLOR_SCHEME = "purplegreen"
25
-
26
- # Model configuration: (url, org, zero_shot, model_type)
27
- MODEL_CONFIG = {
28
- # Chronos Models
29
- "chronos_tiny": ("amazon/chronos-t5-tiny", "AWS", True, "DL"),
30
- "chronos_mini": ("amazon/chronos-t5-mini", "AWS", True, "DL"),
31
- "chronos_small": ("amazon/chronos-t5-small", "AWS", True, "DL"),
32
- "chronos_base": ("amazon/chronos-t5-base", "AWS", True, "DL"),
33
- "chronos_large": ("amazon/chronos-t5-large", "AWS", True, "DL"),
34
- "chronos_bolt_tiny": ("amazon/chronos-bolt-tiny", "AWS", True, "DL"),
35
- "chronos_bolt_mini": ("amazon/chronos-bolt-mini", "AWS", True, "DL"),
36
- "chronos_bolt_small": ("amazon/chronos-bolt-small", "AWS", True, "DL"),
37
- "chronos_bolt_base": ("amazon/chronos-bolt-base", "AWS", True, "DL"),
38
- "chronos-bolt": ("amazon/chronos-bolt-base", "AWS", True, "DL"),
39
- "chronos-2": ("amazon/chronos-2", "AWS", True, "DL"),
40
- # Moirai Models
41
- "moirai_large": ("Salesforce/moirai-1.1-R-large", "Salesforce", True, "DL"),
42
- "moirai_base": ("Salesforce/moirai-1.1-R-base", "Salesforce", True, "DL"),
43
- "moirai_small": ("Salesforce/moirai-1.1-R-small", "Salesforce", True, "DL"),
44
- "moirai-2.0": ("Salesforce/moirai-2.0-R-small", "Salesforce", True, "DL"),
45
- # TimesFM Models
46
- "timesfm": ("google/timesfm-1.0-200m-pytorch", "Google", True, "DL"),
47
- "timesfm-2.0": ("google/timesfm-2.0-500m-pytorch", "Google", True, "DL"),
48
- "timesfm-2.5": ("google/timesfm-2.5-200m-pytorch", "Google", True, "DL"),
49
- # Toto Models
50
- "toto-1.0": ("Datadog/Toto-Open-Base-1.0", "Datadog", True, "DL"),
51
- # Other Models
52
- "tirex": ("NX-AI/TiRex", "NX-AI", True, "DL"),
53
- "tabpfn-ts": ("Prior-Labs/TabPFN-v2-reg", "Prior Labs", True, "DL"),
54
- "sundial-base": ("thuml/sundial-base-128m", "Tsinghua University", True, "DL"),
55
- "ttm-r2": ("ibm-granite/granite-timeseries-ttm-r2", "IBM", True, "DL"),
56
- # Task-specific models
57
- "stat. ensemble": (
58
- "https://nixtlaverse.nixtla.io/statsforecast/",
59
- "—",
60
- False,
61
- "ST",
62
- ),
63
- "autoarima": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
64
- "autotheta": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
65
- "autoets": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
66
- "seasonalnaive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
67
- "seasonal naive": (
68
- "https://nixtlaverse.nixtla.io/statsforecast/",
69
- "—",
70
- False,
71
- "ST",
72
- ),
73
- "drift": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
74
- "naive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
75
- }
76
-
77
-
78
- ALL_METRICS = {
79
- "SQL": (
80
- "SQL: Scaled Quantile Loss",
81
- "The [Scaled Quantile Loss (SQL)](https://auto.gluon.ai/dev/tutorials/timeseries/forecasting-metrics.html#autogluon.timeseries.metrics.SQL) is a **scale-invariant** metric for evaluating **probabilistic** forecasts.",
82
- ),
83
- "MASE": (
84
- "MASE: Mean Absolute Scaled Error",
85
- "The [Mean Absolute Scaled Error (MASE)](https://auto.gluon.ai/dev/tutorials/timeseries/forecasting-metrics.html#autogluon.timeseries.metrics.MASE) is a **scale-invariant** metric for evaluating **point** forecasts.",
86
- ),
87
- "WQL": (
88
- "WQL: Weighted Quantile Loss",
89
- "The [Weighted Quantile Loss (WQL)](https://auto.gluon.ai/dev/tutorials/timeseries/forecasting-metrics.html#autogluon.timeseries.metrics.WQL), is a **scale-dependent** metric for evaluating **probabilistic** forecasts.",
90
- ),
91
- "WAPE": (
92
- "WAPE: Weighted Absolute Percentage Error",
93
- "The [Weighted Absolute Percentage Error (WAPE)](https://auto.gluon.ai/dev/tutorials/timeseries/forecasting-metrics.html#autogluon.timeseries.metrics.WAPE) is a **scale-dependent** metric for evaluating **point** forecasts.",
94
- ),
95
- }
96
-
97
-
98
- def format_metric_name(metric_name: str):
99
- return ALL_METRICS[metric_name][0]
100
-
101
-
102
- def get_metric_description(metric_name: str):
103
- return ALL_METRICS[metric_name][1]
104
-
105
-
106
- def get_model_link(model_name):
107
- config = MODEL_CONFIG.get(model_name.lower())
108
- if not config or not config[0]:
109
- return ""
110
- url = config[0]
111
- return url if url.startswith("https:") else f"https://huggingface.co/{url}"
112
-
113
-
114
- def get_model_organization(model_name):
115
- config = MODEL_CONFIG.get(model_name.lower())
116
- return config[1] if config else "—"
117
-
118
-
119
- def get_zero_shot_status(model_name):
120
- config = MODEL_CONFIG.get(model_name.lower())
121
- return "✓" if config and config[2] else "×"
122
-
123
-
124
- def get_model_type(model_name):
125
- config = MODEL_CONFIG.get(model_name.lower())
126
- return config[3] if config else "—"
127
-
128
-
129
- def highlight_model_type_color(cell):
130
- config = MODEL_CONFIG.get(cell.lower())
131
- if config:
132
- color = COLORS["dl_text"] if config[3] == "DL" else COLORS["st_text"]
133
- return f"font-weight: bold; color: {color}"
134
- return "font-weight: bold"
135
-
136
-
137
- def format_leaderboard(df: pd.DataFrame):
138
- df = df.copy()
139
- df["skill_score"] = df["skill_score"].round(1)
140
- df["win_rate"] = df["win_rate"].round(1)
141
- df["zero_shot"] = df["model_name"].apply(get_zero_shot_status)
142
- # Format leakage column: convert to int for all models, 0 for non-zero-shot
143
- df["training_corpus_overlap"] = df.apply(
144
- lambda row: int(round(row["training_corpus_overlap"] * 100)) if row["zero_shot"] == "✓" else 0,
145
- axis=1,
146
- )
147
- df["link"] = df["model_name"].apply(get_model_link)
148
- df["org"] = df["model_name"].apply(get_model_organization)
149
- df = df[
150
- [
151
- "model_name",
152
- "win_rate",
153
- "skill_score",
154
- "median_inference_time_s_per100",
155
- "training_corpus_overlap",
156
- "num_failures",
157
- "zero_shot",
158
- "org",
159
- "link",
160
- ]
161
- ]
162
- return (
163
- df.style.map(highlight_model_type_color, subset=["model_name"])
164
- .map(lambda x: "font-weight: bold", subset=["zero_shot"])
165
- .apply(
166
- lambda x: ["background-color: #f8f9fa" if i % 2 == 1 else "" for i in range(len(x))],
167
- axis=0,
168
- )
169
- )
170
-
171
-
172
- def construct_bar_chart(df: pd.DataFrame, col: str, metric_name: str):
173
- label = "Skill Score" if col == "skill_score" else "Win Rate"
174
-
175
- tooltip = [
176
- alt.Tooltip("model_name:N"),
177
- alt.Tooltip(f"{col}:Q", format=".2f"),
178
- alt.Tooltip(f"{col}_lower:Q", title="95% CI Lower", format=".2f"),
179
- alt.Tooltip(f"{col}_upper:Q", title="95% CI Upper", format=".2f"),
180
- ]
181
-
182
- base_encode = {
183
- "y": alt.Y("model_name:N", title="Forecasting Model", sort=None),
184
- "tooltip": tooltip,
185
- }
186
-
187
- bars = (
188
- alt.Chart(df)
189
- .mark_bar(color=COLORS["bar_fill"], cornerRadius=4)
190
- .encode(
191
- x=alt.X(f"{col}:Q", title=f"{label} (%)", scale=alt.Scale(zero=False)),
192
- **base_encode,
193
- )
194
- )
195
-
196
- error_bars = (
197
- alt.Chart(df)
198
- .mark_errorbar(ticks={"height": 5}, color=COLORS["error_bar"])
199
- .encode(
200
- y=alt.Y("model_name:N", title=None, sort=None),
201
- x=alt.X(f"{col}_lower:Q", title=f"{label} (%)"),
202
- x2=alt.X2(f"{col}_upper:Q"),
203
- tooltip=tooltip,
204
- )
205
- )
206
-
207
- points = (
208
- alt.Chart(df)
209
- .mark_point(filled=True, color=COLORS["point"])
210
- .encode(x=alt.X(f"{col}:Q", title=f"{label} (%)"), **base_encode)
211
- )
212
-
213
- return (
214
- (bars + error_bars + points)
215
- .properties(height=500, title=f"{label} ({metric_name}) with 95% CIs")
216
- .configure_title(fontSize=16)
217
- )
218
-
219
-
220
- def construct_pairwise_chart(df: pd.DataFrame, col: str, metric_name: str):
221
- config = {
222
- "win_rate": ("Win Rate", [0, 100], 50, f"abs(datum.{col} - 50) > 30"),
223
- "skill_score": ("Skill Score", [-15, 15], 0, f"abs(datum.{col}) > 10"),
224
- }
225
- cbar_label, domain, domain_mid, text_condition = config[col]
226
-
227
- df = df.copy()
228
- for c in [col, f"{col}_lower", f"{col}_upper"]:
229
- df[c] *= 100
230
-
231
- model_order = df.groupby("model_1")[col].mean().sort_values(ascending=False).index.tolist()
232
-
233
- tooltip = [
234
- alt.Tooltip("model_1:N", title="Model 1"),
235
- alt.Tooltip("model_2:N", title="Model 2"),
236
- alt.Tooltip(f"{col}:Q", title=cbar_label.split(" ")[0], format=".1f"),
237
- alt.Tooltip(f"{col}_lower:Q", title="95% CI Lower", format=".1f"),
238
- alt.Tooltip(f"{col}_upper:Q", title="95% CI Upper", format=".1f"),
239
- ]
240
-
241
- base = alt.Chart(df).encode(
242
- x=alt.X(
243
- "model_2:N",
244
- sort=model_order,
245
- title="Model 2",
246
- axis=alt.Axis(orient="top", labelAngle=-90),
247
- ),
248
- y=alt.Y("model_1:N", sort=model_order, title="Model 1"),
249
- )
250
-
251
- heatmap = base.mark_rect().encode(
252
- color=alt.Color(
253
- f"{col}:Q",
254
- legend=None,
255
- scale=alt.Scale(
256
- scheme=HEATMAP_COLOR_SCHEME,
257
- domain=domain,
258
- domainMid=domain_mid,
259
- clamp=True,
260
- ),
261
- ),
262
- tooltip=tooltip,
263
- )
264
-
265
- text_main = base.mark_text(dy=-8, fontSize=8, baseline="top", yOffset=5).encode(
266
- text=alt.Text(f"{col}:Q", format=".1f"),
267
- color=alt.condition(
268
- text_condition,
269
- alt.value(COLORS["text_white"]),
270
- alt.value(COLORS["text_black"]),
271
- ),
272
- tooltip=tooltip,
273
- )
274
-
275
- return (
276
- (heatmap + text_main)
277
- .properties(
278
- height=550,
279
- title={
280
- "text": f"Pairwise {cbar_label} ({metric_name}) with 95% CIs",
281
- "fontSize": 16,
282
- },
283
- )
284
- .configure_axis(labelFontSize=11, titleFontSize=13, titleFontWeight="bold")
285
- .resolve_scale(color="independent")
286
- )
287
-
288
-
289
- def construct_pivot_table_from_df(errors: pd.DataFrame, metric_name: str) -> pd.io.formats.style.Styler:
290
- """Construct styled pivot table from precomputed DataFrame."""
291
-
292
- def highlight_by_position(styler):
293
- rank_colors = {1: COLORS["gold"], 2: COLORS["silver"], 3: COLORS["bronze"]}
294
-
295
- for row_idx in errors.index:
296
- row_ranks = errors.loc[row_idx].rank(method="min")
297
- for col_idx in errors.columns:
298
- rank = row_ranks[col_idx]
299
- style_parts = []
300
-
301
- # Rank background colors
302
- if rank <= 3:
303
- style_parts.append(f"background-color: {rank_colors[rank]}")
304
- else:
305
- style_parts.append(f"color: {COLORS['text_default']}")
306
-
307
- if style_parts:
308
- styler = styler.map(
309
- lambda x, s="; ".join(style_parts): s,
310
- subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx],
311
- )
312
- return styler
313
-
314
- return highlight_by_position(errors.style).format(precision=3)
315
-
316
-
317
- def construct_pivot_table(
318
- summaries: pd.DataFrame,
319
- metric_name: str,
320
- baseline_model: str,
321
- leakage_imputation_model: str,
322
- ) -> pd.io.formats.style.Styler:
323
- errors = fev.pivot_table(summaries=summaries, metric_column=metric_name, task_columns=["task_name"])
324
- train_overlap = (
325
- fev.pivot_table(
326
- summaries=summaries,
327
- metric_column="trained_on_this_dataset",
328
- task_columns=["task_name"],
329
- )
330
- .fillna(False)
331
- .astype(bool)
332
- )
333
-
334
- is_imputed_baseline = errors.isna()
335
- is_leakage_imputed = train_overlap
336
-
337
- # Handle imputations
338
- errors = errors.mask(train_overlap, errors[leakage_imputation_model], axis=0)
339
- for col in errors.columns:
340
- if col != baseline_model:
341
- errors[col] = errors[col].fillna(errors[baseline_model])
342
-
343
- errors = errors[errors.rank(axis=1).mean().sort_values().index]
344
- errors.index.rename("Task name", inplace=True)
345
-
346
- def highlight_by_position(styler):
347
- rank_colors = {1: COLORS["gold"], 2: COLORS["silver"], 3: COLORS["bronze"]}
348
-
349
- for row_idx in errors.index:
350
- row_ranks = errors.loc[row_idx].rank(method="min")
351
- for col_idx in errors.columns:
352
- rank = row_ranks[col_idx]
353
- style_parts = []
354
-
355
- # Rank background colors
356
- if rank <= 3:
357
- style_parts.append(f"background-color: {rank_colors[rank]}")
358
-
359
- # Imputation text colors
360
- if is_leakage_imputed.loc[row_idx, col_idx]:
361
- style_parts.append(f"color: {COLORS['leakage_impute']}")
362
- elif is_imputed_baseline.loc[row_idx, col_idx]:
363
- style_parts.append(f"color: {COLORS['failure_impute']}")
364
- elif not style_parts or (len(style_parts) == 1 and "font-weight" in style_parts[0]):
365
- style_parts.append(f"color: {COLORS['text_default']}")
366
-
367
- if style_parts:
368
- styler = styler.map(
369
- lambda x, s="; ".join(style_parts): s,
370
- subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx],
371
- )
372
- return styler
373
-
374
- return highlight_by_position(errors.style).format(precision=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/leaderboard_MASE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Toto-1.0,91.42857142857143,41.8883400084654,0.0,45.868307151375,0.0,0.0
3
- Chronos-2,89.64285714285714,42.5157996441094,0.0,1.18621670825,0.0,0.0
4
- TimesFM-2.5,82.85714285714286,40.26605685743614,0.0,6.4447616408988475,0.0,0.0
5
- TiRex,77.14285714285714,38.09940067233815,0.0,0.20031914146825397,0.0,0.0
6
- Moirai-2.0,65.00000000000001,35.579899495667256,0.0,0.36445902017857146,0.1,0.0
7
- Sundial-Base,61.07142857142858,34.44493255866505,0.0,7.874095355196429,0.0,0.0
8
- Chronos-Bolt,59.285714285714285,30.890375999745544,0.0,0.21195593629285714,0.0,0.0
9
- TabPFN-TS,57.857142857142854,31.63477113942893,0.0,187.1612375475248,0.0,0.0
10
- Stat. Ensemble,33.75,10.28649321258931,0.0,117.23392411285715,0.0,15.0
11
- AutoARIMA,32.32142857142858,11.154754181413495,0.0,17.96621433673913,0.0,15.0
12
- AutoTheta,27.500000000000004,6.832233342669413,0.0,3.978930596261481,0.0,0.0
13
- AutoETS,24.107142857142858,9.00813293024486,0.0,2.879931537857143,0.0,15.0
14
- Naive,23.214285714285715,-25.50199780067157,0.0,0.40027213541999274,0.0,0.0
15
- Seasonal Naive,15.892857142857144,0.0,0.0,0.43848143432692305,0.0,0.0
16
- Drift,8.928571428571429,-32.85429096552939,0.0,0.4187055400961539,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/leaderboard_SQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Toto-1.0,91.78571428571428,63.05871546601058,0.0,45.868307151375,0.0,0.0
3
- Chronos-2,91.42857142857143,63.92789671191335,0.0,1.18621670825,0.0,0.0
4
- TimesFM-2.5,83.57142857142857,61.98663044796851,0.0,6.4447616408988475,0.0,0.0
5
- TiRex,80.35714285714285,60.87156591539495,0.0,0.20031914146825397,0.0,0.0
6
- Moirai-2.0,68.21428571428572,58.41610987284718,0.0,0.36445902017857146,0.1,0.0
7
- Sundial-Base,59.64285714285715,56.647735223836015,0.0,7.874095355196429,0.0,0.0
8
- Chronos-Bolt,58.57142857142858,54.85929055283013,0.0,0.21195593629285714,0.0,0.0
9
- TabPFN-TS,57.50000000000001,53.05789136035014,0.0,187.1612375475248,0.0,0.0
10
- AutoARIMA,38.03571428571428,32.58752067086065,0.0,17.96621433673913,0.0,15.0
11
- Stat. Ensemble,34.82142857142856,20.1052436267542,0.0,117.23392411285715,0.0,15.0
12
- AutoETS,28.035714285714285,-26.853793211441058,0.0,2.879931537857143,0.0,15.0
13
- AutoTheta,22.142857142857146,-2.1145171067117996,0.0,3.978930596261481,0.0,0.0
14
- Seasonal Naive,19.107142857142854,0.0,0.0,0.43848143432692305,0.0,0.0
15
- Naive,13.571428571428573,-102.14853969808834,0.0,0.40027213541999274,0.0,0.0
16
- Drift,3.214285714285715,-110.2431874466197,0.0,0.4187055400961539,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/leaderboard_WAPE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,91.78571428571428,50.572763239812836,0.0,1.18621670825,0.0,0.0
3
- Toto-1.0,91.07142857142858,47.121114882947566,0.0,45.868307151375,0.0,0.0
4
- TimesFM-2.5,81.42857142857142,46.247719637805126,0.0,6.4447616408988475,0.0,0.0
5
- TiRex,77.49999999999999,44.52063533547439,0.0,0.20031914146825397,0.0,0.0
6
- Moirai-2.0,67.5,42.792548583451826,0.0,0.36445902017857146,0.1,0.0
7
- TabPFN-TS,66.78571428571429,43.22422155533027,0.0,187.1612375475248,0.0,0.0
8
- Chronos-Bolt,60.71428571428571,39.893668788246494,0.0,0.21195593629285714,0.0,0.0
9
- Sundial-Base,60.71428571428571,41.96635843416393,0.0,7.874095355196429,0.0,0.0
10
- Stat. Ensemble,32.32142857142857,15.58065226385924,0.0,117.23392411285715,0.0,15.0
11
- AutoARIMA,28.392857142857142,14.643401272886948,0.0,17.96621433673913,0.0,15.0
12
- AutoETS,23.75,5.6618608592632125,0.0,2.879931537857143,0.0,15.0
13
- AutoTheta,23.57142857142857,12.660865629460227,0.0,3.978930596261481,0.0,0.0
14
- Naive,23.21428571428571,4.49837332329478,0.0,0.40027213541999274,0.0,0.0
15
- Seasonal Naive,12.321428571428573,0.0,0.0,0.43848143432692305,0.0,0.0
16
- Drift,8.928571428571429,-1.7537851144645122,0.0,0.4187055400961539,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/leaderboard_WQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,92.5,66.94397676110452,0.0,1.18621670825,0.0,0.0
3
- Toto-1.0,90.35714285714286,64.45690397421868,0.0,45.868307151375,0.0,0.0
4
- TimesFM-2.5,82.14285714285712,63.753748722726,0.0,6.4447616408988475,0.0,0.0
5
- TiRex,80.71428571428571,62.74382048051918,0.0,0.20031914146825397,0.0,0.0
6
- Moirai-2.0,68.92857142857143,61.08750955081813,0.0,0.36445902017857146,0.1,0.0
7
- TabPFN-TS,65.00000000000001,59.171399643395425,0.0,187.1612375475248,0.0,0.0
8
- Chronos-Bolt,60.357142857142854,58.158162956775406,0.0,0.21195593629285714,0.0,0.0
9
- Sundial-Base,58.928571428571445,59.213728716592804,0.0,7.874095355196429,0.0,0.0
10
- AutoARIMA,36.25,33.35224855510223,0.0,17.96621433673913,0.0,15.0
11
- Stat. Ensemble,32.67857142857142,20.842955409190168,0.0,117.23392411285715,0.0,15.0
12
- AutoETS,24.82142857142857,-39.99504769759261,0.0,2.879931537857143,0.0,15.0
13
- AutoTheta,22.142857142857146,7.361982670066736,0.0,3.978930596261481,0.0,0.0
14
- Seasonal Naive,18.035714285714285,0.0,0.0,0.43848143432692305,0.0,0.0
15
- Naive,13.214285714285715,-64.97889014234586,0.0,0.40027213541999274,0.0,0.0
16
- Drift,3.9285714285714293,-71.7471743876533,0.0,0.4187055400961539,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/pairwise_MASE.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
3
- Toto-1.0,Chronos-2,0.65,0.45,0.85,-0.011,-0.063,0.027
4
- Toto-1.0,TimesFM-2.5,0.7,0.5,0.9,0.027,-0.02,0.072
5
- Toto-1.0,TiRex,0.85,0.7,1.0,0.061,0.027,0.114
6
- Toto-1.0,Moirai-2.0,0.95,0.85,1.0,0.098,0.053,0.164
7
- Toto-1.0,Sundial-Base,0.9,0.75,1.0,0.114,0.037,0.182
8
- Toto-1.0,Chronos-Bolt,0.9,0.75,1.0,0.159,0.094,0.224
9
- Toto-1.0,TabPFN-TS,0.85,0.65,1.0,0.15,0.075,0.231
10
- Toto-1.0,Stat. Ensemble,1.0,1.0,1.0,0.352,0.266,0.45
11
- Toto-1.0,AutoARIMA,1.0,1.0,1.0,0.346,0.264,0.448
12
- Toto-1.0,AutoTheta,1.0,1.0,1.0,0.376,0.295,0.468
13
- Toto-1.0,AutoETS,1.0,1.0,1.0,0.361,0.267,0.469
14
- Toto-1.0,Naive,1.0,1.0,1.0,0.537,0.377,0.689
15
- Toto-1.0,Seasonal Naive,1.0,1.0,1.0,0.419,0.323,0.529
16
- Toto-1.0,Drift,1.0,1.0,1.0,0.563,0.414,0.702
17
- Chronos-2,Toto-1.0,0.35,0.15,0.55,0.011,-0.028,0.059
18
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
19
- Chronos-2,TimesFM-2.5,0.7,0.5,0.9,0.038,0.009,0.071
20
- Chronos-2,TiRex,0.75,0.55,0.95,0.071,0.022,0.128
21
- Chronos-2,Moirai-2.0,1.0,1.0,1.0,0.108,0.053,0.168
22
- Chronos-2,Sundial-Base,0.9,0.75,1.0,0.123,0.07,0.182
23
- Chronos-2,Chronos-Bolt,0.95,0.85,1.0,0.168,0.101,0.231
24
- Chronos-2,TabPFN-TS,0.9,0.75,1.0,0.159,0.092,0.234
25
- Chronos-2,Stat. Ensemble,1.0,1.0,1.0,0.359,0.272,0.454
26
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.353,0.266,0.453
27
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.383,0.296,0.47
28
- Chronos-2,AutoETS,1.0,1.0,1.0,0.368,0.281,0.468
29
- Chronos-2,Naive,1.0,1.0,1.0,0.542,0.386,0.689
30
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.425,0.331,0.534
31
- Chronos-2,Drift,1.0,1.0,1.0,0.567,0.421,0.704
32
- TimesFM-2.5,Toto-1.0,0.3,0.1,0.5,-0.028,-0.078,0.02
33
- TimesFM-2.5,Chronos-2,0.3,0.1,0.5,-0.039,-0.077,-0.009
34
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
35
- TimesFM-2.5,TiRex,0.65,0.45,0.85,0.035,-0.0,0.077
36
- TimesFM-2.5,Moirai-2.0,0.9,0.75,1.0,0.073,0.035,0.115
37
- TimesFM-2.5,Sundial-Base,0.85,0.7,1.0,0.089,0.041,0.137
38
- TimesFM-2.5,Chronos-Bolt,0.85,0.65,1.0,0.136,0.068,0.199
39
- TimesFM-2.5,TabPFN-TS,0.75,0.55,0.9,0.126,0.059,0.207
40
- TimesFM-2.5,Stat. Ensemble,1.0,1.0,1.0,0.334,0.261,0.415
41
- TimesFM-2.5,AutoARIMA,1.0,1.0,1.0,0.328,0.254,0.415
42
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.359,0.283,0.436
43
- TimesFM-2.5,AutoETS,1.0,1.0,1.0,0.344,0.268,0.433
44
- TimesFM-2.5,Naive,1.0,1.0,1.0,0.524,0.366,0.676
45
- TimesFM-2.5,Seasonal Naive,1.0,1.0,1.0,0.403,0.318,0.501
46
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.55,0.402,0.69
47
- TiRex,Toto-1.0,0.15,0.0,0.3,-0.065,-0.129,-0.028
48
- TiRex,Chronos-2,0.25,0.05,0.45,-0.077,-0.146,-0.022
49
- TiRex,TimesFM-2.5,0.35,0.15,0.55,-0.036,-0.083,0.0
50
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
51
- TiRex,Moirai-2.0,0.8,0.6,0.95,0.039,0.014,0.065
52
- TiRex,Sundial-Base,0.85,0.7,1.0,0.056,-0.026,0.115
53
- TiRex,Chronos-Bolt,0.75,0.55,0.9,0.104,0.038,0.166
54
- TiRex,TabPFN-TS,0.7,0.5,0.9,0.095,0.018,0.172
55
- TiRex,Stat. Ensemble,1.0,1.0,1.0,0.31,0.233,0.392
56
- TiRex,AutoARIMA,1.0,1.0,1.0,0.303,0.231,0.382
57
- TiRex,AutoTheta,1.0,1.0,1.0,0.336,0.263,0.412
58
- TiRex,AutoETS,1.0,1.0,1.0,0.32,0.235,0.41
59
- TiRex,Naive,0.95,0.85,1.0,0.507,0.342,0.658
60
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.381,0.295,0.476
61
- TiRex,Drift,1.0,1.0,1.0,0.534,0.385,0.676
62
- Moirai-2.0,Toto-1.0,0.05,0.0,0.15,-0.109,-0.196,-0.056
63
- Moirai-2.0,Chronos-2,0.0,0.0,0.0,-0.121,-0.202,-0.056
64
- Moirai-2.0,TimesFM-2.5,0.1,0.0,0.25,-0.078,-0.13,-0.036
65
- Moirai-2.0,TiRex,0.2,0.05,0.4,-0.041,-0.07,-0.015
66
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
67
- Moirai-2.0,Sundial-Base,0.8,0.6,0.95,0.017,-0.056,0.065
68
- Moirai-2.0,Chronos-Bolt,0.65,0.425,0.825,0.068,-0.002,0.135
69
- Moirai-2.0,TabPFN-TS,0.55,0.35,0.75,0.058,-0.02,0.138
70
- Moirai-2.0,Stat. Ensemble,0.95,0.85,1.0,0.282,0.201,0.366
71
- Moirai-2.0,AutoARIMA,0.95,0.85,1.0,0.275,0.203,0.354
72
- Moirai-2.0,AutoTheta,1.0,1.0,1.0,0.309,0.237,0.386
73
- Moirai-2.0,AutoETS,0.95,0.85,1.0,0.292,0.208,0.382
74
- Moirai-2.0,Naive,0.95,0.85,1.0,0.487,0.322,0.638
75
- Moirai-2.0,Seasonal Naive,0.95,0.85,1.0,0.356,0.268,0.451
76
- Moirai-2.0,Drift,1.0,1.0,1.0,0.515,0.369,0.657
77
- Sundial-Base,Toto-1.0,0.1,0.0,0.25,-0.128,-0.223,-0.038
78
- Sundial-Base,Chronos-2,0.1,0.0,0.25,-0.14,-0.222,-0.075
79
- Sundial-Base,TimesFM-2.5,0.15,0.0,0.3,-0.097,-0.159,-0.043
80
- Sundial-Base,TiRex,0.15,0.0,0.3,-0.059,-0.129,0.025
81
- Sundial-Base,Moirai-2.0,0.2,0.05,0.4,-0.018,-0.069,0.053
82
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
83
- Sundial-Base,Chronos-Bolt,0.5,0.25,0.7,0.051,-0.023,0.126
84
- Sundial-Base,TabPFN-TS,0.55,0.35,0.75,0.041,-0.034,0.11
85
- Sundial-Base,Stat. Ensemble,0.95,0.85,1.0,0.269,0.182,0.361
86
- Sundial-Base,AutoARIMA,0.95,0.85,1.0,0.262,0.175,0.353
87
- Sundial-Base,AutoTheta,1.0,1.0,1.0,0.296,0.22,0.377
88
- Sundial-Base,AutoETS,0.95,0.85,1.0,0.28,0.184,0.379
89
- Sundial-Base,Naive,1.0,1.0,1.0,0.478,0.322,0.626
90
- Sundial-Base,Seasonal Naive,0.95,0.85,1.0,0.344,0.246,0.453
91
- Sundial-Base,Drift,1.0,1.0,1.0,0.507,0.358,0.646
92
- Chronos-Bolt,Toto-1.0,0.1,0.0,0.25,-0.189,-0.288,-0.104
93
- Chronos-Bolt,Chronos-2,0.05,0.0,0.15,-0.202,-0.3,-0.112
94
- Chronos-Bolt,TimesFM-2.5,0.15,0.0,0.35,-0.157,-0.248,-0.073
95
- Chronos-Bolt,TiRex,0.25,0.1,0.45,-0.116,-0.198,-0.039
96
- Chronos-Bolt,Moirai-2.0,0.35,0.175,0.575,-0.073,-0.156,0.002
97
- Chronos-Bolt,Sundial-Base,0.5,0.3,0.75,-0.054,-0.144,0.022
98
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
99
- Chronos-Bolt,TabPFN-TS,0.5,0.3,0.7,-0.011,-0.128,0.086
100
- Chronos-Bolt,Stat. Ensemble,0.9,0.75,1.0,0.23,0.124,0.336
101
- Chronos-Bolt,AutoARIMA,0.9,0.75,1.0,0.222,0.121,0.337
102
- Chronos-Bolt,AutoTheta,0.95,0.85,1.0,0.258,0.171,0.353
103
- Chronos-Bolt,AutoETS,0.85,0.7,1.0,0.24,0.131,0.358
104
- Chronos-Bolt,Naive,0.95,0.85,1.0,0.449,0.297,0.601
105
- Chronos-Bolt,Seasonal Naive,0.85,0.7,1.0,0.309,0.185,0.434
106
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.48,0.341,0.624
107
- TabPFN-TS,Toto-1.0,0.15,0.0,0.35,-0.176,-0.301,-0.081
108
- TabPFN-TS,Chronos-2,0.1,0.0,0.25,-0.189,-0.305,-0.102
109
- TabPFN-TS,TimesFM-2.5,0.25,0.1,0.45,-0.144,-0.261,-0.063
110
- TabPFN-TS,TiRex,0.3,0.1,0.5,-0.104,-0.208,-0.019
111
- TabPFN-TS,Moirai-2.0,0.45,0.25,0.65,-0.061,-0.161,0.019
112
- TabPFN-TS,Sundial-Base,0.45,0.25,0.65,-0.043,-0.123,0.033
113
- TabPFN-TS,Chronos-Bolt,0.5,0.3,0.7,0.011,-0.095,0.114
114
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
115
- TabPFN-TS,Stat. Ensemble,0.8,0.6,0.95,0.238,0.095,0.36
116
- TabPFN-TS,AutoARIMA,0.8,0.6,0.95,0.231,0.096,0.351
117
- TabPFN-TS,AutoTheta,0.75,0.55,0.95,0.266,0.154,0.375
118
- TabPFN-TS,AutoETS,0.85,0.7,1.0,0.249,0.114,0.367
119
- TabPFN-TS,Naive,0.85,0.65,1.0,0.455,0.285,0.6
120
- TabPFN-TS,Seasonal Naive,0.9,0.75,1.0,0.316,0.178,0.443
121
- TabPFN-TS,Drift,0.95,0.85,1.0,0.485,0.326,0.621
122
- Stat. Ensemble,Toto-1.0,0.0,0.0,0.0,-0.544,-0.817,-0.362
123
- Stat. Ensemble,Chronos-2,0.0,0.0,0.0,-0.561,-0.833,-0.374
124
- Stat. Ensemble,TimesFM-2.5,0.0,0.0,0.0,-0.502,-0.711,-0.353
125
- Stat. Ensemble,TiRex,0.0,0.0,0.0,-0.449,-0.646,-0.304
126
- Stat. Ensemble,Moirai-2.0,0.05,0.0,0.15,-0.393,-0.578,-0.251
127
- Stat. Ensemble,Sundial-Base,0.05,0.0,0.15,-0.369,-0.564,-0.223
128
- Stat. Ensemble,Chronos-Bolt,0.1,0.0,0.25,-0.298,-0.505,-0.141
129
- Stat. Ensemble,TabPFN-TS,0.2,0.05,0.4,-0.312,-0.562,-0.106
130
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
131
- Stat. Ensemble,AutoARIMA,0.475,0.274,0.675,-0.01,-0.058,0.025
132
- Stat. Ensemble,AutoTheta,0.75,0.55,0.95,0.037,-0.013,0.087
133
- Stat. Ensemble,AutoETS,0.775,0.6,0.95,0.014,-0.091,0.087
134
- Stat. Ensemble,Naive,0.75,0.55,0.95,0.285,0.06,0.51
135
- Stat. Ensemble,Seasonal Naive,0.775,0.624,0.925,0.103,-0.009,0.19
136
- Stat. Ensemble,Drift,0.8,0.6,0.95,0.325,0.11,0.536
137
- AutoARIMA,Toto-1.0,0.0,0.0,0.0,-0.529,-0.811,-0.358
138
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-0.546,-0.83,-0.363
139
- AutoARIMA,TimesFM-2.5,0.0,0.0,0.0,-0.487,-0.708,-0.341
140
- AutoARIMA,TiRex,0.0,0.0,0.0,-0.435,-0.619,-0.301
141
- AutoARIMA,Moirai-2.0,0.05,0.0,0.15,-0.379,-0.548,-0.255
142
- AutoARIMA,Sundial-Base,0.05,0.0,0.15,-0.355,-0.545,-0.212
143
- AutoARIMA,Chronos-Bolt,0.1,0.0,0.25,-0.286,-0.508,-0.137
144
- AutoARIMA,TabPFN-TS,0.2,0.05,0.4,-0.3,-0.541,-0.106
145
- AutoARIMA,Stat. Ensemble,0.525,0.325,0.726,0.01,-0.026,0.055
146
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
147
- AutoARIMA,AutoTheta,0.6,0.4,0.8,0.046,-0.017,0.107
148
- AutoARIMA,AutoETS,0.725,0.549,0.9,0.024,-0.086,0.103
149
- AutoARIMA,Naive,0.65,0.45,0.85,0.292,0.063,0.513
150
- AutoARIMA,Seasonal Naive,0.825,0.675,0.95,0.112,0.008,0.197
151
- AutoARIMA,Drift,0.8,0.6,0.95,0.331,0.115,0.536
152
- AutoTheta,Toto-1.0,0.0,0.0,0.0,-0.603,-0.881,-0.419
153
- AutoTheta,Chronos-2,0.0,0.0,0.0,-0.621,-0.887,-0.421
154
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-0.56,-0.774,-0.394
155
- AutoTheta,TiRex,0.0,0.0,0.0,-0.505,-0.701,-0.356
156
- AutoTheta,Moirai-2.0,0.0,0.0,0.0,-0.446,-0.628,-0.31
157
- AutoTheta,Sundial-Base,0.0,0.0,0.0,-0.421,-0.606,-0.282
158
- AutoTheta,Chronos-Bolt,0.05,0.0,0.15,-0.348,-0.546,-0.206
159
- AutoTheta,TabPFN-TS,0.25,0.05,0.45,-0.363,-0.6,-0.182
160
- AutoTheta,Stat. Ensemble,0.25,0.05,0.45,-0.039,-0.096,0.013
161
- AutoTheta,AutoARIMA,0.4,0.2,0.6,-0.049,-0.12,0.017
162
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoTheta,AutoETS,0.55,0.3,0.75,-0.024,-0.139,0.055
164
- AutoTheta,Naive,0.7,0.5,0.9,0.258,0.05,0.468
165
- AutoTheta,Seasonal Naive,0.8,0.65,0.95,0.068,-0.06,0.171
166
- AutoTheta,Drift,0.85,0.7,1.0,0.299,0.104,0.491
167
- AutoETS,Toto-1.0,0.0,0.0,0.0,-0.566,-0.883,-0.364
168
- AutoETS,Chronos-2,0.0,0.0,0.0,-0.583,-0.881,-0.391
169
- AutoETS,TimesFM-2.5,0.0,0.0,0.0,-0.523,-0.762,-0.366
170
- AutoETS,TiRex,0.0,0.0,0.0,-0.47,-0.695,-0.307
171
- AutoETS,Moirai-2.0,0.05,0.0,0.15,-0.412,-0.619,-0.262
172
- AutoETS,Sundial-Base,0.05,0.0,0.15,-0.388,-0.611,-0.225
173
- AutoETS,Chronos-Bolt,0.15,0.0,0.3,-0.317,-0.559,-0.151
174
- AutoETS,TabPFN-TS,0.15,0.0,0.3,-0.331,-0.581,-0.129
175
- AutoETS,Stat. Ensemble,0.225,0.05,0.4,-0.014,-0.096,0.083
176
- AutoETS,AutoARIMA,0.275,0.1,0.451,-0.024,-0.115,0.079
177
- AutoETS,AutoTheta,0.45,0.25,0.7,0.023,-0.058,0.122
178
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoETS,Naive,0.55,0.3,0.75,0.275,0.032,0.5
180
- AutoETS,Seasonal Naive,0.725,0.55,0.9,0.09,0.026,0.154
181
- AutoETS,Drift,0.75,0.55,0.9,0.315,0.081,0.528
182
- Naive,Toto-1.0,0.0,0.0,0.0,-1.16,-2.211,-0.604
183
- Naive,Chronos-2,0.0,0.0,0.0,-1.183,-2.214,-0.628
184
- Naive,TimesFM-2.5,0.0,0.0,0.0,-1.101,-2.085,-0.578
185
- Naive,TiRex,0.05,0.0,0.15,-1.027,-1.923,-0.52
186
- Naive,Moirai-2.0,0.05,0.0,0.15,-0.948,-1.762,-0.475
187
- Naive,Sundial-Base,0.0,0.0,0.0,-0.914,-1.673,-0.476
188
- Naive,Chronos-Bolt,0.05,0.0,0.15,-0.816,-1.505,-0.423
189
- Naive,TabPFN-TS,0.15,0.0,0.35,-0.836,-1.501,-0.399
190
- Naive,Stat. Ensemble,0.25,0.05,0.45,-0.399,-1.041,-0.063
191
- Naive,AutoARIMA,0.35,0.15,0.55,-0.413,-1.052,-0.067
192
- Naive,AutoTheta,0.3,0.1,0.5,-0.347,-0.881,-0.053
193
- Naive,AutoETS,0.45,0.25,0.7,-0.379,-1.0,-0.033
194
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
195
- Naive,Seasonal Naive,0.6,0.425,0.775,-0.255,-0.874,0.087
196
- Naive,Drift,1.0,1.0,1.0,0.055,0.031,0.085
197
- Seasonal Naive,Toto-1.0,0.0,0.0,0.0,-0.721,-1.122,-0.477
198
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-0.74,-1.148,-0.496
199
- Seasonal Naive,TimesFM-2.5,0.0,0.0,0.0,-0.674,-1.003,-0.466
200
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.615,-0.909,-0.418
201
- Seasonal Naive,Moirai-2.0,0.05,0.0,0.15,-0.552,-0.822,-0.365
202
- Seasonal Naive,Sundial-Base,0.05,0.0,0.15,-0.525,-0.83,-0.326
203
- Seasonal Naive,Chronos-Bolt,0.15,0.0,0.3,-0.447,-0.766,-0.228
204
- Seasonal Naive,TabPFN-TS,0.1,0.0,0.25,-0.463,-0.796,-0.216
205
- Seasonal Naive,Stat. Ensemble,0.225,0.075,0.376,-0.115,-0.235,0.009
206
- Seasonal Naive,AutoARIMA,0.175,0.05,0.325,-0.126,-0.245,-0.008
207
- Seasonal Naive,AutoTheta,0.2,0.05,0.35,-0.073,-0.206,0.057
208
- Seasonal Naive,AutoETS,0.275,0.1,0.45,-0.099,-0.181,-0.027
209
- Seasonal Naive,Naive,0.4,0.225,0.575,0.203,-0.096,0.466
210
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Seasonal Naive,Drift,0.6,0.35,0.8,0.247,-0.04,0.497
212
- Drift,Toto-1.0,0.0,0.0,0.0,-1.286,-2.359,-0.705
213
- Drift,Chronos-2,0.0,0.0,0.0,-1.311,-2.381,-0.728
214
- Drift,TimesFM-2.5,0.0,0.0,0.0,-1.224,-2.223,-0.673
215
- Drift,TiRex,0.0,0.0,0.0,-1.146,-2.086,-0.627
216
- Drift,Moirai-2.0,0.0,0.0,0.0,-1.062,-1.913,-0.584
217
- Drift,Sundial-Base,0.0,0.0,0.0,-1.027,-1.828,-0.559
218
- Drift,Chronos-Bolt,0.0,0.0,0.0,-0.922,-1.659,-0.517
219
- Drift,TabPFN-TS,0.05,0.0,0.15,-0.943,-1.639,-0.484
220
- Drift,Stat. Ensemble,0.2,0.05,0.4,-0.481,-1.157,-0.124
221
- Drift,AutoARIMA,0.2,0.05,0.4,-0.495,-1.156,-0.13
222
- Drift,AutoTheta,0.15,0.0,0.3,-0.426,-0.966,-0.116
223
- Drift,AutoETS,0.25,0.1,0.45,-0.46,-1.12,-0.088
224
- Drift,Naive,0.0,0.0,0.0,-0.059,-0.093,-0.032
225
- Drift,Seasonal Naive,0.4,0.2,0.65,-0.329,-0.989,0.038
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/pairwise_SQL.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
3
- Toto-1.0,Chronos-2,0.6,0.4,0.8,-0.024,-0.08,0.018
4
- Toto-1.0,TimesFM-2.5,0.75,0.55,0.95,0.028,-0.024,0.076
5
- Toto-1.0,TiRex,0.8,0.6,0.95,0.056,0.022,0.104
6
- Toto-1.0,Moirai-2.0,0.95,0.85,1.0,0.112,0.064,0.169
7
- Toto-1.0,Sundial-Base,0.95,0.85,1.0,0.148,0.071,0.211
8
- Toto-1.0,Chronos-Bolt,0.95,0.85,1.0,0.182,0.124,0.236
9
- Toto-1.0,TabPFN-TS,0.85,0.65,1.0,0.213,0.131,0.291
10
- Toto-1.0,AutoARIMA,1.0,1.0,1.0,0.452,0.369,0.537
11
- Toto-1.0,Stat. Ensemble,1.0,1.0,1.0,0.538,0.448,0.619
12
- Toto-1.0,AutoETS,1.0,1.0,1.0,0.666,0.482,0.815
13
- Toto-1.0,AutoTheta,1.0,1.0,1.0,0.638,0.54,0.72
14
- Toto-1.0,Seasonal Naive,1.0,1.0,1.0,0.631,0.521,0.73
15
- Toto-1.0,Naive,1.0,1.0,1.0,0.817,0.735,0.877
16
- Toto-1.0,Drift,1.0,1.0,1.0,0.824,0.744,0.882
17
- Chronos-2,Toto-1.0,0.4,0.2,0.6,0.024,-0.018,0.074
18
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
19
- Chronos-2,TimesFM-2.5,0.75,0.55,0.9,0.051,0.017,0.09
20
- Chronos-2,TiRex,0.75,0.55,0.9,0.078,0.024,0.139
21
- Chronos-2,Moirai-2.0,1.0,1.0,1.0,0.133,0.069,0.2
22
- Chronos-2,Sundial-Base,0.95,0.85,1.0,0.168,0.118,0.222
23
- Chronos-2,Chronos-Bolt,1.0,1.0,1.0,0.201,0.134,0.261
24
- Chronos-2,TabPFN-TS,0.95,0.85,1.0,0.232,0.162,0.3
25
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.465,0.381,0.55
26
- Chronos-2,Stat. Ensemble,1.0,1.0,1.0,0.549,0.47,0.623
27
- Chronos-2,AutoETS,1.0,1.0,1.0,0.674,0.501,0.819
28
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.647,0.555,0.727
29
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.639,0.533,0.735
30
- Chronos-2,Naive,1.0,1.0,1.0,0.822,0.746,0.879
31
- Chronos-2,Drift,1.0,1.0,1.0,0.828,0.756,0.883
32
- TimesFM-2.5,Toto-1.0,0.25,0.05,0.45,-0.029,-0.082,0.024
33
- TimesFM-2.5,Chronos-2,0.25,0.1,0.45,-0.054,-0.099,-0.017
34
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
35
- TimesFM-2.5,TiRex,0.6,0.4,0.8,0.028,-0.014,0.08
36
- TimesFM-2.5,Moirai-2.0,0.9,0.75,1.0,0.086,0.038,0.143
37
- TimesFM-2.5,Sundial-Base,0.95,0.85,1.0,0.123,0.079,0.165
38
- TimesFM-2.5,Chronos-Bolt,0.9,0.75,1.0,0.158,0.095,0.217
39
- TimesFM-2.5,TabPFN-TS,0.85,0.7,1.0,0.19,0.124,0.257
40
- TimesFM-2.5,AutoARIMA,1.0,1.0,1.0,0.436,0.367,0.511
41
- TimesFM-2.5,Stat. Ensemble,1.0,1.0,1.0,0.524,0.451,0.594
42
- TimesFM-2.5,AutoETS,1.0,1.0,1.0,0.657,0.476,0.811
43
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.628,0.534,0.71
44
- TimesFM-2.5,Seasonal Naive,1.0,1.0,1.0,0.62,0.519,0.713
45
- TimesFM-2.5,Naive,1.0,1.0,1.0,0.812,0.731,0.872
46
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.819,0.742,0.877
47
- TiRex,Toto-1.0,0.2,0.05,0.4,-0.059,-0.116,-0.023
48
- TiRex,Chronos-2,0.25,0.1,0.45,-0.085,-0.162,-0.025
49
- TiRex,TimesFM-2.5,0.4,0.2,0.6,-0.029,-0.086,0.013
50
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
51
- TiRex,Moirai-2.0,0.85,0.7,1.0,0.059,0.03,0.09
52
- TiRex,Sundial-Base,0.95,0.85,1.0,0.097,0.013,0.151
53
- TiRex,Chronos-Bolt,0.9,0.75,1.0,0.133,0.073,0.186
54
- TiRex,TabPFN-TS,0.75,0.55,0.95,0.166,0.079,0.247
55
- TiRex,AutoARIMA,1.0,1.0,1.0,0.42,0.343,0.497
56
- TiRex,Stat. Ensemble,1.0,1.0,1.0,0.51,0.425,0.585
57
- TiRex,AutoETS,1.0,1.0,1.0,0.647,0.453,0.808
58
- TiRex,AutoTheta,1.0,1.0,1.0,0.617,0.513,0.698
59
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.609,0.499,0.709
60
- TiRex,Naive,0.95,0.85,1.0,0.806,0.72,0.869
61
- TiRex,Drift,1.0,1.0,1.0,0.814,0.729,0.874
62
- Moirai-2.0,Toto-1.0,0.05,0.0,0.15,-0.126,-0.203,-0.068
63
- Moirai-2.0,Chronos-2,0.0,0.0,0.0,-0.153,-0.249,-0.074
64
- Moirai-2.0,TimesFM-2.5,0.1,0.0,0.25,-0.094,-0.167,-0.039
65
- Moirai-2.0,TiRex,0.15,0.0,0.3,-0.063,-0.099,-0.03
66
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
67
- Moirai-2.0,Sundial-Base,0.9,0.75,1.0,0.041,-0.045,0.091
68
- Moirai-2.0,Chronos-Bolt,0.8,0.6,0.95,0.079,0.017,0.14
69
- Moirai-2.0,TabPFN-TS,0.75,0.55,0.901,0.114,0.03,0.194
70
- Moirai-2.0,AutoARIMA,1.0,1.0,1.0,0.383,0.305,0.459
71
- Moirai-2.0,Stat. Ensemble,0.95,0.85,1.0,0.48,0.391,0.559
72
- Moirai-2.0,AutoETS,0.95,0.85,1.0,0.628,0.423,0.798
73
- Moirai-2.0,AutoTheta,0.95,0.85,1.0,0.593,0.49,0.673
74
- Moirai-2.0,Seasonal Naive,1.0,1.0,1.0,0.584,0.469,0.689
75
- Moirai-2.0,Naive,0.95,0.85,1.0,0.794,0.704,0.859
76
- Moirai-2.0,Drift,1.0,1.0,1.0,0.802,0.716,0.864
77
- Sundial-Base,Toto-1.0,0.05,0.0,0.15,-0.174,-0.268,-0.077
78
- Sundial-Base,Chronos-2,0.05,0.0,0.15,-0.202,-0.285,-0.134
79
- Sundial-Base,TimesFM-2.5,0.05,0.0,0.15,-0.14,-0.198,-0.085
80
- Sundial-Base,TiRex,0.05,0.0,0.15,-0.108,-0.178,-0.013
81
- Sundial-Base,Moirai-2.0,0.1,0.0,0.25,-0.043,-0.1,0.043
82
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
83
- Sundial-Base,Chronos-Bolt,0.4,0.2,0.6,0.04,-0.037,0.117
84
- Sundial-Base,TabPFN-TS,0.65,0.45,0.85,0.076,0.006,0.138
85
- Sundial-Base,AutoARIMA,1.0,1.0,1.0,0.357,0.281,0.436
86
- Sundial-Base,Stat. Ensemble,1.0,1.0,1.0,0.457,0.378,0.534
87
- Sundial-Base,AutoETS,1.0,1.0,1.0,0.615,0.404,0.793
88
- Sundial-Base,AutoTheta,1.0,1.0,1.0,0.575,0.487,0.657
89
- Sundial-Base,Seasonal Naive,1.0,1.0,1.0,0.566,0.454,0.668
90
- Sundial-Base,Naive,1.0,1.0,1.0,0.786,0.704,0.848
91
- Sundial-Base,Drift,1.0,1.0,1.0,0.794,0.716,0.854
92
- Chronos-Bolt,Toto-1.0,0.05,0.0,0.15,-0.222,-0.31,-0.142
93
- Chronos-Bolt,Chronos-2,0.0,0.0,0.0,-0.251,-0.352,-0.155
94
- Chronos-Bolt,TimesFM-2.5,0.1,0.0,0.25,-0.187,-0.277,-0.105
95
- Chronos-Bolt,TiRex,0.1,0.0,0.25,-0.154,-0.229,-0.079
96
- Chronos-Bolt,Moirai-2.0,0.2,0.05,0.4,-0.086,-0.162,-0.017
97
- Chronos-Bolt,Sundial-Base,0.6,0.4,0.8,-0.041,-0.133,0.036
98
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
99
- Chronos-Bolt,TabPFN-TS,0.7,0.5,0.851,0.038,-0.082,0.139
100
- Chronos-Bolt,AutoARIMA,0.9,0.75,1.0,0.33,0.236,0.434
101
- Chronos-Bolt,Stat. Ensemble,0.85,0.7,1.0,0.435,0.328,0.531
102
- Chronos-Bolt,AutoETS,0.9,0.75,1.0,0.597,0.372,0.786
103
- Chronos-Bolt,AutoTheta,0.95,0.85,1.0,0.558,0.453,0.645
104
- Chronos-Bolt,Seasonal Naive,0.9,0.75,1.0,0.549,0.413,0.67
105
- Chronos-Bolt,Naive,0.95,0.85,1.0,0.777,0.686,0.844
106
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.785,0.7,0.851
107
- TabPFN-TS,Toto-1.0,0.15,0.0,0.35,-0.271,-0.41,-0.151
108
- TabPFN-TS,Chronos-2,0.05,0.0,0.15,-0.301,-0.429,-0.193
109
- TabPFN-TS,TimesFM-2.5,0.15,0.0,0.3,-0.235,-0.346,-0.141
110
- TabPFN-TS,TiRex,0.25,0.05,0.45,-0.2,-0.328,-0.086
111
- TabPFN-TS,Moirai-2.0,0.25,0.099,0.45,-0.129,-0.24,-0.03
112
- TabPFN-TS,Sundial-Base,0.35,0.15,0.55,-0.083,-0.161,-0.006
113
- TabPFN-TS,Chronos-Bolt,0.3,0.149,0.5,-0.04,-0.162,0.076
114
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
115
- TabPFN-TS,AutoARIMA,0.9,0.75,1.0,0.304,0.205,0.404
116
- TabPFN-TS,Stat. Ensemble,0.9,0.75,1.0,0.412,0.312,0.505
117
- TabPFN-TS,AutoETS,0.85,0.7,1.0,0.586,0.354,0.779
118
- TabPFN-TS,AutoTheta,0.95,0.85,1.0,0.54,0.433,0.639
119
- TabPFN-TS,Seasonal Naive,0.95,0.85,1.0,0.531,0.407,0.643
120
- TabPFN-TS,Naive,1.0,1.0,1.0,0.768,0.681,0.835
121
- TabPFN-TS,Drift,1.0,1.0,1.0,0.777,0.696,0.841
122
- AutoARIMA,Toto-1.0,0.0,0.0,0.0,-0.825,-1.161,-0.584
123
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-0.869,-1.222,-0.615
124
- AutoARIMA,TimesFM-2.5,0.0,0.0,0.0,-0.773,-1.043,-0.58
125
- AutoARIMA,TiRex,0.0,0.0,0.0,-0.723,-0.989,-0.521
126
- AutoARIMA,Moirai-2.0,0.0,0.0,0.0,-0.621,-0.848,-0.438
127
- AutoARIMA,Sundial-Base,0.0,0.0,0.0,-0.555,-0.773,-0.392
128
- AutoARIMA,Chronos-Bolt,0.1,0.0,0.25,-0.493,-0.768,-0.309
129
- AutoARIMA,TabPFN-TS,0.1,0.0,0.25,-0.436,-0.677,-0.258
130
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
131
- AutoARIMA,Stat. Ensemble,0.725,0.525,0.875,0.156,0.073,0.244
132
- AutoARIMA,AutoETS,0.775,0.6,0.95,0.434,0.08,0.713
133
- AutoARIMA,AutoTheta,0.85,0.7,1.0,0.34,0.203,0.476
134
- AutoARIMA,Seasonal Naive,0.875,0.75,0.975,0.326,0.18,0.457
135
- AutoARIMA,Naive,0.95,0.85,1.0,0.667,0.521,0.764
136
- AutoARIMA,Drift,0.95,0.85,1.0,0.679,0.545,0.773
137
- Stat. Ensemble,Toto-1.0,0.0,0.0,0.0,-1.163,-1.625,-0.811
138
- Stat. Ensemble,Chronos-2,0.0,0.0,0.0,-1.215,-1.654,-0.887
139
- Stat. Ensemble,TimesFM-2.5,0.0,0.0,0.0,-1.102,-1.466,-0.82
140
- Stat. Ensemble,TiRex,0.0,0.0,0.0,-1.042,-1.411,-0.739
141
- Stat. Ensemble,Moirai-2.0,0.05,0.0,0.15,-0.921,-1.267,-0.642
142
- Stat. Ensemble,Sundial-Base,0.0,0.0,0.0,-0.843,-1.145,-0.609
143
- Stat. Ensemble,Chronos-Bolt,0.15,0.0,0.3,-0.77,-1.134,-0.488
144
- Stat. Ensemble,TabPFN-TS,0.1,0.0,0.25,-0.702,-1.019,-0.454
145
- Stat. Ensemble,AutoARIMA,0.275,0.125,0.475,-0.185,-0.323,-0.078
146
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
147
- Stat. Ensemble,AutoETS,0.725,0.55,0.9,0.363,-0.063,0.684
148
- Stat. Ensemble,AutoTheta,0.9,0.75,1.0,0.218,0.089,0.375
149
- Stat. Ensemble,Seasonal Naive,0.725,0.525,0.9,0.201,-0.013,0.369
150
- Stat. Ensemble,Naive,0.95,0.85,1.0,0.605,0.443,0.725
151
- Stat. Ensemble,Drift,1.0,1.0,1.0,0.62,0.465,0.735
152
- AutoETS,Toto-1.0,0.0,0.0,0.0,-1.991,-4.394,-0.932
153
- AutoETS,Chronos-2,0.0,0.0,0.0,-2.072,-4.534,-1.004
154
- AutoETS,TimesFM-2.5,0.0,0.0,0.0,-1.92,-4.291,-0.907
155
- AutoETS,TiRex,0.0,0.0,0.0,-1.837,-4.196,-0.83
156
- AutoETS,Moirai-2.0,0.05,0.0,0.15,-1.688,-3.95,-0.734
157
- AutoETS,Sundial-Base,0.0,0.0,0.0,-1.595,-3.825,-0.677
158
- AutoETS,Chronos-Bolt,0.1,0.0,0.25,-1.484,-3.678,-0.591
159
- AutoETS,TabPFN-TS,0.15,0.0,0.3,-1.414,-3.528,-0.549
160
- AutoETS,AutoARIMA,0.225,0.05,0.4,-0.767,-2.48,-0.087
161
- AutoETS,Stat. Ensemble,0.275,0.1,0.45,-0.569,-2.162,0.059
162
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoETS,AutoTheta,0.75,0.55,0.9,-0.25,-1.708,0.289
164
- AutoETS,Seasonal Naive,0.725,0.525,0.875,-0.269,-1.702,0.285
165
- AutoETS,Naive,0.8,0.6,0.95,0.34,-0.612,0.679
166
- AutoETS,Drift,0.85,0.65,1.0,0.363,-0.555,0.692
167
- AutoTheta,Toto-1.0,0.0,0.0,0.0,-1.764,-2.575,-1.173
168
- AutoTheta,Chronos-2,0.0,0.0,0.0,-1.831,-2.661,-1.245
169
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-1.686,-2.451,-1.145
170
- AutoTheta,TiRex,0.0,0.0,0.0,-1.61,-2.316,-1.054
171
- AutoTheta,Moirai-2.0,0.05,0.0,0.15,-1.456,-2.054,-0.962
172
- AutoTheta,Sundial-Base,0.0,0.0,0.0,-1.355,-1.914,-0.951
173
- AutoTheta,Chronos-Bolt,0.05,0.0,0.15,-1.262,-1.821,-0.828
174
- AutoTheta,TabPFN-TS,0.05,0.0,0.15,-1.175,-1.772,-0.765
175
- AutoTheta,AutoARIMA,0.15,0.0,0.3,-0.515,-0.908,-0.255
176
- AutoTheta,Stat. Ensemble,0.1,0.0,0.25,-0.278,-0.6,-0.097
177
- AutoTheta,AutoETS,0.25,0.1,0.45,0.2,-0.407,0.631
178
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoTheta,Seasonal Naive,0.7,0.5,0.9,-0.021,-0.413,0.245
180
- AutoTheta,Naive,0.85,0.65,1.0,0.495,0.359,0.605
181
- AutoTheta,Drift,0.9,0.75,1.0,0.514,0.382,0.62
182
- Seasonal Naive,Toto-1.0,0.0,0.0,0.0,-1.707,-2.709,-1.086
183
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-1.772,-2.77,-1.139
184
- Seasonal Naive,TimesFM-2.5,0.0,0.0,0.0,-1.631,-2.486,-1.077
185
- Seasonal Naive,TiRex,0.0,0.0,0.0,-1.556,-2.437,-0.997
186
- Seasonal Naive,Moirai-2.0,0.0,0.0,0.0,-1.405,-2.214,-0.883
187
- Seasonal Naive,Sundial-Base,0.0,0.0,0.0,-1.307,-2.016,-0.832
188
- Seasonal Naive,Chronos-Bolt,0.1,0.0,0.25,-1.215,-2.027,-0.704
189
- Seasonal Naive,TabPFN-TS,0.05,0.0,0.15,-1.13,-1.801,-0.687
190
- Seasonal Naive,AutoARIMA,0.125,0.025,0.25,-0.483,-0.841,-0.219
191
- Seasonal Naive,Stat. Ensemble,0.275,0.1,0.475,-0.252,-0.585,0.013
192
- Seasonal Naive,AutoETS,0.275,0.125,0.475,0.212,-0.398,0.63
193
- Seasonal Naive,AutoTheta,0.3,0.1,0.5,0.021,-0.325,0.292
194
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
195
- Seasonal Naive,Naive,0.7,0.525,0.85,0.505,0.274,0.671
196
- Seasonal Naive,Drift,0.85,0.65,1.0,0.524,0.302,0.688
197
- Naive,Toto-1.0,0.0,0.0,0.0,-4.472,-7.119,-2.772
198
- Naive,Chronos-2,0.0,0.0,0.0,-4.604,-7.24,-2.94
199
- Naive,TimesFM-2.5,0.0,0.0,0.0,-4.318,-6.839,-2.721
200
- Naive,TiRex,0.05,0.0,0.15,-4.166,-6.661,-2.569
201
- Naive,Moirai-2.0,0.05,0.0,0.15,-3.861,-6.072,-2.376
202
- Naive,Sundial-Base,0.0,0.0,0.0,-3.663,-5.597,-2.374
203
- Naive,Chronos-Bolt,0.05,0.0,0.15,-3.478,-5.427,-2.188
204
- Naive,TabPFN-TS,0.0,0.0,0.0,-3.306,-5.057,-2.139
205
- Naive,AutoARIMA,0.05,0.0,0.15,-1.999,-3.24,-1.086
206
- Naive,Stat. Ensemble,0.05,0.0,0.15,-1.53,-2.642,-0.795
207
- Naive,AutoETS,0.2,0.05,0.4,-0.516,-2.117,0.38
208
- Naive,AutoTheta,0.15,0.0,0.35,-0.98,-1.531,-0.56
209
- Naive,Seasonal Naive,0.3,0.15,0.475,-1.021,-2.041,-0.378
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Drift,1.0,1.0,1.0,0.039,0.023,0.057
212
- Drift,Toto-1.0,0.0,0.0,0.0,-4.691,-7.48,-2.9
213
- Drift,Chronos-2,0.0,0.0,0.0,-4.828,-7.54,-3.102
214
- Drift,TimesFM-2.5,0.0,0.0,0.0,-4.531,-7.137,-2.871
215
- Drift,TiRex,0.0,0.0,0.0,-4.373,-6.957,-2.696
216
- Drift,Moirai-2.0,0.0,0.0,0.0,-4.056,-6.361,-2.518
217
- Drift,Sundial-Base,0.0,0.0,0.0,-3.85,-5.831,-2.522
218
- Drift,Chronos-Bolt,0.0,0.0,0.0,-3.658,-5.689,-2.332
219
- Drift,TabPFN-TS,0.0,0.0,0.0,-3.479,-5.272,-2.287
220
- Drift,AutoARIMA,0.05,0.0,0.15,-2.119,-3.399,-1.198
221
- Drift,Stat. Ensemble,0.0,0.0,0.0,-1.632,-2.779,-0.87
222
- Drift,AutoETS,0.15,0.0,0.35,-0.569,-2.25,0.357
223
- Drift,AutoTheta,0.1,0.0,0.25,-1.059,-1.634,-0.617
224
- Drift,Seasonal Naive,0.15,0.0,0.35,-1.102,-2.206,-0.433
225
- Drift,Naive,0.0,0.0,0.0,-0.04,-0.061,-0.024
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/pairwise_WAPE.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
3
- Chronos-2,Toto-1.0,0.35,0.15,0.55,0.065,-0.014,0.173
4
- Chronos-2,TimesFM-2.5,0.75,0.55,0.9,0.08,0.03,0.14
5
- Chronos-2,TiRex,0.8,0.6,0.95,0.109,0.032,0.209
6
- Chronos-2,Moirai-2.0,1.0,1.0,1.0,0.136,0.059,0.226
7
- Chronos-2,TabPFN-TS,0.95,0.85,1.0,0.129,0.084,0.186
8
- Chronos-2,Chronos-Bolt,1.0,1.0,1.0,0.178,0.103,0.259
9
- Chronos-2,Sundial-Base,1.0,1.0,1.0,0.148,0.099,0.198
10
- Chronos-2,Stat. Ensemble,1.0,1.0,1.0,0.415,0.319,0.51
11
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.421,0.312,0.531
12
- Chronos-2,AutoETS,1.0,1.0,1.0,0.476,0.341,0.599
13
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.434,0.333,0.532
14
- Chronos-2,Naive,1.0,1.0,1.0,0.482,0.382,0.572
15
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.506,0.388,0.617
16
- Chronos-2,Drift,1.0,1.0,1.0,0.514,0.42,0.599
17
- Toto-1.0,Chronos-2,0.65,0.45,0.85,-0.07,-0.21,0.014
18
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
19
- Toto-1.0,TimesFM-2.5,0.8,0.6,0.95,0.016,-0.048,0.069
20
- Toto-1.0,TiRex,0.75,0.55,0.9,0.047,-0.0,0.1
21
- Toto-1.0,Moirai-2.0,0.9,0.75,1.0,0.076,0.017,0.14
22
- Toto-1.0,TabPFN-TS,0.8,0.6,0.95,0.069,-0.015,0.137
23
- Toto-1.0,Chronos-Bolt,0.95,0.85,1.0,0.12,0.053,0.182
24
- Toto-1.0,Sundial-Base,0.9,0.75,1.0,0.089,-0.011,0.164
25
- Toto-1.0,Stat. Ensemble,1.0,1.0,1.0,0.374,0.288,0.473
26
- Toto-1.0,AutoARIMA,1.0,1.0,1.0,0.38,0.29,0.482
27
- Toto-1.0,AutoETS,1.0,1.0,1.0,0.439,0.315,0.574
28
- Toto-1.0,AutoTheta,1.0,1.0,1.0,0.395,0.314,0.488
29
- Toto-1.0,Naive,1.0,1.0,1.0,0.446,0.353,0.545
30
- Toto-1.0,Seasonal Naive,1.0,1.0,1.0,0.471,0.37,0.581
31
- Toto-1.0,Drift,1.0,1.0,1.0,0.48,0.397,0.565
32
- TimesFM-2.5,Chronos-2,0.25,0.1,0.45,-0.088,-0.162,-0.031
33
- TimesFM-2.5,Toto-1.0,0.2,0.05,0.4,-0.017,-0.074,0.046
34
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
35
- TimesFM-2.5,TiRex,0.6,0.4,0.8,0.031,-0.009,0.083
36
- TimesFM-2.5,Moirai-2.0,0.85,0.65,1.0,0.06,0.024,0.101
37
- TimesFM-2.5,TabPFN-TS,0.7,0.5,0.9,0.053,0.014,0.094
38
- TimesFM-2.5,Chronos-Bolt,0.9,0.75,1.0,0.106,0.065,0.148
39
- TimesFM-2.5,Sundial-Base,0.9,0.75,1.0,0.074,0.024,0.112
40
- TimesFM-2.5,Stat. Ensemble,1.0,1.0,1.0,0.363,0.285,0.453
41
- TimesFM-2.5,AutoARIMA,1.0,1.0,1.0,0.37,0.28,0.472
42
- TimesFM-2.5,AutoETS,1.0,1.0,1.0,0.43,0.307,0.558
43
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.385,0.303,0.473
44
- TimesFM-2.5,Naive,1.0,1.0,1.0,0.437,0.35,0.527
45
- TimesFM-2.5,Seasonal Naive,1.0,1.0,1.0,0.462,0.364,0.568
46
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.472,0.391,0.548
47
- TiRex,Chronos-2,0.2,0.05,0.4,-0.122,-0.264,-0.033
48
- TiRex,Toto-1.0,0.25,0.1,0.45,-0.049,-0.111,0.0
49
- TiRex,TimesFM-2.5,0.4,0.2,0.6,-0.032,-0.091,0.009
50
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
51
- TiRex,Moirai-2.0,0.75,0.55,0.9,0.03,0.004,0.056
52
- TiRex,TabPFN-TS,0.7,0.5,0.9,0.023,-0.05,0.081
53
- TiRex,Chronos-Bolt,0.85,0.65,1.0,0.077,0.03,0.124
54
- TiRex,Sundial-Base,0.85,0.7,0.95,0.044,-0.063,0.112
55
- TiRex,Stat. Ensemble,0.95,0.85,1.0,0.343,0.258,0.427
56
- TiRex,AutoARIMA,1.0,1.0,1.0,0.35,0.265,0.443
57
- TiRex,AutoETS,0.95,0.85,1.0,0.412,0.283,0.547
58
- TiRex,AutoTheta,1.0,1.0,1.0,0.365,0.286,0.45
59
- TiRex,Naive,0.95,0.85,1.0,0.419,0.321,0.511
60
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.445,0.35,0.545
61
- TiRex,Drift,1.0,1.0,1.0,0.455,0.377,0.533
62
- Moirai-2.0,Chronos-2,0.0,0.0,0.0,-0.157,-0.292,-0.062
63
- Moirai-2.0,Toto-1.0,0.1,0.0,0.25,-0.082,-0.162,-0.017
64
- Moirai-2.0,TimesFM-2.5,0.15,0.0,0.35,-0.064,-0.112,-0.025
65
- Moirai-2.0,TiRex,0.25,0.1,0.45,-0.031,-0.06,-0.004
66
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
67
- Moirai-2.0,TabPFN-TS,0.5,0.3,0.75,-0.008,-0.075,0.05
68
- Moirai-2.0,Chronos-Bolt,0.75,0.55,0.9,0.048,0.003,0.096
69
- Moirai-2.0,Sundial-Base,0.75,0.55,0.9,0.014,-0.074,0.068
70
- Moirai-2.0,Stat. Ensemble,1.0,1.0,1.0,0.322,0.239,0.411
71
- Moirai-2.0,AutoARIMA,1.0,1.0,1.0,0.33,0.244,0.426
72
- Moirai-2.0,AutoETS,1.0,1.0,1.0,0.394,0.265,0.533
73
- Moirai-2.0,AutoTheta,1.0,1.0,1.0,0.345,0.268,0.435
74
- Moirai-2.0,Naive,0.95,0.85,1.0,0.401,0.311,0.491
75
- Moirai-2.0,Seasonal Naive,1.0,1.0,1.0,0.428,0.333,0.532
76
- Moirai-2.0,Drift,1.0,1.0,1.0,0.438,0.36,0.513
77
- TabPFN-TS,Chronos-2,0.05,0.0,0.15,-0.149,-0.229,-0.091
78
- TabPFN-TS,Toto-1.0,0.2,0.05,0.4,-0.074,-0.159,0.015
79
- TabPFN-TS,TimesFM-2.5,0.3,0.1,0.5,-0.056,-0.103,-0.015
80
- TabPFN-TS,TiRex,0.3,0.1,0.5,-0.023,-0.089,0.048
81
- TabPFN-TS,Moirai-2.0,0.5,0.25,0.7,0.008,-0.053,0.07
82
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
83
- TabPFN-TS,Chronos-Bolt,0.6,0.4,0.8,0.055,-0.022,0.125
84
- TabPFN-TS,Sundial-Base,0.5,0.3,0.7,0.022,-0.044,0.083
85
- TabPFN-TS,Stat. Ensemble,1.0,1.0,1.0,0.327,0.226,0.427
86
- TabPFN-TS,AutoARIMA,1.0,1.0,1.0,0.335,0.224,0.447
87
- TabPFN-TS,AutoETS,0.95,0.85,1.0,0.398,0.258,0.535
88
- TabPFN-TS,AutoTheta,0.95,0.85,1.0,0.35,0.246,0.448
89
- TabPFN-TS,Naive,1.0,1.0,1.0,0.405,0.295,0.51
90
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.432,0.311,0.55
91
- TabPFN-TS,Drift,1.0,1.0,1.0,0.442,0.337,0.534
92
- Chronos-Bolt,Chronos-2,0.0,0.0,0.0,-0.216,-0.349,-0.115
93
- Chronos-Bolt,Toto-1.0,0.05,0.0,0.15,-0.137,-0.223,-0.056
94
- Chronos-Bolt,TimesFM-2.5,0.1,0.0,0.25,-0.118,-0.174,-0.069
95
- Chronos-Bolt,TiRex,0.15,0.0,0.35,-0.083,-0.141,-0.031
96
- Chronos-Bolt,Moirai-2.0,0.25,0.1,0.45,-0.051,-0.106,-0.003
97
- Chronos-Bolt,TabPFN-TS,0.4,0.2,0.6,-0.059,-0.143,0.022
98
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
99
- Chronos-Bolt,Sundial-Base,0.6,0.4,0.8,-0.036,-0.131,0.028
100
- Chronos-Bolt,Stat. Ensemble,1.0,1.0,1.0,0.288,0.207,0.384
101
- Chronos-Bolt,AutoARIMA,1.0,1.0,1.0,0.296,0.204,0.41
102
- Chronos-Bolt,AutoETS,1.0,1.0,1.0,0.363,0.23,0.509
103
- Chronos-Bolt,AutoTheta,1.0,1.0,1.0,0.312,0.234,0.407
104
- Chronos-Bolt,Naive,0.95,0.85,1.0,0.371,0.282,0.464
105
- Chronos-Bolt,Seasonal Naive,1.0,1.0,1.0,0.399,0.295,0.512
106
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.409,0.333,0.49
107
- Sundial-Base,Chronos-2,0.0,0.0,0.0,-0.174,-0.247,-0.11
108
- Sundial-Base,Toto-1.0,0.1,0.0,0.25,-0.097,-0.196,0.011
109
- Sundial-Base,TimesFM-2.5,0.1,0.0,0.25,-0.08,-0.126,-0.024
110
- Sundial-Base,TiRex,0.15,0.05,0.3,-0.046,-0.126,0.059
111
- Sundial-Base,Moirai-2.0,0.25,0.1,0.45,-0.014,-0.073,0.069
112
- Sundial-Base,TabPFN-TS,0.5,0.3,0.7,-0.022,-0.09,0.042
113
- Sundial-Base,Chronos-Bolt,0.4,0.2,0.6,0.034,-0.029,0.116
114
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
115
- Sundial-Base,Stat. Ensemble,1.0,1.0,1.0,0.313,0.227,0.408
116
- Sundial-Base,AutoARIMA,1.0,1.0,1.0,0.32,0.224,0.423
117
- Sundial-Base,AutoETS,1.0,1.0,1.0,0.385,0.251,0.518
118
- Sundial-Base,AutoTheta,1.0,1.0,1.0,0.336,0.245,0.426
119
- Sundial-Base,Naive,1.0,1.0,1.0,0.392,0.306,0.478
120
- Sundial-Base,Seasonal Naive,1.0,1.0,1.0,0.42,0.312,0.53
121
- Sundial-Base,Drift,1.0,1.0,1.0,0.43,0.347,0.507
122
- Stat. Ensemble,Chronos-2,0.0,0.0,0.0,-0.708,-1.042,-0.469
123
- Stat. Ensemble,Toto-1.0,0.0,0.0,0.0,-0.596,-0.898,-0.404
124
- Stat. Ensemble,TimesFM-2.5,0.0,0.0,0.0,-0.571,-0.828,-0.398
125
- Stat. Ensemble,TiRex,0.05,0.0,0.15,-0.522,-0.746,-0.348
126
- Stat. Ensemble,Moirai-2.0,0.0,0.0,0.0,-0.476,-0.699,-0.314
127
- Stat. Ensemble,TabPFN-TS,0.0,0.0,0.0,-0.487,-0.746,-0.292
128
- Stat. Ensemble,Chronos-Bolt,0.0,0.0,0.0,-0.405,-0.624,-0.261
129
- Stat. Ensemble,Sundial-Base,0.0,0.0,0.0,-0.455,-0.688,-0.293
130
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
131
- Stat. Ensemble,AutoARIMA,0.525,0.325,0.725,0.011,-0.045,0.063
132
- Stat. Ensemble,AutoETS,0.725,0.525,0.9,0.105,-0.033,0.285
133
- Stat. Ensemble,AutoTheta,0.85,0.7,1.0,0.033,-0.007,0.072
134
- Stat. Ensemble,Naive,0.7,0.5,0.9,0.116,0.037,0.193
135
- Stat. Ensemble,Seasonal Naive,0.875,0.75,0.975,0.156,0.072,0.236
136
- Stat. Ensemble,Drift,0.8,0.6,0.95,0.17,0.096,0.237
137
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-0.727,-1.133,-0.453
138
- AutoARIMA,Toto-1.0,0.0,0.0,0.0,-0.614,-0.93,-0.408
139
- AutoARIMA,TimesFM-2.5,0.0,0.0,0.0,-0.588,-0.895,-0.389
140
- AutoARIMA,TiRex,0.0,0.0,0.0,-0.539,-0.797,-0.361
141
- AutoARIMA,Moirai-2.0,0.0,0.0,0.0,-0.492,-0.744,-0.323
142
- AutoARIMA,TabPFN-TS,0.0,0.0,0.0,-0.503,-0.808,-0.289
143
- AutoARIMA,Chronos-Bolt,0.0,0.0,0.0,-0.42,-0.694,-0.256
144
- AutoARIMA,Sundial-Base,0.0,0.0,0.0,-0.471,-0.734,-0.288
145
- AutoARIMA,Stat. Ensemble,0.475,0.275,0.675,-0.011,-0.067,0.043
146
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
147
- AutoARIMA,AutoETS,0.625,0.425,0.825,0.095,-0.068,0.276
148
- AutoARIMA,AutoTheta,0.65,0.4,0.85,0.023,-0.039,0.083
149
- AutoARIMA,Naive,0.6,0.399,0.8,0.106,-0.005,0.204
150
- AutoARIMA,Seasonal Naive,0.875,0.75,0.975,0.146,0.061,0.226
151
- AutoARIMA,Drift,0.75,0.55,0.95,0.161,0.062,0.25
152
- AutoETS,Chronos-2,0.0,0.0,0.0,-0.909,-1.495,-0.517
153
- AutoETS,Toto-1.0,0.0,0.0,0.0,-0.784,-1.345,-0.461
154
- AutoETS,TimesFM-2.5,0.0,0.0,0.0,-0.755,-1.264,-0.444
155
- AutoETS,TiRex,0.05,0.0,0.15,-0.7,-1.21,-0.394
156
- AutoETS,Moirai-2.0,0.0,0.0,0.0,-0.649,-1.141,-0.36
157
- AutoETS,TabPFN-TS,0.05,0.0,0.15,-0.662,-1.151,-0.347
158
- AutoETS,Chronos-Bolt,0.0,0.0,0.0,-0.57,-1.035,-0.299
159
- AutoETS,Sundial-Base,0.0,0.0,0.0,-0.626,-1.074,-0.334
160
- AutoETS,Stat. Ensemble,0.275,0.1,0.475,-0.117,-0.399,0.032
161
- AutoETS,AutoARIMA,0.375,0.175,0.575,-0.105,-0.381,0.063
162
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoETS,AutoTheta,0.6,0.4,0.8,-0.08,-0.32,0.051
164
- AutoETS,Naive,0.5,0.3,0.7,0.012,-0.2,0.142
165
- AutoETS,Seasonal Naive,0.725,0.55,0.9,0.057,-0.196,0.202
166
- AutoETS,Drift,0.75,0.55,0.9,0.073,-0.137,0.197
167
- AutoTheta,Chronos-2,0.0,0.0,0.0,-0.767,-1.135,-0.5
168
- AutoTheta,Toto-1.0,0.0,0.0,0.0,-0.652,-0.955,-0.458
169
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-0.625,-0.898,-0.434
170
- AutoTheta,TiRex,0.0,0.0,0.0,-0.574,-0.817,-0.401
171
- AutoTheta,Moirai-2.0,0.0,0.0,0.0,-0.527,-0.77,-0.366
172
- AutoTheta,TabPFN-TS,0.05,0.0,0.15,-0.538,-0.812,-0.326
173
- AutoTheta,Chronos-Bolt,0.0,0.0,0.0,-0.453,-0.687,-0.305
174
- AutoTheta,Sundial-Base,0.0,0.0,0.0,-0.505,-0.741,-0.324
175
- AutoTheta,Stat. Ensemble,0.15,0.0,0.3,-0.035,-0.077,0.007
176
- AutoTheta,AutoARIMA,0.35,0.15,0.6,-0.023,-0.09,0.037
177
- AutoTheta,AutoETS,0.4,0.2,0.6,0.074,-0.054,0.242
178
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoTheta,Naive,0.7,0.5,0.9,0.085,0.008,0.16
180
- AutoTheta,Seasonal Naive,0.8,0.65,0.95,0.127,0.043,0.209
181
- AutoTheta,Drift,0.85,0.7,1.0,0.142,0.077,0.202
182
- Naive,Chronos-2,0.0,0.0,0.0,-0.932,-1.338,-0.617
183
- Naive,Toto-1.0,0.0,0.0,0.0,-0.806,-1.198,-0.545
184
- Naive,TimesFM-2.5,0.0,0.0,0.0,-0.777,-1.112,-0.538
185
- Naive,TiRex,0.05,0.0,0.15,-0.721,-1.045,-0.472
186
- Naive,Moirai-2.0,0.05,0.0,0.15,-0.669,-0.964,-0.452
187
- Naive,TabPFN-TS,0.0,0.0,0.0,-0.682,-1.039,-0.417
188
- Naive,Chronos-Bolt,0.05,0.0,0.15,-0.589,-0.866,-0.392
189
- Naive,Sundial-Base,0.0,0.0,0.0,-0.646,-0.915,-0.441
190
- Naive,Stat. Ensemble,0.3,0.1,0.5,-0.131,-0.239,-0.038
191
- Naive,AutoARIMA,0.4,0.2,0.601,-0.119,-0.256,0.005
192
- Naive,AutoETS,0.5,0.3,0.7,-0.012,-0.166,0.167
193
- Naive,AutoTheta,0.3,0.1,0.5,-0.093,-0.19,-0.008
194
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
195
- Naive,Seasonal Naive,0.6,0.425,0.775,0.045,-0.064,0.152
196
- Naive,Drift,1.0,1.0,1.0,0.061,0.032,0.1
197
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-1.023,-1.609,-0.634
198
- Seasonal Naive,Toto-1.0,0.0,0.0,0.0,-0.891,-1.385,-0.588
199
- Seasonal Naive,TimesFM-2.5,0.0,0.0,0.0,-0.86,-1.313,-0.572
200
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.802,-1.198,-0.539
201
- Seasonal Naive,Moirai-2.0,0.0,0.0,0.0,-0.748,-1.138,-0.499
202
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.761,-1.22,-0.452
203
- Seasonal Naive,Chronos-Bolt,0.0,0.0,0.0,-0.664,-1.049,-0.418
204
- Seasonal Naive,Sundial-Base,0.0,0.0,0.0,-0.723,-1.13,-0.453
205
- Seasonal Naive,Stat. Ensemble,0.125,0.025,0.25,-0.185,-0.309,-0.078
206
- Seasonal Naive,AutoARIMA,0.125,0.025,0.25,-0.172,-0.293,-0.065
207
- Seasonal Naive,AutoETS,0.275,0.1,0.45,-0.06,-0.252,0.164
208
- Seasonal Naive,AutoTheta,0.2,0.05,0.35,-0.145,-0.264,-0.045
209
- Seasonal Naive,Naive,0.4,0.225,0.575,-0.047,-0.179,0.06
210
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Seasonal Naive,Drift,0.6,0.35,0.8,0.017,-0.096,0.115
212
- Drift,Chronos-2,0.0,0.0,0.0,-1.059,-1.492,-0.723
213
- Drift,Toto-1.0,0.0,0.0,0.0,-0.924,-1.3,-0.659
214
- Drift,TimesFM-2.5,0.0,0.0,0.0,-0.893,-1.214,-0.641
215
- Drift,TiRex,0.0,0.0,0.0,-0.834,-1.143,-0.606
216
- Drift,Moirai-2.0,0.0,0.0,0.0,-0.779,-1.054,-0.562
217
- Drift,TabPFN-TS,0.0,0.0,0.0,-0.792,-1.147,-0.508
218
- Drift,Chronos-Bolt,0.0,0.0,0.0,-0.693,-0.96,-0.5
219
- Drift,Sundial-Base,0.0,0.0,0.0,-0.753,-1.028,-0.532
220
- Drift,Stat. Ensemble,0.2,0.05,0.4,-0.205,-0.311,-0.106
221
- Drift,AutoARIMA,0.25,0.05,0.45,-0.192,-0.333,-0.066
222
- Drift,AutoETS,0.25,0.1,0.45,-0.079,-0.245,0.12
223
- Drift,AutoTheta,0.15,0.0,0.3,-0.165,-0.253,-0.083
224
- Drift,Naive,0.0,0.0,0.0,-0.065,-0.111,-0.033
225
- Drift,Seasonal Naive,0.4,0.2,0.65,-0.018,-0.129,0.088
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_cloud/pairwise_WQL.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
3
- Chronos-2,Toto-1.0,0.35,0.15,0.55,0.07,-0.014,0.185
4
- Chronos-2,TimesFM-2.5,0.75,0.55,0.9,0.088,0.033,0.155
5
- Chronos-2,TiRex,0.85,0.7,1.0,0.113,0.032,0.222
6
- Chronos-2,Moirai-2.0,1.0,1.0,1.0,0.151,0.066,0.248
7
- Chronos-2,TabPFN-TS,1.0,1.0,1.0,0.19,0.134,0.247
8
- Chronos-2,Chronos-Bolt,1.0,1.0,1.0,0.21,0.134,0.3
9
- Chronos-2,Sundial-Base,1.0,1.0,1.0,0.19,0.145,0.236
10
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.504,0.412,0.597
11
- Chronos-2,Stat. Ensemble,1.0,1.0,1.0,0.582,0.505,0.655
12
- Chronos-2,AutoETS,1.0,1.0,1.0,0.73,0.573,0.855
13
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.643,0.566,0.707
14
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.669,0.582,0.746
15
- Chronos-2,Naive,1.0,1.0,1.0,0.8,0.757,0.837
16
- Chronos-2,Drift,1.0,1.0,1.0,0.808,0.765,0.845
17
- Toto-1.0,Chronos-2,0.65,0.45,0.85,-0.075,-0.227,0.013
18
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
19
- Toto-1.0,TimesFM-2.5,0.75,0.55,0.95,0.019,-0.047,0.073
20
- Toto-1.0,TiRex,0.75,0.55,0.9,0.046,0.0,0.095
21
- Toto-1.0,Moirai-2.0,0.85,0.7,1.0,0.087,0.03,0.145
22
- Toto-1.0,TabPFN-TS,0.8,0.6,0.95,0.129,0.024,0.218
23
- Toto-1.0,Chronos-Bolt,0.95,0.85,1.0,0.151,0.089,0.21
24
- Toto-1.0,Sundial-Base,0.9,0.75,1.0,0.129,0.024,0.201
25
- Toto-1.0,AutoARIMA,1.0,1.0,1.0,0.467,0.398,0.544
26
- Toto-1.0,Stat. Ensemble,1.0,1.0,1.0,0.551,0.473,0.628
27
- Toto-1.0,AutoETS,1.0,1.0,1.0,0.709,0.535,0.843
28
- Toto-1.0,AutoTheta,1.0,1.0,1.0,0.616,0.541,0.683
29
- Toto-1.0,Seasonal Naive,1.0,1.0,1.0,0.645,0.562,0.725
30
- Toto-1.0,Naive,1.0,1.0,1.0,0.785,0.726,0.829
31
- Toto-1.0,Drift,1.0,1.0,1.0,0.793,0.736,0.838
32
- TimesFM-2.5,Chronos-2,0.25,0.1,0.45,-0.097,-0.183,-0.034
33
- TimesFM-2.5,Toto-1.0,0.25,0.05,0.45,-0.02,-0.079,0.045
34
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
35
- TimesFM-2.5,TiRex,0.6,0.4,0.8,0.027,-0.018,0.086
36
- TimesFM-2.5,Moirai-2.0,0.85,0.65,1.0,0.069,0.028,0.116
37
- TimesFM-2.5,TabPFN-TS,0.7,0.5,0.9,0.112,0.044,0.188
38
- TimesFM-2.5,Chronos-Bolt,0.9,0.75,1.0,0.134,0.092,0.179
39
- TimesFM-2.5,Sundial-Base,0.95,0.85,1.0,0.111,0.06,0.148
40
- TimesFM-2.5,AutoARIMA,1.0,1.0,1.0,0.456,0.384,0.539
41
- TimesFM-2.5,Stat. Ensemble,1.0,1.0,1.0,0.542,0.468,0.619
42
- TimesFM-2.5,AutoETS,1.0,1.0,1.0,0.704,0.532,0.841
43
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.609,0.535,0.678
44
- TimesFM-2.5,Seasonal Naive,1.0,1.0,1.0,0.638,0.557,0.714
45
- TimesFM-2.5,Naive,1.0,1.0,1.0,0.78,0.727,0.825
46
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.789,0.739,0.832
47
- TiRex,Chronos-2,0.15,0.0,0.3,-0.127,-0.285,-0.033
48
- TiRex,Toto-1.0,0.25,0.1,0.45,-0.048,-0.105,-0.0
49
- TiRex,TimesFM-2.5,0.4,0.2,0.6,-0.028,-0.095,0.018
50
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
51
- TiRex,Moirai-2.0,0.9,0.75,1.0,0.043,0.019,0.07
52
- TiRex,TabPFN-TS,0.75,0.55,0.95,0.087,-0.014,0.174
53
- TiRex,Chronos-Bolt,0.9,0.75,1.0,0.11,0.064,0.155
54
- TiRex,Sundial-Base,0.95,0.85,1.0,0.087,-0.025,0.153
55
- TiRex,AutoARIMA,1.0,1.0,1.0,0.441,0.376,0.512
56
- TiRex,Stat. Ensemble,1.0,1.0,1.0,0.529,0.452,0.602
57
- TiRex,AutoETS,1.0,1.0,1.0,0.697,0.514,0.839
58
- TiRex,AutoTheta,1.0,1.0,1.0,0.598,0.521,0.663
59
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.627,0.54,0.71
60
- TiRex,Naive,1.0,1.0,1.0,0.774,0.712,0.821
61
- TiRex,Drift,1.0,1.0,1.0,0.783,0.724,0.829
62
- Moirai-2.0,Chronos-2,0.0,0.0,0.0,-0.177,-0.33,-0.071
63
- Moirai-2.0,Toto-1.0,0.15,0.0,0.3,-0.095,-0.169,-0.031
64
- Moirai-2.0,TimesFM-2.5,0.15,0.0,0.35,-0.074,-0.131,-0.029
65
- Moirai-2.0,TiRex,0.1,0.0,0.25,-0.044,-0.075,-0.02
66
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
67
- Moirai-2.0,TabPFN-TS,0.5,0.3,0.7,0.047,-0.055,0.137
68
- Moirai-2.0,Chronos-Bolt,0.85,0.699,0.975,0.07,0.023,0.118
69
- Moirai-2.0,Sundial-Base,0.9,0.75,1.0,0.046,-0.056,0.104
70
- Moirai-2.0,AutoARIMA,1.0,1.0,1.0,0.416,0.347,0.496
71
- Moirai-2.0,Stat. Ensemble,1.0,1.0,1.0,0.508,0.423,0.589
72
- Moirai-2.0,AutoETS,1.0,1.0,1.0,0.685,0.494,0.835
73
- Moirai-2.0,AutoTheta,1.0,1.0,1.0,0.58,0.498,0.651
74
- Moirai-2.0,Seasonal Naive,1.0,1.0,1.0,0.611,0.523,0.693
75
- Moirai-2.0,Naive,1.0,1.0,1.0,0.764,0.7,0.813
76
- Moirai-2.0,Drift,1.0,1.0,1.0,0.773,0.715,0.821
77
- TabPFN-TS,Chronos-2,0.0,0.0,0.0,-0.235,-0.329,-0.154
78
- TabPFN-TS,Toto-1.0,0.2,0.05,0.4,-0.149,-0.279,-0.025
79
- TabPFN-TS,TimesFM-2.5,0.3,0.1,0.5,-0.126,-0.232,-0.046
80
- TabPFN-TS,TiRex,0.25,0.05,0.45,-0.096,-0.211,0.014
81
- TabPFN-TS,Moirai-2.0,0.5,0.3,0.7,-0.049,-0.159,0.053
82
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
83
- TabPFN-TS,Chronos-Bolt,0.5,0.3,0.7,0.024,-0.094,0.121
84
- TabPFN-TS,Sundial-Base,0.5,0.3,0.7,-0.001,-0.093,0.08
85
- TabPFN-TS,AutoARIMA,0.95,0.85,1.0,0.387,0.274,0.491
86
- TabPFN-TS,Stat. Ensemble,0.95,0.85,1.0,0.484,0.39,0.568
87
- TabPFN-TS,AutoETS,0.95,0.85,1.0,0.676,0.465,0.829
88
- TabPFN-TS,AutoTheta,1.0,1.0,1.0,0.559,0.468,0.641
89
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.592,0.483,0.685
90
- TabPFN-TS,Naive,1.0,1.0,1.0,0.753,0.698,0.798
91
- TabPFN-TS,Drift,1.0,1.0,1.0,0.762,0.711,0.806
92
- Chronos-Bolt,Chronos-2,0.0,0.0,0.0,-0.266,-0.429,-0.154
93
- Chronos-Bolt,Toto-1.0,0.05,0.0,0.15,-0.177,-0.266,-0.097
94
- Chronos-Bolt,TimesFM-2.5,0.1,0.0,0.25,-0.154,-0.218,-0.101
95
- Chronos-Bolt,TiRex,0.1,0.0,0.25,-0.123,-0.183,-0.069
96
- Chronos-Bolt,Moirai-2.0,0.15,0.025,0.301,-0.075,-0.134,-0.024
97
- Chronos-Bolt,TabPFN-TS,0.5,0.3,0.7,-0.025,-0.137,0.086
98
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
99
- Chronos-Bolt,Sundial-Base,0.55,0.35,0.75,-0.026,-0.135,0.046
100
- Chronos-Bolt,AutoARIMA,1.0,1.0,1.0,0.372,0.287,0.466
101
- Chronos-Bolt,Stat. Ensemble,1.0,1.0,1.0,0.471,0.385,0.561
102
- Chronos-Bolt,AutoETS,1.0,1.0,1.0,0.664,0.456,0.823
103
- Chronos-Bolt,AutoTheta,1.0,1.0,1.0,0.548,0.464,0.625
104
- Chronos-Bolt,Seasonal Naive,1.0,1.0,1.0,0.582,0.486,0.675
105
- Chronos-Bolt,Naive,1.0,1.0,1.0,0.746,0.682,0.796
106
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.756,0.698,0.805
107
- Sundial-Base,Chronos-2,0.0,0.0,0.0,-0.234,-0.309,-0.169
108
- Sundial-Base,Toto-1.0,0.1,0.0,0.25,-0.148,-0.251,-0.025
109
- Sundial-Base,TimesFM-2.5,0.05,0.0,0.15,-0.125,-0.173,-0.063
110
- Sundial-Base,TiRex,0.05,0.0,0.15,-0.095,-0.18,0.025
111
- Sundial-Base,Moirai-2.0,0.1,0.0,0.25,-0.048,-0.116,0.053
112
- Sundial-Base,TabPFN-TS,0.5,0.3,0.7,0.001,-0.086,0.085
113
- Sundial-Base,Chronos-Bolt,0.45,0.25,0.65,0.025,-0.048,0.119
114
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
115
- Sundial-Base,AutoARIMA,1.0,1.0,1.0,0.388,0.302,0.48
116
- Sundial-Base,Stat. Ensemble,1.0,1.0,1.0,0.485,0.404,0.566
117
- Sundial-Base,AutoETS,1.0,1.0,1.0,0.672,0.475,0.826
118
- Sundial-Base,AutoTheta,1.0,1.0,1.0,0.56,0.48,0.633
119
- Sundial-Base,Seasonal Naive,1.0,1.0,1.0,0.592,0.504,0.674
120
- Sundial-Base,Naive,1.0,1.0,1.0,0.753,0.702,0.798
121
- Sundial-Base,Drift,1.0,1.0,1.0,0.763,0.715,0.808
122
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-1.016,-1.48,-0.701
123
- AutoARIMA,Toto-1.0,0.0,0.0,0.0,-0.875,-1.195,-0.66
124
- AutoARIMA,TimesFM-2.5,0.0,0.0,0.0,-0.839,-1.168,-0.623
125
- AutoARIMA,TiRex,0.0,0.0,0.0,-0.789,-1.05,-0.601
126
- AutoARIMA,Moirai-2.0,0.0,0.0,0.0,-0.713,-0.984,-0.532
127
- AutoARIMA,TabPFN-TS,0.05,0.0,0.15,-0.632,-0.965,-0.378
128
- AutoARIMA,Chronos-Bolt,0.0,0.0,0.0,-0.593,-0.873,-0.403
129
- AutoARIMA,Sundial-Base,0.0,0.0,0.0,-0.634,-0.922,-0.434
130
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
131
- AutoARIMA,Stat. Ensemble,0.775,0.6,0.925,0.158,0.066,0.258
132
- AutoARIMA,AutoETS,0.775,0.6,0.95,0.493,0.136,0.747
133
- AutoARIMA,AutoTheta,0.8,0.6,0.95,0.281,0.178,0.381
134
- AutoARIMA,Seasonal Naive,0.875,0.75,0.975,0.334,0.206,0.456
135
- AutoARIMA,Naive,0.9,0.75,1.0,0.596,0.479,0.687
136
- AutoARIMA,Drift,0.9,0.75,1.0,0.612,0.496,0.7
137
- Stat. Ensemble,Chronos-2,0.0,0.0,0.0,-1.395,-1.901,-1.02
138
- Stat. Ensemble,Toto-1.0,0.0,0.0,0.0,-1.227,-1.689,-0.897
139
- Stat. Ensemble,TimesFM-2.5,0.0,0.0,0.0,-1.184,-1.624,-0.88
140
- Stat. Ensemble,TiRex,0.0,0.0,0.0,-1.125,-1.511,-0.826
141
- Stat. Ensemble,Moirai-2.0,0.0,0.0,0.0,-1.034,-1.432,-0.734
142
- Stat. Ensemble,TabPFN-TS,0.05,0.0,0.15,-0.939,-1.317,-0.639
143
- Stat. Ensemble,Chronos-Bolt,0.0,0.0,0.0,-0.892,-1.278,-0.626
144
- Stat. Ensemble,Sundial-Base,0.0,0.0,0.0,-0.941,-1.304,-0.678
145
- Stat. Ensemble,AutoARIMA,0.225,0.075,0.4,-0.188,-0.348,-0.07
146
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
147
- Stat. Ensemble,AutoETS,0.725,0.55,0.9,0.432,0.004,0.725
148
- Stat. Ensemble,AutoTheta,0.9,0.75,1.0,0.146,0.088,0.213
149
- Stat. Ensemble,Seasonal Naive,0.725,0.525,0.9,0.208,-0.0,0.366
150
- Stat. Ensemble,Naive,0.95,0.85,1.0,0.52,0.395,0.624
151
- Stat. Ensemble,Drift,1.0,1.0,1.0,0.539,0.421,0.64
152
- AutoETS,Chronos-2,0.0,0.0,0.0,-2.71,-5.874,-1.34
153
- AutoETS,Toto-1.0,0.0,0.0,0.0,-2.437,-5.367,-1.149
154
- AutoETS,TimesFM-2.5,0.0,0.0,0.0,-2.382,-5.287,-1.137
155
- AutoETS,TiRex,0.0,0.0,0.0,-2.298,-5.196,-1.056
156
- AutoETS,Moirai-2.0,0.0,0.0,0.0,-2.171,-5.048,-0.977
157
- AutoETS,TabPFN-TS,0.05,0.0,0.15,-2.09,-4.847,-0.87
158
- AutoETS,Chronos-Bolt,0.0,0.0,0.0,-1.974,-4.644,-0.837
159
- AutoETS,Sundial-Base,0.0,0.0,0.0,-2.046,-4.752,-0.906
160
- AutoETS,AutoARIMA,0.225,0.05,0.4,-0.971,-2.951,-0.157
161
- AutoETS,Stat. Ensemble,0.275,0.1,0.45,-0.759,-2.642,-0.004
162
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoETS,AutoTheta,0.65,0.45,0.85,-0.532,-2.194,0.136
164
- AutoETS,Seasonal Naive,0.725,0.525,0.875,-0.4,-1.897,0.22
165
- AutoETS,Naive,0.75,0.55,0.95,0.097,-1.154,0.533
166
- AutoETS,Drift,0.8,0.6,0.95,0.128,-1.084,0.55
167
- AutoTheta,Chronos-2,0.0,0.0,0.0,-1.802,-2.419,-1.302
168
- AutoTheta,Toto-1.0,0.0,0.0,0.0,-1.606,-2.154,-1.18
169
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-1.556,-2.107,-1.153
170
- AutoTheta,TiRex,0.0,0.0,0.0,-1.487,-1.971,-1.087
171
- AutoTheta,Moirai-2.0,0.0,0.0,0.0,-1.381,-1.863,-0.992
172
- AutoTheta,TabPFN-TS,0.0,0.0,0.0,-1.269,-1.786,-0.878
173
- AutoTheta,Chronos-Bolt,0.0,0.0,0.0,-1.214,-1.668,-0.867
174
- AutoTheta,Sundial-Base,0.0,0.0,0.0,-1.271,-1.728,-0.922
175
- AutoTheta,AutoARIMA,0.2,0.05,0.4,-0.39,-0.615,-0.216
176
- AutoTheta,Stat. Ensemble,0.1,0.0,0.25,-0.17,-0.271,-0.097
177
- AutoTheta,AutoETS,0.35,0.15,0.55,0.347,-0.157,0.687
178
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoTheta,Seasonal Naive,0.7,0.5,0.9,0.074,-0.208,0.273
180
- AutoTheta,Naive,0.85,0.65,1.0,0.438,0.308,0.555
181
- AutoTheta,Drift,0.9,0.75,1.0,0.461,0.336,0.574
182
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-2.025,-2.936,-1.394
183
- Seasonal Naive,Toto-1.0,0.0,0.0,0.0,-1.813,-2.641,-1.285
184
- Seasonal Naive,TimesFM-2.5,0.0,0.0,0.0,-1.759,-2.495,-1.259
185
- Seasonal Naive,TiRex,0.0,0.0,0.0,-1.684,-2.448,-1.175
186
- Seasonal Naive,Moirai-2.0,0.0,0.0,0.0,-1.57,-2.259,-1.095
187
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-1.449,-2.176,-0.936
188
- Seasonal Naive,Chronos-Bolt,0.0,0.0,0.0,-1.39,-2.073,-0.945
189
- Seasonal Naive,Sundial-Base,0.0,0.0,0.0,-1.452,-2.065,-1.017
190
- Seasonal Naive,AutoARIMA,0.125,0.025,0.25,-0.5,-0.839,-0.259
191
- Seasonal Naive,Stat. Ensemble,0.275,0.1,0.475,-0.263,-0.577,0.0
192
- Seasonal Naive,AutoETS,0.275,0.125,0.475,0.286,-0.282,0.655
193
- Seasonal Naive,AutoTheta,0.3,0.1,0.5,-0.079,-0.376,0.172
194
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
195
- Seasonal Naive,Naive,0.7,0.525,0.85,0.394,0.19,0.555
196
- Seasonal Naive,Drift,0.85,0.65,1.0,0.418,0.219,0.575
197
- Naive,Chronos-2,0.0,0.0,0.0,-3.991,-5.118,-3.118
198
- Naive,Toto-1.0,0.0,0.0,0.0,-3.642,-4.863,-2.647
199
- Naive,TimesFM-2.5,0.0,0.0,0.0,-3.552,-4.721,-2.668
200
- Naive,TiRex,0.0,0.0,0.0,-3.428,-4.591,-2.469
201
- Naive,Moirai-2.0,0.0,0.0,0.0,-3.24,-4.339,-2.329
202
- Naive,TabPFN-TS,0.0,0.0,0.0,-3.041,-3.961,-2.314
203
- Naive,Chronos-Bolt,0.0,0.0,0.0,-2.943,-3.911,-2.141
204
- Naive,Sundial-Base,0.0,0.0,0.0,-3.045,-3.943,-2.361
205
- Naive,AutoARIMA,0.1,0.0,0.25,-1.475,-2.196,-0.918
206
- Naive,Stat. Ensemble,0.05,0.0,0.15,-1.084,-1.66,-0.654
207
- Naive,AutoETS,0.25,0.05,0.45,-0.107,-1.141,0.536
208
- Naive,AutoTheta,0.15,0.0,0.35,-0.781,-1.246,-0.445
209
- Naive,Seasonal Naive,0.3,0.15,0.475,-0.65,-1.249,-0.234
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Drift,1.0,1.0,1.0,0.039,0.023,0.06
212
- Drift,Chronos-2,0.0,0.0,0.0,-4.196,-5.431,-3.251
213
- Drift,Toto-1.0,0.0,0.0,0.0,-3.832,-5.185,-2.788
214
- Drift,TimesFM-2.5,0.0,0.0,0.0,-3.738,-4.957,-2.832
215
- Drift,TiRex,0.0,0.0,0.0,-3.61,-4.86,-2.621
216
- Drift,Moirai-2.0,0.0,0.0,0.0,-3.414,-4.579,-2.506
217
- Drift,TabPFN-TS,0.0,0.0,0.0,-3.207,-4.157,-2.457
218
- Drift,Chronos-Bolt,0.0,0.0,0.0,-3.105,-4.13,-2.31
219
- Drift,Sundial-Base,0.0,0.0,0.0,-3.211,-4.217,-2.506
220
- Drift,AutoARIMA,0.1,0.0,0.25,-1.577,-2.334,-0.985
221
- Drift,Stat. Ensemble,0.0,0.0,0.0,-1.17,-1.781,-0.726
222
- Drift,AutoETS,0.2,0.05,0.4,-0.147,-1.224,0.52
223
- Drift,AutoTheta,0.1,0.0,0.25,-0.854,-1.347,-0.507
224
- Drift,Seasonal Naive,0.15,0.0,0.35,-0.717,-1.355,-0.28
225
- Drift,Naive,0.0,0.0,0.0,-0.041,-0.064,-0.024
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/leaderboard_MASE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Stat. Ensemble,86.30952380952381,38.477174892111854,0.0,74.63583592838711,0.0,0.0
3
- TiRex,75.5952380952381,36.87592043229662,0.0,0.1577310358200435,0.0,0.0
4
- Chronos-2,72.61904761904762,36.307128150625054,0.0,0.14296296275774828,0.0,0.0
5
- AutoETS,70.23809523809524,35.90887063284529,0.0,1.6644653908191018,0.0,0.0
6
- Toto-1.0,62.5,32.38399314015735,0.0,4.9976272313807435,0.16666666666666666,0.0
7
- AutoTheta,55.952380952380956,32.52926187728524,0.0,0.9511661320240354,0.0,0.0
8
- AutoARIMA,55.35714285714286,33.893658153503736,0.0,6.514016621697861,0.0,0.0
9
- Drift,54.76190476190476,29.639454010750132,0.0,0.41642842294117643,0.0,0.0
10
- TimesFM-2.5,50.5952380952381,32.83344752168508,0.0,0.3182864435592606,0.16666666666666666,0.0
11
- TabPFN-TS,44.04761904761905,28.784283550853363,0.0,19.382406450060035,0.0,0.0
12
- Chronos-Bolt,40.476190476190474,30.133730037212626,0.0,0.1589436945079739,0.0,0.0
13
- Moirai-2.0,39.285714285714285,28.236498726371508,0.0,0.2231920597172889,0.16666666666666666,0.0
14
- Sundial-Base,21.428571428571434,19.709514878708077,0.0,8.00438149490214,0.0,0.0
15
- Naive,13.69047619047619,15.593612409564518,0.0,0.43518170598039213,0.0,0.0
16
- Seasonal Naive,7.142857142857142,0.0,0.0,0.4101321087745098,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/leaderboard_SQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- TiRex,86.30952380952381,38.167872268859085,0.0,0.1577310358200435,0.0,0.0
3
- Chronos-2,85.11904761904762,37.73601239308477,0.0,0.14296296275774828,0.0,0.0
4
- Stat. Ensemble,78.57142857142858,37.11985318962454,0.0,74.63583592838711,0.0,0.0
5
- Toto-1.0,65.47619047619048,33.46942523050611,0.0,4.9976272313807435,0.16666666666666666,0.0
6
- AutoETS,62.5,29.185864234223647,0.0,1.6644653908191018,0.0,0.0
7
- TimesFM-2.5,54.761904761904766,32.77207032704751,0.0,0.3182864435592606,0.16666666666666666,0.0
8
- AutoARIMA,50.5952380952381,31.721452512923896,0.0,6.514016621697861,0.0,0.0
9
- TabPFN-TS,50.000000000000014,31.399100116246935,0.0,19.382406450060035,0.0,0.0
10
- Chronos-Bolt,50.0,31.818155351289157,0.0,0.1589436945079739,0.0,0.0
11
- Drift,43.45238095238095,25.58270358587049,0.0,0.41642842294117643,0.0,0.0
12
- Moirai-2.0,43.45238095238095,29.079628772649933,0.0,0.2231920597172889,0.16666666666666666,0.0
13
- AutoTheta,42.26190476190476,29.618968091908858,0.0,0.9511661320240354,0.0,0.0
14
- Sundial-Base,16.666666666666668,14.51026077809361,0.0,8.00438149490214,0.0,0.0
15
- Naive,13.69047619047619,11.510639986489046,0.0,0.43518170598039213,0.0,0.0
16
- Seasonal Naive,7.142857142857142,0.0,0.0,0.4101321087745098,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/leaderboard_WAPE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- TiRex,75.5952380952381,37.758720420324885,0.0,0.1577310358200435,0.0,0.0
3
- Stat. Ensemble,72.02380952380952,38.17771635240618,0.0,74.63583592838711,0.0,0.0
4
- Chronos-2,67.26190476190477,36.55873563764085,0.0,0.14296296275774828,0.0,0.0
5
- Toto-1.0,66.66666666666667,33.97027771884091,0.0,4.9976272313807435,0.16666666666666666,0.0
6
- TimesFM-2.5,63.09523809523808,35.892561173334556,0.0,0.3182864435592606,0.16666666666666666,0.0
7
- AutoETS,58.92857142857143,34.84116701303258,0.0,1.6644653908191018,0.0,0.0
8
- TabPFN-TS,58.333333333333336,34.28532719607597,0.0,19.382406450060035,0.0,0.0
9
- AutoARIMA,50.5952380952381,35.73217052389407,0.0,6.514016621697861,0.0,0.0
10
- Chronos-Bolt,49.404761904761905,30.47814442259772,0.0,0.1589436945079739,0.0,0.0
11
- Drift,48.214285714285715,28.435825322795704,0.0,0.41642842294117643,0.0,0.0
12
- Moirai-2.0,47.023809523809526,28.27751413794253,0.0,0.2231920597172889,0.16666666666666666,0.0
13
- AutoTheta,45.83333333333333,31.81292365948549,0.0,0.9511661320240354,0.0,0.0
14
- Naive,20.833333333333332,15.872311480991785,0.0,0.43518170598039213,0.0,0.0
15
- Sundial-Base,17.261904761904763,17.737236553561875,0.0,8.00438149490214,0.0,0.0
16
- Seasonal Naive,8.928571428571427,0.0,0.0,0.4101321087745098,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/leaderboard_WQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- TiRex,83.33333333333334,42.09146585989647,0.0,0.1577310358200435,0.0,0.0
3
- Chronos-2,76.78571428571426,40.960757226189024,0.0,0.14296296275774828,0.0,0.0
4
- Toto-1.0,70.83333333333334,38.07263005492061,0.0,4.9976272313807435,0.16666666666666666,0.0
5
- TimesFM-2.5,66.07142857142857,39.26649292250398,0.0,0.3182864435592606,0.16666666666666666,0.0
6
- TabPFN-TS,62.5,38.29459805551885,0.0,19.382406450060035,0.0,0.0
7
- Stat. Ensemble,61.9047619047619,37.99224244127147,0.0,74.63583592838711,0.0,0.0
8
- Chronos-Bolt,58.33333333333333,35.53495230927829,0.0,0.1589436945079739,0.0,0.0
9
- AutoETS,55.35714285714286,36.76571975679717,0.0,1.6644653908191018,0.0,0.0
10
- Moirai-2.0,52.976190476190474,32.793225134905136,0.0,0.2231920597172889,0.16666666666666666,0.0
11
- AutoARIMA,46.42857142857143,34.4461529963365,0.0,6.514016621697861,0.0,0.0
12
- AutoTheta,36.90476190476191,29.146207716766202,0.0,0.9511661320240354,0.0,0.0
13
- Drift,33.92857142857143,23.82864899904751,0.0,0.41642842294117643,0.0,0.0
14
- Sundial-Base,19.642857142857142,17.532849713917752,0.0,8.00438149490214,0.0,0.0
15
- Naive,15.47619047619048,11.37195841028552,0.0,0.43518170598039213,0.0,0.0
16
- Seasonal Naive,9.523809523809524,0.0,0.0,0.4101321087745098,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/pairwise_MASE.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
3
- Stat. Ensemble,TiRex,0.75,0.5,1.0,0.025,-0.023,0.072
4
- Stat. Ensemble,Chronos-2,0.75,0.5,1.0,0.034,-0.014,0.087
5
- Stat. Ensemble,AutoETS,0.75,0.5,1.0,0.04,0.016,0.067
6
- Stat. Ensemble,Toto-1.0,0.75,0.417,1.0,0.09,0.025,0.15
7
- Stat. Ensemble,AutoTheta,0.917,0.75,1.0,0.088,0.053,0.126
8
- Stat. Ensemble,AutoARIMA,0.833,0.583,1.0,0.069,0.021,0.135
9
- Stat. Ensemble,Drift,0.917,0.75,1.0,0.126,0.051,0.221
10
- Stat. Ensemble,TimesFM-2.5,0.833,0.583,1.0,0.084,0.03,0.143
11
- Stat. Ensemble,TabPFN-TS,0.833,0.583,1.0,0.136,0.054,0.213
12
- Stat. Ensemble,Chronos-Bolt,0.917,0.75,1.0,0.119,0.068,0.168
13
- Stat. Ensemble,Moirai-2.0,0.917,0.75,1.0,0.143,0.081,0.197
14
- Stat. Ensemble,Sundial-Base,0.917,0.75,1.0,0.234,0.156,0.302
15
- Stat. Ensemble,Naive,1.0,1.0,1.0,0.271,0.195,0.346
16
- Stat. Ensemble,Seasonal Naive,1.0,1.0,1.0,0.385,0.289,0.476
17
- TiRex,Stat. Ensemble,0.25,0.0,0.5,-0.026,-0.078,0.023
18
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
19
- TiRex,Chronos-2,0.583,0.333,0.833,0.009,-0.026,0.043
20
- TiRex,AutoETS,0.5,0.25,0.75,0.015,-0.013,0.045
21
- TiRex,Toto-1.0,0.583,0.333,0.833,0.066,-0.018,0.15
22
- TiRex,AutoTheta,0.833,0.583,1.0,0.064,-0.009,0.126
23
- TiRex,AutoARIMA,0.667,0.417,0.917,0.045,-0.033,0.133
24
- TiRex,Drift,0.75,0.5,1.0,0.103,-0.002,0.214
25
- TiRex,TimesFM-2.5,0.833,0.583,1.0,0.06,0.02,0.109
26
- TiRex,TabPFN-TS,0.833,0.583,1.0,0.114,0.033,0.194
27
- TiRex,Chronos-Bolt,0.917,0.75,1.0,0.097,0.05,0.149
28
- TiRex,Moirai-2.0,0.917,0.75,1.0,0.12,0.05,0.194
29
- TiRex,Sundial-Base,0.917,0.75,1.0,0.214,0.149,0.271
30
- TiRex,Naive,1.0,1.0,1.0,0.252,0.163,0.335
31
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.369,0.261,0.466
32
- Chronos-2,Stat. Ensemble,0.25,0.0,0.5,-0.035,-0.095,0.014
33
- Chronos-2,TiRex,0.417,0.167,0.667,-0.009,-0.045,0.026
34
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
35
- Chronos-2,AutoETS,0.5,0.25,0.75,0.006,-0.027,0.041
36
- Chronos-2,Toto-1.0,0.583,0.333,0.833,0.058,-0.037,0.145
37
- Chronos-2,AutoTheta,0.583,0.333,0.833,0.056,-0.023,0.124
38
- Chronos-2,AutoARIMA,0.667,0.417,0.917,0.037,-0.046,0.122
39
- Chronos-2,Drift,0.583,0.333,0.833,0.095,-0.024,0.22
40
- Chronos-2,TimesFM-2.5,0.75,0.5,1.0,0.052,0.004,0.099
41
- Chronos-2,TabPFN-TS,0.833,0.583,1.0,0.106,0.025,0.194
42
- Chronos-2,Chronos-Bolt,1.0,1.0,1.0,0.088,0.044,0.143
43
- Chronos-2,Moirai-2.0,1.0,1.0,1.0,0.112,0.045,0.186
44
- Chronos-2,Sundial-Base,1.0,1.0,1.0,0.207,0.156,0.255
45
- Chronos-2,Naive,1.0,1.0,1.0,0.245,0.159,0.328
46
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.363,0.264,0.451
47
- AutoETS,Stat. Ensemble,0.25,0.0,0.5,-0.042,-0.072,-0.016
48
- AutoETS,TiRex,0.5,0.25,0.75,-0.015,-0.047,0.013
49
- AutoETS,Chronos-2,0.5,0.25,0.75,-0.006,-0.043,0.026
50
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
51
- AutoETS,Toto-1.0,0.583,0.333,0.833,0.052,-0.025,0.122
52
- AutoETS,AutoTheta,0.667,0.417,0.917,0.05,-0.003,0.1
53
- AutoETS,AutoARIMA,0.667,0.417,0.917,0.03,-0.036,0.112
54
- AutoETS,Drift,0.667,0.417,0.917,0.089,-0.005,0.196
55
- AutoETS,TimesFM-2.5,0.75,0.5,1.0,0.046,-0.003,0.098
56
- AutoETS,TabPFN-TS,0.667,0.417,0.917,0.1,0.02,0.183
57
- AutoETS,Chronos-Bolt,0.833,0.583,1.0,0.083,0.041,0.127
58
- AutoETS,Moirai-2.0,0.833,0.583,1.0,0.107,0.05,0.164
59
- AutoETS,Sundial-Base,0.917,0.75,1.0,0.202,0.132,0.262
60
- AutoETS,Naive,1.0,1.0,1.0,0.241,0.162,0.316
61
- AutoETS,Seasonal Naive,1.0,1.0,1.0,0.359,0.256,0.456
62
- Toto-1.0,Stat. Ensemble,0.25,0.0,0.583,-0.099,-0.177,-0.025
63
- Toto-1.0,TiRex,0.417,0.167,0.667,-0.071,-0.176,0.018
64
- Toto-1.0,Chronos-2,0.417,0.167,0.667,-0.062,-0.17,0.035
65
- Toto-1.0,AutoETS,0.417,0.167,0.667,-0.055,-0.139,0.024
66
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
67
- Toto-1.0,AutoTheta,0.417,0.167,0.75,-0.002,-0.089,0.077
68
- Toto-1.0,AutoARIMA,0.583,0.333,0.833,-0.023,-0.108,0.048
69
- Toto-1.0,Drift,0.5,0.25,0.75,0.039,-0.065,0.133
70
- Toto-1.0,TimesFM-2.5,0.75,0.5,0.958,-0.007,-0.103,0.063
71
- Toto-1.0,TabPFN-TS,0.667,0.417,0.917,0.051,-0.107,0.186
72
- Toto-1.0,Chronos-Bolt,0.75,0.5,0.958,0.032,-0.034,0.096
73
- Toto-1.0,Moirai-2.0,0.833,0.625,0.958,0.058,0.018,0.106
74
- Toto-1.0,Sundial-Base,0.833,0.583,1.0,0.158,0.042,0.265
75
- Toto-1.0,Naive,1.0,1.0,1.0,0.199,0.116,0.285
76
- Toto-1.0,Seasonal Naive,0.917,0.75,1.0,0.324,0.199,0.433
77
- AutoTheta,Stat. Ensemble,0.083,0.0,0.25,-0.097,-0.144,-0.056
78
- AutoTheta,TiRex,0.167,0.0,0.417,-0.069,-0.144,0.009
79
- AutoTheta,Chronos-2,0.417,0.167,0.667,-0.059,-0.142,0.022
80
- AutoTheta,AutoETS,0.333,0.083,0.583,-0.053,-0.111,0.003
81
- AutoTheta,Toto-1.0,0.583,0.25,0.833,0.002,-0.083,0.082
82
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
83
- AutoTheta,AutoARIMA,0.5,0.25,0.75,-0.021,-0.102,0.059
84
- AutoTheta,Drift,0.333,0.083,0.583,0.041,-0.014,0.136
85
- AutoTheta,TimesFM-2.5,0.5,0.25,0.75,-0.005,-0.086,0.079
86
- AutoTheta,TabPFN-TS,0.583,0.25,0.833,0.053,-0.043,0.132
87
- AutoTheta,Chronos-Bolt,0.667,0.417,0.917,0.034,-0.034,0.1
88
- AutoTheta,Moirai-2.0,0.75,0.5,1.0,0.06,-0.011,0.123
89
- AutoTheta,Sundial-Base,0.917,0.75,1.0,0.16,0.076,0.236
90
- AutoTheta,Naive,1.0,1.0,1.0,0.201,0.136,0.266
91
- AutoTheta,Seasonal Naive,1.0,1.0,1.0,0.325,0.231,0.412
92
- AutoARIMA,Stat. Ensemble,0.167,0.0,0.417,-0.075,-0.156,-0.022
93
- AutoARIMA,TiRex,0.333,0.083,0.583,-0.047,-0.154,0.032
94
- AutoARIMA,Chronos-2,0.333,0.083,0.583,-0.038,-0.139,0.044
95
- AutoARIMA,AutoETS,0.333,0.083,0.583,-0.031,-0.126,0.035
96
- AutoARIMA,Toto-1.0,0.417,0.167,0.667,0.022,-0.051,0.097
97
- AutoARIMA,AutoTheta,0.5,0.25,0.75,0.02,-0.063,0.093
98
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
99
- AutoARIMA,Drift,0.5,0.25,0.75,0.06,-0.052,0.172
100
- AutoARIMA,TimesFM-2.5,0.583,0.25,0.833,0.016,-0.047,0.078
101
- AutoARIMA,TabPFN-TS,0.667,0.333,0.917,0.072,-0.036,0.171
102
- AutoARIMA,Chronos-Bolt,0.667,0.417,0.917,0.054,-0.009,0.113
103
- AutoARIMA,Moirai-2.0,0.583,0.333,0.833,0.079,-0.006,0.156
104
- AutoARIMA,Sundial-Base,0.75,0.5,1.0,0.177,0.068,0.264
105
- AutoARIMA,Naive,0.917,0.75,1.0,0.217,0.128,0.295
106
- AutoARIMA,Seasonal Naive,1.0,1.0,1.0,0.339,0.256,0.422
107
- Drift,Stat. Ensemble,0.083,0.0,0.25,-0.144,-0.284,-0.054
108
- Drift,TiRex,0.25,0.0,0.5,-0.115,-0.273,0.002
109
- Drift,Chronos-2,0.417,0.167,0.667,-0.105,-0.282,0.024
110
- Drift,AutoETS,0.333,0.083,0.583,-0.098,-0.244,0.005
111
- Drift,Toto-1.0,0.5,0.25,0.75,-0.041,-0.153,0.061
112
- Drift,AutoTheta,0.667,0.417,0.917,-0.043,-0.158,0.014
113
- Drift,AutoARIMA,0.5,0.25,0.75,-0.064,-0.208,0.05
114
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
115
- Drift,TimesFM-2.5,0.417,0.167,0.667,-0.048,-0.227,0.071
116
- Drift,TabPFN-TS,0.583,0.25,0.833,0.012,-0.193,0.142
117
- Drift,Chronos-Bolt,0.583,0.333,0.833,-0.007,-0.119,0.086
118
- Drift,Moirai-2.0,0.667,0.417,0.917,0.02,-0.09,0.108
119
- Drift,Sundial-Base,0.833,0.583,1.0,0.124,-0.064,0.234
120
- Drift,Naive,0.917,0.75,1.0,0.166,0.089,0.241
121
- Drift,Seasonal Naive,0.917,0.75,1.0,0.296,0.127,0.41
122
- TimesFM-2.5,Stat. Ensemble,0.167,0.0,0.417,-0.092,-0.167,-0.031
123
- TimesFM-2.5,TiRex,0.167,0.0,0.417,-0.064,-0.122,-0.02
124
- TimesFM-2.5,Chronos-2,0.25,0.0,0.5,-0.055,-0.11,-0.004
125
- TimesFM-2.5,AutoETS,0.25,0.0,0.5,-0.048,-0.109,0.003
126
- TimesFM-2.5,Toto-1.0,0.25,0.042,0.5,0.007,-0.067,0.093
127
- TimesFM-2.5,AutoTheta,0.5,0.25,0.75,0.005,-0.086,0.079
128
- TimesFM-2.5,AutoARIMA,0.417,0.167,0.75,-0.016,-0.085,0.045
129
- TimesFM-2.5,Drift,0.583,0.333,0.833,0.045,-0.076,0.185
130
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
131
- TimesFM-2.5,TabPFN-TS,0.5,0.25,0.75,0.057,-0.034,0.15
132
- TimesFM-2.5,Chronos-Bolt,0.583,0.333,0.833,0.039,-0.007,0.091
133
- TimesFM-2.5,Moirai-2.0,0.667,0.417,0.917,0.064,-0.008,0.143
134
- TimesFM-2.5,Sundial-Base,0.917,0.75,1.0,0.163,0.095,0.228
135
- TimesFM-2.5,Naive,0.917,0.75,1.0,0.204,0.109,0.293
136
- TimesFM-2.5,Seasonal Naive,0.917,0.75,1.0,0.328,0.226,0.415
137
- TabPFN-TS,Stat. Ensemble,0.167,0.0,0.417,-0.158,-0.271,-0.057
138
- TabPFN-TS,TiRex,0.167,0.0,0.417,-0.128,-0.241,-0.034
139
- TabPFN-TS,Chronos-2,0.167,0.0,0.417,-0.118,-0.241,-0.026
140
- TabPFN-TS,AutoETS,0.333,0.083,0.583,-0.111,-0.224,-0.02
141
- TabPFN-TS,Toto-1.0,0.333,0.083,0.583,-0.053,-0.228,0.097
142
- TabPFN-TS,AutoTheta,0.417,0.167,0.75,-0.056,-0.153,0.042
143
- TabPFN-TS,AutoARIMA,0.333,0.083,0.667,-0.077,-0.207,0.035
144
- TabPFN-TS,Drift,0.417,0.167,0.75,-0.012,-0.165,0.162
145
- TabPFN-TS,TimesFM-2.5,0.5,0.25,0.75,-0.06,-0.177,0.033
146
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
147
- TabPFN-TS,Chronos-Bolt,0.5,0.25,0.75,-0.019,-0.156,0.089
148
- TabPFN-TS,Moirai-2.0,0.5,0.25,0.75,0.008,-0.146,0.14
149
- TabPFN-TS,Sundial-Base,0.667,0.417,0.917,0.113,0.02,0.191
150
- TabPFN-TS,Naive,0.667,0.417,0.917,0.156,0.023,0.274
151
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.288,0.181,0.389
152
- Chronos-Bolt,Stat. Ensemble,0.083,0.0,0.25,-0.136,-0.202,-0.073
153
- Chronos-Bolt,TiRex,0.083,0.0,0.25,-0.107,-0.175,-0.053
154
- Chronos-Bolt,Chronos-2,0.0,0.0,0.0,-0.097,-0.167,-0.046
155
- Chronos-Bolt,AutoETS,0.167,0.0,0.417,-0.09,-0.145,-0.043
156
- Chronos-Bolt,Toto-1.0,0.25,0.042,0.5,-0.033,-0.107,0.033
157
- Chronos-Bolt,AutoTheta,0.333,0.083,0.583,-0.036,-0.112,0.033
158
- Chronos-Bolt,AutoARIMA,0.333,0.083,0.583,-0.057,-0.128,0.009
159
- Chronos-Bolt,Drift,0.417,0.167,0.667,0.007,-0.094,0.106
160
- Chronos-Bolt,TimesFM-2.5,0.417,0.167,0.667,-0.04,-0.1,0.007
161
- Chronos-Bolt,TabPFN-TS,0.5,0.25,0.75,0.019,-0.097,0.135
162
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
163
- Chronos-Bolt,Moirai-2.0,0.417,0.167,0.667,0.026,-0.016,0.079
164
- Chronos-Bolt,Sundial-Base,0.917,0.75,1.0,0.13,0.036,0.202
165
- Chronos-Bolt,Naive,0.917,0.75,1.0,0.172,0.107,0.235
166
- Chronos-Bolt,Seasonal Naive,0.833,0.583,1.0,0.301,0.198,0.389
167
- Moirai-2.0,Stat. Ensemble,0.083,0.0,0.25,-0.166,-0.245,-0.088
168
- Moirai-2.0,TiRex,0.083,0.0,0.25,-0.137,-0.24,-0.053
169
- Moirai-2.0,Chronos-2,0.0,0.0,0.0,-0.127,-0.229,-0.048
170
- Moirai-2.0,AutoETS,0.167,0.0,0.417,-0.12,-0.196,-0.053
171
- Moirai-2.0,Toto-1.0,0.167,0.042,0.375,-0.061,-0.118,-0.018
172
- Moirai-2.0,AutoTheta,0.25,0.0,0.5,-0.064,-0.141,0.011
173
- Moirai-2.0,AutoARIMA,0.417,0.167,0.667,-0.086,-0.185,0.006
174
- Moirai-2.0,Drift,0.333,0.083,0.583,-0.02,-0.121,0.083
175
- Moirai-2.0,TimesFM-2.5,0.333,0.083,0.583,-0.068,-0.166,0.008
176
- Moirai-2.0,TabPFN-TS,0.5,0.25,0.75,-0.008,-0.163,0.127
177
- Moirai-2.0,Chronos-Bolt,0.583,0.333,0.833,-0.027,-0.086,0.015
178
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
179
- Moirai-2.0,Sundial-Base,0.833,0.583,1.0,0.106,-0.0,0.194
180
- Moirai-2.0,Naive,0.917,0.75,1.0,0.15,0.086,0.21
181
- Moirai-2.0,Seasonal Naive,0.833,0.583,1.0,0.282,0.162,0.388
182
- Sundial-Base,Stat. Ensemble,0.083,0.0,0.25,-0.305,-0.434,-0.185
183
- Sundial-Base,TiRex,0.083,0.0,0.25,-0.272,-0.372,-0.176
184
- Sundial-Base,Chronos-2,0.0,0.0,0.0,-0.261,-0.342,-0.184
185
- Sundial-Base,AutoETS,0.083,0.0,0.25,-0.253,-0.356,-0.153
186
- Sundial-Base,Toto-1.0,0.167,0.0,0.417,-0.187,-0.361,-0.044
187
- Sundial-Base,AutoTheta,0.083,0.0,0.25,-0.19,-0.309,-0.083
188
- Sundial-Base,AutoARIMA,0.25,0.0,0.5,-0.215,-0.358,-0.073
189
- Sundial-Base,Drift,0.167,0.0,0.417,-0.141,-0.305,0.06
190
- Sundial-Base,TimesFM-2.5,0.083,0.0,0.25,-0.195,-0.296,-0.104
191
- Sundial-Base,TabPFN-TS,0.333,0.083,0.583,-0.127,-0.237,-0.021
192
- Sundial-Base,Chronos-Bolt,0.083,0.0,0.25,-0.149,-0.254,-0.037
193
- Sundial-Base,Moirai-2.0,0.167,0.0,0.417,-0.119,-0.24,0.0
194
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
195
- Sundial-Base,Naive,0.583,0.333,0.833,0.049,-0.051,0.164
196
- Sundial-Base,Seasonal Naive,0.833,0.583,1.0,0.197,0.082,0.299
197
- Naive,Stat. Ensemble,0.0,0.0,0.0,-0.372,-0.529,-0.242
198
- Naive,TiRex,0.0,0.0,0.0,-0.337,-0.504,-0.194
199
- Naive,Chronos-2,0.0,0.0,0.0,-0.325,-0.488,-0.189
200
- Naive,AutoETS,0.0,0.0,0.0,-0.317,-0.462,-0.193
201
- Naive,Toto-1.0,0.0,0.0,0.0,-0.248,-0.398,-0.131
202
- Naive,AutoTheta,0.0,0.0,0.0,-0.251,-0.362,-0.157
203
- Naive,AutoARIMA,0.083,0.0,0.25,-0.277,-0.419,-0.147
204
- Naive,Drift,0.083,0.0,0.25,-0.2,-0.318,-0.098
205
- Naive,TimesFM-2.5,0.083,0.0,0.25,-0.257,-0.415,-0.122
206
- Naive,TabPFN-TS,0.333,0.083,0.583,-0.185,-0.377,-0.024
207
- Naive,Chronos-Bolt,0.083,0.0,0.25,-0.208,-0.307,-0.119
208
- Naive,Moirai-2.0,0.083,0.0,0.25,-0.176,-0.265,-0.094
209
- Naive,Sundial-Base,0.417,0.167,0.667,-0.051,-0.197,0.049
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Seasonal Naive,0.75,0.542,0.917,0.156,0.021,0.261
212
- Seasonal Naive,Stat. Ensemble,0.0,0.0,0.0,-0.625,-0.907,-0.407
213
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.584,-0.872,-0.353
214
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-0.57,-0.821,-0.358
215
- Seasonal Naive,AutoETS,0.0,0.0,0.0,-0.56,-0.837,-0.345
216
- Seasonal Naive,Toto-1.0,0.083,0.0,0.25,-0.479,-0.763,-0.248
217
- Seasonal Naive,AutoTheta,0.0,0.0,0.0,-0.482,-0.7,-0.301
218
- Seasonal Naive,AutoARIMA,0.0,0.0,0.0,-0.513,-0.73,-0.344
219
- Seasonal Naive,Drift,0.083,0.0,0.25,-0.421,-0.694,-0.146
220
- Seasonal Naive,TimesFM-2.5,0.083,0.0,0.25,-0.489,-0.709,-0.292
221
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.404,-0.637,-0.222
222
- Seasonal Naive,Chronos-Bolt,0.167,0.0,0.417,-0.431,-0.638,-0.246
223
- Seasonal Naive,Moirai-2.0,0.167,0.0,0.417,-0.393,-0.634,-0.193
224
- Seasonal Naive,Sundial-Base,0.167,0.0,0.417,-0.245,-0.427,-0.09
225
- Seasonal Naive,Naive,0.25,0.083,0.458,-0.185,-0.353,-0.022
226
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/pairwise_SQL.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
3
- TiRex,Chronos-2,0.667,0.417,0.917,0.007,-0.028,0.036
4
- TiRex,Stat. Ensemble,0.75,0.5,1.0,0.017,-0.034,0.06
5
- TiRex,Toto-1.0,0.667,0.333,0.917,0.071,-0.005,0.143
6
- TiRex,AutoETS,0.75,0.5,1.0,0.127,0.025,0.268
7
- TiRex,TimesFM-2.5,0.917,0.75,1.0,0.08,0.038,0.122
8
- TiRex,AutoARIMA,0.833,0.583,1.0,0.094,0.019,0.181
9
- TiRex,Chronos-Bolt,0.917,0.75,1.0,0.093,0.05,0.141
10
- TiRex,TabPFN-TS,0.833,0.583,1.0,0.099,0.031,0.166
11
- TiRex,Moirai-2.0,0.917,0.75,1.0,0.128,0.063,0.197
12
- TiRex,Drift,0.917,0.75,1.0,0.169,0.055,0.293
13
- TiRex,AutoTheta,0.917,0.75,1.0,0.121,0.053,0.185
14
- TiRex,Sundial-Base,1.0,1.0,1.0,0.277,0.209,0.334
15
- TiRex,Naive,1.0,1.0,1.0,0.301,0.213,0.382
16
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.382,0.282,0.467
17
- Chronos-2,TiRex,0.333,0.083,0.583,-0.007,-0.037,0.027
18
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
19
- Chronos-2,Stat. Ensemble,0.833,0.583,1.0,0.01,-0.042,0.051
20
- Chronos-2,Toto-1.0,0.667,0.333,0.917,0.064,-0.02,0.141
21
- Chronos-2,AutoETS,0.75,0.5,1.0,0.121,0.02,0.261
22
- Chronos-2,TimesFM-2.5,0.917,0.75,1.0,0.074,0.039,0.111
23
- Chronos-2,AutoARIMA,0.75,0.5,1.0,0.088,0.009,0.174
24
- Chronos-2,Chronos-Bolt,1.0,1.0,1.0,0.087,0.043,0.142
25
- Chronos-2,TabPFN-TS,0.833,0.583,1.0,0.092,0.031,0.161
26
- Chronos-2,Moirai-2.0,1.0,1.0,1.0,0.122,0.059,0.192
27
- Chronos-2,Drift,0.917,0.75,1.0,0.163,0.036,0.303
28
- Chronos-2,AutoTheta,0.917,0.75,1.0,0.115,0.046,0.178
29
- Chronos-2,Sundial-Base,1.0,1.0,1.0,0.272,0.215,0.324
30
- Chronos-2,Naive,1.0,1.0,1.0,0.296,0.202,0.388
31
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.377,0.286,0.457
32
- Stat. Ensemble,TiRex,0.25,0.0,0.5,-0.017,-0.064,0.033
33
- Stat. Ensemble,Chronos-2,0.167,0.0,0.417,-0.01,-0.053,0.04
34
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
35
- Stat. Ensemble,Toto-1.0,0.667,0.333,0.917,0.055,-0.011,0.117
36
- Stat. Ensemble,AutoETS,0.667,0.417,0.917,0.112,0.011,0.257
37
- Stat. Ensemble,TimesFM-2.5,0.833,0.583,1.0,0.065,0.013,0.121
38
- Stat. Ensemble,AutoARIMA,0.833,0.583,1.0,0.079,0.03,0.146
39
- Stat. Ensemble,Chronos-Bolt,0.917,0.75,1.0,0.078,0.024,0.131
40
- Stat. Ensemble,TabPFN-TS,0.833,0.583,1.0,0.083,0.023,0.143
41
- Stat. Ensemble,Moirai-2.0,0.917,0.75,1.0,0.113,0.048,0.171
42
- Stat. Ensemble,Drift,0.917,0.75,1.0,0.155,0.061,0.279
43
- Stat. Ensemble,AutoTheta,1.0,1.0,1.0,0.107,0.07,0.148
44
- Stat. Ensemble,Sundial-Base,1.0,1.0,1.0,0.264,0.191,0.329
45
- Stat. Ensemble,Naive,1.0,1.0,1.0,0.289,0.206,0.375
46
- Stat. Ensemble,Seasonal Naive,1.0,1.0,1.0,0.371,0.288,0.452
47
- Toto-1.0,TiRex,0.333,0.083,0.667,-0.076,-0.167,0.005
48
- Toto-1.0,Chronos-2,0.333,0.083,0.667,-0.069,-0.164,0.019
49
- Toto-1.0,Stat. Ensemble,0.333,0.083,0.667,-0.058,-0.133,0.011
50
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
51
- Toto-1.0,AutoETS,0.5,0.25,0.752,0.06,-0.086,0.229
52
- Toto-1.0,TimesFM-2.5,0.75,0.5,0.958,0.01,-0.075,0.075
53
- Toto-1.0,AutoARIMA,0.667,0.417,0.917,0.026,-0.06,0.101
54
- Toto-1.0,Chronos-Bolt,0.667,0.417,0.875,0.024,-0.031,0.081
55
- Toto-1.0,TabPFN-TS,0.667,0.417,0.917,0.03,-0.103,0.146
56
- Toto-1.0,Moirai-2.0,0.833,0.625,0.958,0.062,0.022,0.109
57
- Toto-1.0,Drift,0.75,0.5,1.0,0.106,-0.014,0.216
58
- Toto-1.0,AutoTheta,0.667,0.417,0.917,0.055,-0.028,0.135
59
- Toto-1.0,Sundial-Base,0.833,0.583,1.0,0.222,0.115,0.321
60
- Toto-1.0,Naive,0.917,0.75,1.0,0.248,0.157,0.335
61
- Toto-1.0,Seasonal Naive,0.917,0.75,1.0,0.335,0.216,0.435
62
- AutoETS,TiRex,0.25,0.0,0.5,-0.145,-0.367,-0.025
63
- AutoETS,Chronos-2,0.25,0.0,0.5,-0.137,-0.354,-0.02
64
- AutoETS,Stat. Ensemble,0.333,0.083,0.583,-0.126,-0.345,-0.012
65
- AutoETS,Toto-1.0,0.5,0.248,0.75,-0.064,-0.297,0.08
66
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
67
- AutoETS,TimesFM-2.5,0.5,0.25,0.75,-0.053,-0.259,0.072
68
- AutoETS,AutoARIMA,0.5,0.167,0.75,-0.037,-0.271,0.112
69
- AutoETS,Chronos-Bolt,0.75,0.5,1.0,-0.039,-0.251,0.086
70
- AutoETS,TabPFN-TS,0.667,0.417,0.917,-0.032,-0.263,0.102
71
- AutoETS,Moirai-2.0,0.75,0.5,1.0,0.001,-0.211,0.131
72
- AutoETS,Drift,0.75,0.5,1.0,0.048,-0.2,0.237
73
- AutoETS,AutoTheta,0.75,0.5,1.0,-0.006,-0.228,0.119
74
- AutoETS,Sundial-Base,0.917,0.75,1.0,0.172,0.002,0.286
75
- AutoETS,Naive,0.917,0.75,1.0,0.2,0.001,0.337
76
- AutoETS,Seasonal Naive,0.917,0.75,1.0,0.292,0.084,0.43
77
- TimesFM-2.5,TiRex,0.083,0.0,0.25,-0.087,-0.139,-0.039
78
- TimesFM-2.5,Chronos-2,0.083,0.0,0.25,-0.08,-0.124,-0.041
79
- TimesFM-2.5,Stat. Ensemble,0.167,0.0,0.417,-0.069,-0.137,-0.014
80
- TimesFM-2.5,Toto-1.0,0.25,0.042,0.5,-0.01,-0.081,0.07
81
- TimesFM-2.5,AutoETS,0.5,0.25,0.75,0.051,-0.077,0.205
82
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
83
- TimesFM-2.5,AutoARIMA,0.583,0.333,0.833,0.015,-0.066,0.089
84
- TimesFM-2.5,Chronos-Bolt,0.583,0.333,0.833,0.014,-0.034,0.072
85
- TimesFM-2.5,TabPFN-TS,0.5,0.25,0.75,0.02,-0.059,0.098
86
- TimesFM-2.5,Moirai-2.0,0.583,0.333,0.833,0.052,-0.014,0.127
87
- TimesFM-2.5,Drift,0.75,0.5,1.0,0.097,-0.046,0.255
88
- TimesFM-2.5,AutoTheta,0.75,0.5,1.0,0.045,-0.043,0.116
89
- TimesFM-2.5,Sundial-Base,1.0,1.0,1.0,0.214,0.15,0.272
90
- TimesFM-2.5,Naive,0.917,0.75,1.0,0.24,0.136,0.34
91
- TimesFM-2.5,Seasonal Naive,0.917,0.75,1.0,0.328,0.224,0.407
92
- AutoARIMA,TiRex,0.167,0.0,0.417,-0.104,-0.221,-0.019
93
- AutoARIMA,Chronos-2,0.25,0.0,0.5,-0.097,-0.21,-0.009
94
- AutoARIMA,Stat. Ensemble,0.167,0.0,0.417,-0.086,-0.171,-0.031
95
- AutoARIMA,Toto-1.0,0.333,0.083,0.583,-0.026,-0.112,0.056
96
- AutoARIMA,AutoETS,0.5,0.25,0.833,0.036,-0.126,0.213
97
- AutoARIMA,TimesFM-2.5,0.417,0.167,0.667,-0.016,-0.097,0.062
98
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
99
- AutoARIMA,Chronos-Bolt,0.417,0.167,0.667,-0.001,-0.08,0.069
100
- AutoARIMA,TabPFN-TS,0.5,0.167,0.75,0.005,-0.101,0.09
101
- AutoARIMA,Moirai-2.0,0.5,0.25,0.75,0.037,-0.064,0.125
102
- AutoARIMA,Drift,0.583,0.333,0.833,0.082,-0.049,0.224
103
- AutoARIMA,AutoTheta,0.583,0.25,0.833,0.03,-0.058,0.107
104
- AutoARIMA,Sundial-Base,0.75,0.5,1.0,0.201,0.09,0.294
105
- AutoARIMA,Naive,0.917,0.75,1.0,0.228,0.128,0.322
106
- AutoARIMA,Seasonal Naive,1.0,1.0,1.0,0.317,0.242,0.396
107
- Chronos-Bolt,TiRex,0.083,0.0,0.25,-0.103,-0.164,-0.052
108
- Chronos-Bolt,Chronos-2,0.0,0.0,0.0,-0.095,-0.165,-0.044
109
- Chronos-Bolt,Stat. Ensemble,0.083,0.0,0.25,-0.084,-0.151,-0.024
110
- Chronos-Bolt,Toto-1.0,0.333,0.125,0.583,-0.025,-0.088,0.03
111
- Chronos-Bolt,AutoETS,0.25,0.0,0.5,0.037,-0.094,0.201
112
- Chronos-Bolt,TimesFM-2.5,0.417,0.167,0.667,-0.014,-0.077,0.032
113
- Chronos-Bolt,AutoARIMA,0.583,0.333,0.833,0.001,-0.074,0.074
114
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
115
- Chronos-Bolt,TabPFN-TS,0.5,0.25,0.75,0.006,-0.093,0.108
116
- Chronos-Bolt,Moirai-2.0,0.5,0.25,0.75,0.039,-0.001,0.09
117
- Chronos-Bolt,Drift,0.75,0.5,1.0,0.084,-0.028,0.199
118
- Chronos-Bolt,AutoTheta,0.667,0.417,0.917,0.031,-0.044,0.102
119
- Chronos-Bolt,Sundial-Base,0.917,0.75,1.0,0.202,0.109,0.273
120
- Chronos-Bolt,Naive,1.0,1.0,1.0,0.229,0.158,0.296
121
- Chronos-Bolt,Seasonal Naive,0.917,0.75,1.0,0.318,0.219,0.402
122
- TabPFN-TS,TiRex,0.167,0.0,0.417,-0.109,-0.2,-0.031
123
- TabPFN-TS,Chronos-2,0.167,0.0,0.417,-0.102,-0.192,-0.032
124
- TabPFN-TS,Stat. Ensemble,0.167,0.0,0.417,-0.091,-0.167,-0.023
125
- TabPFN-TS,Toto-1.0,0.333,0.083,0.583,-0.031,-0.171,0.093
126
- TabPFN-TS,AutoETS,0.333,0.083,0.583,0.031,-0.114,0.208
127
- TabPFN-TS,TimesFM-2.5,0.5,0.25,0.75,-0.02,-0.109,0.056
128
- TabPFN-TS,AutoARIMA,0.5,0.25,0.833,-0.005,-0.099,0.092
129
- TabPFN-TS,Chronos-Bolt,0.5,0.25,0.75,-0.006,-0.121,0.085
130
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
131
- TabPFN-TS,Moirai-2.0,0.583,0.331,0.833,0.033,-0.089,0.147
132
- TabPFN-TS,Drift,0.5,0.25,0.833,0.078,-0.057,0.251
133
- TabPFN-TS,AutoTheta,0.5,0.25,0.833,0.025,-0.038,0.091
134
- TabPFN-TS,Sundial-Base,0.917,0.75,1.0,0.198,0.122,0.263
135
- TabPFN-TS,Naive,0.833,0.583,1.0,0.225,0.109,0.337
136
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.314,0.229,0.401
137
- Moirai-2.0,TiRex,0.083,0.0,0.25,-0.147,-0.245,-0.067
138
- Moirai-2.0,Chronos-2,0.0,0.0,0.0,-0.139,-0.237,-0.063
139
- Moirai-2.0,Stat. Ensemble,0.083,0.0,0.25,-0.128,-0.206,-0.05
140
- Moirai-2.0,Toto-1.0,0.167,0.042,0.375,-0.066,-0.123,-0.022
141
- Moirai-2.0,AutoETS,0.25,0.0,0.5,-0.002,-0.151,0.175
142
- Moirai-2.0,TimesFM-2.5,0.417,0.167,0.667,-0.055,-0.145,0.014
143
- Moirai-2.0,AutoARIMA,0.5,0.25,0.75,-0.039,-0.142,0.06
144
- Moirai-2.0,Chronos-Bolt,0.5,0.25,0.75,-0.04,-0.099,0.001
145
- Moirai-2.0,TabPFN-TS,0.417,0.167,0.669,-0.034,-0.173,0.082
146
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
147
- Moirai-2.0,Drift,0.583,0.333,0.833,0.047,-0.076,0.17
148
- Moirai-2.0,AutoTheta,0.5,0.25,0.75,-0.008,-0.087,0.069
149
- Moirai-2.0,Sundial-Base,0.833,0.583,1.0,0.17,0.072,0.253
150
- Moirai-2.0,Naive,0.917,0.75,1.0,0.199,0.12,0.272
151
- Moirai-2.0,Seasonal Naive,0.833,0.583,1.0,0.291,0.176,0.391
152
- Drift,TiRex,0.083,0.0,0.25,-0.204,-0.414,-0.058
153
- Drift,Chronos-2,0.083,0.0,0.25,-0.195,-0.435,-0.037
154
- Drift,Stat. Ensemble,0.083,0.0,0.25,-0.183,-0.386,-0.065
155
- Drift,Toto-1.0,0.25,0.0,0.5,-0.119,-0.276,0.014
156
- Drift,AutoETS,0.25,0.0,0.5,-0.051,-0.31,0.166
157
- Drift,TimesFM-2.5,0.25,0.0,0.5,-0.107,-0.342,0.044
158
- Drift,AutoARIMA,0.417,0.167,0.667,-0.09,-0.289,0.047
159
- Drift,Chronos-Bolt,0.25,0.0,0.5,-0.091,-0.248,0.027
160
- Drift,TabPFN-TS,0.5,0.167,0.75,-0.085,-0.334,0.054
161
- Drift,Moirai-2.0,0.417,0.167,0.667,-0.049,-0.204,0.07
162
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
163
- Drift,AutoTheta,0.75,0.5,0.917,-0.057,-0.232,0.028
164
- Drift,Sundial-Base,0.917,0.75,1.0,0.13,-0.097,0.26
165
- Drift,Naive,0.917,0.75,1.0,0.159,0.082,0.234
166
- Drift,Seasonal Naive,0.917,0.75,1.0,0.256,0.054,0.378
167
- AutoTheta,TiRex,0.083,0.0,0.25,-0.138,-0.227,-0.056
168
- AutoTheta,Chronos-2,0.083,0.0,0.25,-0.13,-0.217,-0.048
169
- AutoTheta,Stat. Ensemble,0.0,0.0,0.0,-0.119,-0.173,-0.075
170
- AutoTheta,Toto-1.0,0.333,0.083,0.583,-0.058,-0.156,0.027
171
- AutoTheta,AutoETS,0.25,0.0,0.5,0.006,-0.136,0.186
172
- AutoTheta,TimesFM-2.5,0.25,0.0,0.5,-0.047,-0.131,0.041
173
- AutoTheta,AutoARIMA,0.417,0.167,0.75,-0.031,-0.12,0.055
174
- AutoTheta,Chronos-Bolt,0.333,0.083,0.583,-0.032,-0.114,0.042
175
- AutoTheta,TabPFN-TS,0.5,0.167,0.75,-0.026,-0.101,0.037
176
- AutoTheta,Moirai-2.0,0.5,0.25,0.75,0.008,-0.075,0.08
177
- AutoTheta,Drift,0.25,0.083,0.5,0.054,-0.029,0.188
178
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoTheta,Sundial-Base,0.917,0.75,1.0,0.177,0.099,0.254
180
- AutoTheta,Naive,1.0,1.0,1.0,0.205,0.128,0.291
181
- AutoTheta,Seasonal Naive,1.0,1.0,1.0,0.296,0.216,0.375
182
- Sundial-Base,TiRex,0.0,0.0,0.0,-0.383,-0.501,-0.264
183
- Sundial-Base,Chronos-2,0.0,0.0,0.0,-0.373,-0.48,-0.274
184
- Sundial-Base,Stat. Ensemble,0.0,0.0,0.0,-0.36,-0.49,-0.236
185
- Sundial-Base,Toto-1.0,0.167,0.0,0.417,-0.285,-0.472,-0.13
186
- Sundial-Base,AutoETS,0.083,0.0,0.25,-0.207,-0.401,-0.002
187
- Sundial-Base,TimesFM-2.5,0.0,0.0,0.0,-0.272,-0.374,-0.176
188
- Sundial-Base,AutoARIMA,0.25,0.0,0.5,-0.252,-0.417,-0.099
189
- Sundial-Base,Chronos-Bolt,0.083,0.0,0.25,-0.254,-0.376,-0.123
190
- Sundial-Base,TabPFN-TS,0.083,0.0,0.25,-0.246,-0.357,-0.139
191
- Sundial-Base,Moirai-2.0,0.167,0.0,0.417,-0.205,-0.339,-0.077
192
- Sundial-Base,Drift,0.083,0.0,0.25,-0.149,-0.351,0.088
193
- Sundial-Base,AutoTheta,0.083,0.0,0.25,-0.215,-0.341,-0.11
194
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
195
- Sundial-Base,Naive,0.5,0.25,0.75,0.034,-0.089,0.177
196
- Sundial-Base,Seasonal Naive,0.833,0.583,1.0,0.145,0.018,0.25
197
- Naive,TiRex,0.0,0.0,0.0,-0.431,-0.619,-0.27
198
- Naive,Chronos-2,0.0,0.0,0.0,-0.421,-0.633,-0.253
199
- Naive,Stat. Ensemble,0.0,0.0,0.0,-0.407,-0.6,-0.259
200
- Naive,Toto-1.0,0.083,0.0,0.25,-0.33,-0.503,-0.186
201
- Naive,AutoETS,0.083,0.0,0.25,-0.25,-0.508,-0.001
202
- Naive,TimesFM-2.5,0.083,0.0,0.25,-0.316,-0.515,-0.157
203
- Naive,AutoARIMA,0.083,0.0,0.25,-0.296,-0.475,-0.146
204
- Naive,Chronos-Bolt,0.0,0.0,0.0,-0.298,-0.42,-0.188
205
- Naive,TabPFN-TS,0.167,0.0,0.417,-0.29,-0.508,-0.122
206
- Naive,Moirai-2.0,0.083,0.0,0.25,-0.248,-0.374,-0.137
207
- Naive,Drift,0.083,0.0,0.25,-0.189,-0.305,-0.089
208
- Naive,AutoTheta,0.0,0.0,0.0,-0.257,-0.41,-0.146
209
- Naive,Sundial-Base,0.5,0.25,0.75,-0.035,-0.215,0.082
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Seasonal Naive,0.75,0.542,0.917,0.115,-0.055,0.229
212
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.617,-0.877,-0.392
213
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-0.606,-0.842,-0.401
214
- Seasonal Naive,Stat. Ensemble,0.0,0.0,0.0,-0.59,-0.824,-0.404
215
- Seasonal Naive,Toto-1.0,0.083,0.0,0.25,-0.503,-0.771,-0.275
216
- Seasonal Naive,AutoETS,0.083,0.0,0.25,-0.412,-0.755,-0.091
217
- Seasonal Naive,TimesFM-2.5,0.083,0.0,0.25,-0.487,-0.686,-0.288
218
- Seasonal Naive,AutoARIMA,0.0,0.0,0.0,-0.465,-0.656,-0.319
219
- Seasonal Naive,Chronos-Bolt,0.083,0.0,0.25,-0.467,-0.673,-0.28
220
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.458,-0.668,-0.296
221
- Seasonal Naive,Moirai-2.0,0.167,0.0,0.417,-0.41,-0.643,-0.214
222
- Seasonal Naive,Drift,0.083,0.0,0.25,-0.344,-0.607,-0.057
223
- Seasonal Naive,AutoTheta,0.0,0.0,0.0,-0.421,-0.599,-0.275
224
- Seasonal Naive,Sundial-Base,0.167,0.0,0.417,-0.17,-0.333,-0.019
225
- Seasonal Naive,Naive,0.25,0.083,0.458,-0.13,-0.297,0.052
226
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/pairwise_WAPE.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
3
- TiRex,Stat. Ensemble,0.583,0.333,0.833,-0.007,-0.118,0.073
4
- TiRex,Chronos-2,0.667,0.333,0.917,0.019,-0.026,0.055
5
- TiRex,Toto-1.0,0.5,0.25,0.833,0.057,-0.013,0.132
6
- TiRex,TimesFM-2.5,0.833,0.667,1.0,0.029,-0.01,0.066
7
- TiRex,AutoETS,0.667,0.417,0.917,0.045,-0.022,0.113
8
- TiRex,TabPFN-TS,0.75,0.5,1.0,0.053,-0.079,0.157
9
- TiRex,AutoARIMA,0.583,0.333,0.833,0.032,-0.075,0.116
10
- TiRex,Chronos-Bolt,0.917,0.75,1.0,0.105,0.043,0.176
11
- TiRex,Drift,0.75,0.5,1.0,0.13,-0.069,0.279
12
- TiRex,Moirai-2.0,0.917,0.75,1.0,0.132,0.04,0.235
13
- TiRex,AutoTheta,0.667,0.417,0.917,0.087,-0.092,0.23
14
- TiRex,Naive,0.917,0.75,1.0,0.26,0.135,0.394
15
- TiRex,Sundial-Base,0.917,0.75,1.0,0.243,0.154,0.322
16
- TiRex,Seasonal Naive,0.917,0.75,1.0,0.378,0.252,0.503
17
- Stat. Ensemble,TiRex,0.417,0.167,0.667,0.007,-0.079,0.105
18
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
19
- Stat. Ensemble,Chronos-2,0.333,0.083,0.583,0.026,-0.052,0.124
20
- Stat. Ensemble,Toto-1.0,0.583,0.25,0.833,0.064,-0.026,0.153
21
- Stat. Ensemble,TimesFM-2.5,0.5,0.25,0.75,0.036,-0.044,0.138
22
- Stat. Ensemble,AutoETS,0.75,0.5,1.0,0.051,0.006,0.104
23
- Stat. Ensemble,TabPFN-TS,0.5,0.25,0.75,0.059,-0.02,0.135
24
- Stat. Ensemble,AutoARIMA,0.833,0.583,1.0,0.038,-0.015,0.077
25
- Stat. Ensemble,Chronos-Bolt,0.75,0.5,1.0,0.111,0.016,0.202
26
- Stat. Ensemble,Drift,0.833,0.583,1.0,0.136,0.017,0.263
27
- Stat. Ensemble,Moirai-2.0,0.667,0.333,0.917,0.138,0.035,0.235
28
- Stat. Ensemble,AutoTheta,0.917,0.75,1.0,0.093,0.006,0.186
29
- Stat. Ensemble,Naive,1.0,1.0,1.0,0.265,0.136,0.387
30
- Stat. Ensemble,Sundial-Base,1.0,1.0,1.0,0.248,0.146,0.339
31
- Stat. Ensemble,Seasonal Naive,1.0,1.0,1.0,0.382,0.284,0.478
32
- Chronos-2,TiRex,0.333,0.083,0.667,-0.019,-0.059,0.025
33
- Chronos-2,Stat. Ensemble,0.667,0.417,0.917,-0.026,-0.142,0.05
34
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
35
- Chronos-2,Toto-1.0,0.5,0.25,0.833,0.039,-0.05,0.132
36
- Chronos-2,TimesFM-2.5,0.5,0.167,0.75,0.01,-0.025,0.05
37
- Chronos-2,AutoETS,0.667,0.417,0.917,0.026,-0.04,0.085
38
- Chronos-2,TabPFN-TS,0.667,0.417,0.917,0.035,-0.112,0.148
39
- Chronos-2,AutoARIMA,0.583,0.333,0.833,0.013,-0.106,0.11
40
- Chronos-2,Chronos-Bolt,0.75,0.5,1.0,0.087,0.014,0.175
41
- Chronos-2,Drift,0.667,0.417,0.917,0.114,-0.101,0.291
42
- Chronos-2,Moirai-2.0,0.667,0.333,0.917,0.115,0.018,0.219
43
- Chronos-2,AutoTheta,0.667,0.417,0.917,0.07,-0.127,0.214
44
- Chronos-2,Naive,0.833,0.583,1.0,0.246,0.102,0.391
45
- Chronos-2,Sundial-Base,1.0,1.0,1.0,0.229,0.153,0.304
46
- Chronos-2,Seasonal Naive,0.917,0.75,1.0,0.366,0.237,0.489
47
- Toto-1.0,TiRex,0.5,0.167,0.75,-0.061,-0.152,0.013
48
- Toto-1.0,Stat. Ensemble,0.417,0.167,0.75,-0.068,-0.181,0.025
49
- Toto-1.0,Chronos-2,0.5,0.167,0.75,-0.041,-0.152,0.047
50
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
51
- Toto-1.0,TimesFM-2.5,0.583,0.333,0.833,-0.03,-0.14,0.051
52
- Toto-1.0,AutoETS,0.5,0.25,0.75,-0.013,-0.1,0.063
53
- Toto-1.0,TabPFN-TS,0.583,0.333,0.833,-0.005,-0.164,0.134
54
- Toto-1.0,AutoARIMA,0.583,0.333,0.833,-0.027,-0.14,0.072
55
- Toto-1.0,Chronos-Bolt,0.75,0.542,0.958,0.05,0.006,0.092
56
- Toto-1.0,Drift,0.667,0.417,0.917,0.077,-0.097,0.197
57
- Toto-1.0,Moirai-2.0,0.833,0.625,1.0,0.079,0.026,0.142
58
- Toto-1.0,AutoTheta,0.667,0.417,0.917,0.032,-0.139,0.166
59
- Toto-1.0,Naive,1.0,1.0,1.0,0.215,0.104,0.331
60
- Toto-1.0,Sundial-Base,0.833,0.583,1.0,0.197,0.07,0.294
61
- Toto-1.0,Seasonal Naive,0.917,0.75,1.0,0.34,0.203,0.463
62
- TimesFM-2.5,TiRex,0.167,0.0,0.333,-0.03,-0.071,0.01
63
- TimesFM-2.5,Stat. Ensemble,0.5,0.25,0.75,-0.037,-0.161,0.042
64
- TimesFM-2.5,Chronos-2,0.5,0.25,0.833,-0.011,-0.052,0.025
65
- TimesFM-2.5,Toto-1.0,0.417,0.167,0.667,0.029,-0.054,0.123
66
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
67
- TimesFM-2.5,AutoETS,0.583,0.331,0.833,0.016,-0.058,0.087
68
- TimesFM-2.5,TabPFN-TS,0.5,0.25,0.75,0.024,-0.121,0.142
69
- TimesFM-2.5,AutoARIMA,0.667,0.417,0.917,0.002,-0.108,0.082
70
- TimesFM-2.5,Chronos-Bolt,0.667,0.417,0.875,0.078,0.002,0.167
71
- TimesFM-2.5,Drift,0.75,0.5,1.0,0.104,-0.116,0.272
72
- TimesFM-2.5,Moirai-2.0,0.583,0.333,0.833,0.106,0.001,0.216
73
- TimesFM-2.5,AutoTheta,0.75,0.5,1.0,0.06,-0.138,0.209
74
- TimesFM-2.5,Naive,0.833,0.583,1.0,0.238,0.088,0.384
75
- TimesFM-2.5,Sundial-Base,1.0,1.0,1.0,0.221,0.142,0.296
76
- TimesFM-2.5,Seasonal Naive,0.917,0.75,1.0,0.359,0.235,0.479
77
- AutoETS,TiRex,0.333,0.083,0.583,-0.047,-0.127,0.022
78
- AutoETS,Stat. Ensemble,0.25,0.0,0.5,-0.054,-0.115,-0.006
79
- AutoETS,Chronos-2,0.333,0.083,0.583,-0.027,-0.093,0.039
80
- AutoETS,Toto-1.0,0.5,0.25,0.75,0.013,-0.067,0.091
81
- AutoETS,TimesFM-2.5,0.417,0.167,0.669,-0.016,-0.095,0.055
82
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
83
- AutoETS,TabPFN-TS,0.417,0.167,0.75,0.008,-0.094,0.112
84
- AutoETS,AutoARIMA,0.583,0.333,0.833,-0.014,-0.108,0.063
85
- AutoETS,Chronos-Bolt,0.667,0.333,0.917,0.063,-0.003,0.135
86
- AutoETS,Drift,0.583,0.333,0.833,0.09,-0.071,0.231
87
- AutoETS,Moirai-2.0,0.667,0.333,0.917,0.092,0.012,0.167
88
- AutoETS,AutoTheta,0.667,0.417,0.917,0.044,-0.086,0.148
89
- AutoETS,Naive,0.917,0.75,1.0,0.225,0.099,0.346
90
- AutoETS,Sundial-Base,0.917,0.75,1.0,0.208,0.127,0.279
91
- AutoETS,Seasonal Naive,1.0,1.0,1.0,0.348,0.248,0.455
92
- TabPFN-TS,TiRex,0.25,0.0,0.5,-0.056,-0.187,0.073
93
- TabPFN-TS,Stat. Ensemble,0.5,0.25,0.75,-0.063,-0.156,0.019
94
- TabPFN-TS,Chronos-2,0.333,0.083,0.583,-0.036,-0.174,0.101
95
- TabPFN-TS,Toto-1.0,0.417,0.167,0.667,0.005,-0.155,0.141
96
- TabPFN-TS,TimesFM-2.5,0.5,0.25,0.75,-0.025,-0.165,0.108
97
- TabPFN-TS,AutoETS,0.583,0.25,0.833,-0.009,-0.126,0.086
98
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
99
- TabPFN-TS,AutoARIMA,0.5,0.25,0.75,-0.023,-0.135,0.081
100
- TabPFN-TS,Chronos-Bolt,0.583,0.331,0.833,0.055,-0.107,0.185
101
- TabPFN-TS,Drift,0.583,0.333,0.833,0.082,-0.067,0.234
102
- TabPFN-TS,Moirai-2.0,0.583,0.331,0.833,0.084,-0.089,0.226
103
- TabPFN-TS,AutoTheta,0.583,0.333,0.833,0.036,-0.065,0.138
104
- TabPFN-TS,Naive,0.833,0.583,1.0,0.219,0.073,0.351
105
- TabPFN-TS,Sundial-Base,0.917,0.75,1.0,0.201,0.088,0.308
106
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.343,0.242,0.437
107
- AutoARIMA,TiRex,0.417,0.167,0.667,-0.033,-0.131,0.07
108
- AutoARIMA,Stat. Ensemble,0.167,0.0,0.417,-0.04,-0.084,0.014
109
- AutoARIMA,Chronos-2,0.417,0.167,0.667,-0.013,-0.124,0.096
110
- AutoARIMA,Toto-1.0,0.417,0.167,0.667,0.027,-0.077,0.123
111
- AutoARIMA,TimesFM-2.5,0.333,0.083,0.583,-0.003,-0.089,0.098
112
- AutoARIMA,AutoETS,0.417,0.167,0.667,0.014,-0.067,0.097
113
- AutoARIMA,TabPFN-TS,0.5,0.25,0.75,0.022,-0.089,0.119
114
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
115
- AutoARIMA,Chronos-Bolt,0.417,0.167,0.667,0.076,-0.037,0.184
116
- AutoARIMA,Drift,0.5,0.25,0.75,0.102,-0.037,0.249
117
- AutoARIMA,Moirai-2.0,0.5,0.25,0.75,0.104,-0.025,0.23
118
- AutoARIMA,AutoTheta,0.417,0.167,0.667,0.057,-0.064,0.19
119
- AutoARIMA,Naive,0.75,0.5,0.917,0.236,0.084,0.383
120
- AutoARIMA,Sundial-Base,0.833,0.583,1.0,0.219,0.084,0.33
121
- AutoARIMA,Seasonal Naive,1.0,1.0,1.0,0.357,0.24,0.471
122
- Chronos-Bolt,TiRex,0.083,0.0,0.25,-0.117,-0.213,-0.045
123
- Chronos-Bolt,Stat. Ensemble,0.25,0.0,0.5,-0.125,-0.253,-0.016
124
- Chronos-Bolt,Chronos-2,0.25,0.0,0.5,-0.096,-0.213,-0.014
125
- Chronos-Bolt,Toto-1.0,0.25,0.042,0.458,-0.053,-0.101,-0.006
126
- Chronos-Bolt,TimesFM-2.5,0.333,0.125,0.583,-0.084,-0.2,-0.002
127
- Chronos-Bolt,AutoETS,0.333,0.083,0.667,-0.067,-0.156,0.003
128
- Chronos-Bolt,TabPFN-TS,0.417,0.167,0.669,-0.058,-0.227,0.097
129
- Chronos-Bolt,AutoARIMA,0.583,0.333,0.833,-0.082,-0.226,0.035
130
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
131
- Chronos-Bolt,Drift,0.75,0.5,1.0,0.029,-0.15,0.149
132
- Chronos-Bolt,Moirai-2.0,0.417,0.167,0.667,0.031,-0.022,0.094
133
- Chronos-Bolt,AutoTheta,0.667,0.417,0.917,-0.02,-0.201,0.115
134
- Chronos-Bolt,Naive,0.833,0.583,1.0,0.174,0.068,0.284
135
- Chronos-Bolt,Sundial-Base,0.917,0.75,1.0,0.155,0.026,0.239
136
- Chronos-Bolt,Seasonal Naive,0.833,0.583,1.0,0.305,0.17,0.432
137
- Drift,TiRex,0.25,0.0,0.5,-0.15,-0.388,0.065
138
- Drift,Stat. Ensemble,0.167,0.0,0.417,-0.158,-0.356,-0.017
139
- Drift,Chronos-2,0.333,0.083,0.583,-0.128,-0.41,0.092
140
- Drift,Toto-1.0,0.333,0.083,0.583,-0.084,-0.246,0.088
141
- Drift,TimesFM-2.5,0.25,0.0,0.5,-0.116,-0.374,0.104
142
- Drift,AutoETS,0.417,0.167,0.667,-0.098,-0.3,0.066
143
- Drift,TabPFN-TS,0.417,0.167,0.667,-0.089,-0.305,0.063
144
- Drift,AutoARIMA,0.5,0.25,0.75,-0.114,-0.332,0.036
145
- Drift,Chronos-Bolt,0.25,0.0,0.5,-0.029,-0.175,0.13
146
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
147
- Drift,Moirai-2.0,0.417,0.167,0.667,0.002,-0.146,0.163
148
- Drift,AutoTheta,0.75,0.5,1.0,-0.05,-0.184,0.016
149
- Drift,Naive,0.917,0.75,1.0,0.149,0.045,0.251
150
- Drift,Sundial-Base,0.833,0.583,1.0,0.13,-0.09,0.296
151
- Drift,Seasonal Naive,0.917,0.75,1.0,0.284,0.11,0.391
152
- Moirai-2.0,TiRex,0.083,0.0,0.25,-0.152,-0.307,-0.042
153
- Moirai-2.0,Stat. Ensemble,0.333,0.083,0.667,-0.16,-0.308,-0.037
154
- Moirai-2.0,Chronos-2,0.333,0.083,0.667,-0.131,-0.281,-0.018
155
- Moirai-2.0,Toto-1.0,0.167,0.0,0.375,-0.086,-0.166,-0.027
156
- Moirai-2.0,TimesFM-2.5,0.417,0.167,0.667,-0.119,-0.275,-0.001
157
- Moirai-2.0,AutoETS,0.333,0.083,0.667,-0.101,-0.201,-0.012
158
- Moirai-2.0,TabPFN-TS,0.417,0.167,0.669,-0.091,-0.291,0.082
159
- Moirai-2.0,AutoARIMA,0.5,0.25,0.75,-0.116,-0.299,0.024
160
- Moirai-2.0,Chronos-Bolt,0.583,0.333,0.833,-0.032,-0.104,0.022
161
- Moirai-2.0,Drift,0.583,0.333,0.833,-0.002,-0.195,0.127
162
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
163
- Moirai-2.0,AutoTheta,0.5,0.25,0.75,-0.052,-0.228,0.084
164
- Moirai-2.0,Naive,0.75,0.5,1.0,0.147,0.051,0.248
165
- Moirai-2.0,Sundial-Base,0.833,0.583,1.0,0.128,-0.003,0.227
166
- Moirai-2.0,Seasonal Naive,0.75,0.5,1.0,0.283,0.152,0.405
167
- AutoTheta,TiRex,0.333,0.083,0.583,-0.096,-0.299,0.084
168
- AutoTheta,Stat. Ensemble,0.083,0.0,0.25,-0.103,-0.229,-0.006
169
- AutoTheta,Chronos-2,0.333,0.083,0.583,-0.075,-0.273,0.113
170
- AutoTheta,Toto-1.0,0.333,0.083,0.583,-0.033,-0.199,0.122
171
- AutoTheta,TimesFM-2.5,0.25,0.0,0.5,-0.064,-0.263,0.122
172
- AutoTheta,AutoETS,0.333,0.083,0.583,-0.046,-0.174,0.079
173
- AutoTheta,TabPFN-TS,0.417,0.167,0.667,-0.038,-0.16,0.061
174
- AutoTheta,AutoARIMA,0.583,0.333,0.833,-0.061,-0.235,0.061
175
- AutoTheta,Chronos-Bolt,0.333,0.083,0.583,0.019,-0.13,0.167
176
- AutoTheta,Drift,0.25,0.0,0.5,0.047,-0.016,0.156
177
- AutoTheta,Moirai-2.0,0.5,0.25,0.75,0.049,-0.092,0.186
178
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoTheta,Naive,0.833,0.583,1.0,0.189,0.084,0.29
180
- AutoTheta,Sundial-Base,0.833,0.583,1.0,0.171,0.057,0.303
181
- AutoTheta,Seasonal Naive,1.0,1.0,1.0,0.318,0.237,0.391
182
- Naive,TiRex,0.083,0.0,0.25,-0.352,-0.649,-0.156
183
- Naive,Stat. Ensemble,0.0,0.0,0.0,-0.361,-0.631,-0.157
184
- Naive,Chronos-2,0.167,0.0,0.417,-0.326,-0.642,-0.113
185
- Naive,Toto-1.0,0.0,0.0,0.0,-0.274,-0.494,-0.116
186
- Naive,TimesFM-2.5,0.167,0.0,0.417,-0.312,-0.623,-0.097
187
- Naive,AutoETS,0.083,0.0,0.25,-0.291,-0.53,-0.11
188
- Naive,TabPFN-TS,0.167,0.0,0.417,-0.28,-0.54,-0.079
189
- Naive,AutoARIMA,0.25,0.083,0.5,-0.309,-0.62,-0.092
190
- Naive,Chronos-Bolt,0.167,0.0,0.417,-0.21,-0.396,-0.073
191
- Naive,Drift,0.083,0.0,0.25,-0.176,-0.335,-0.047
192
- Naive,Moirai-2.0,0.25,0.0,0.5,-0.173,-0.33,-0.053
193
- Naive,AutoTheta,0.167,0.0,0.417,-0.234,-0.408,-0.092
194
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
195
- Naive,Sundial-Base,0.583,0.333,0.833,-0.023,-0.218,0.11
196
- Naive,Seasonal Naive,0.75,0.542,0.917,0.159,0.001,0.281
197
- Sundial-Base,TiRex,0.083,0.0,0.25,-0.322,-0.476,-0.182
198
- Sundial-Base,Stat. Ensemble,0.0,0.0,0.0,-0.331,-0.513,-0.171
199
- Sundial-Base,Chronos-2,0.0,0.0,0.0,-0.297,-0.436,-0.181
200
- Sundial-Base,Toto-1.0,0.167,0.0,0.417,-0.246,-0.416,-0.075
201
- Sundial-Base,TimesFM-2.5,0.0,0.0,0.0,-0.283,-0.421,-0.165
202
- Sundial-Base,AutoETS,0.083,0.0,0.25,-0.262,-0.386,-0.145
203
- Sundial-Base,TabPFN-TS,0.083,0.0,0.25,-0.252,-0.445,-0.097
204
- Sundial-Base,AutoARIMA,0.167,0.0,0.417,-0.28,-0.493,-0.091
205
- Sundial-Base,Chronos-Bolt,0.083,0.0,0.25,-0.183,-0.315,-0.026
206
- Sundial-Base,Drift,0.167,0.0,0.417,-0.149,-0.421,0.082
207
- Sundial-Base,Moirai-2.0,0.167,0.0,0.417,-0.147,-0.294,0.002
208
- Sundial-Base,AutoTheta,0.167,0.0,0.417,-0.206,-0.435,-0.061
209
- Sundial-Base,Naive,0.417,0.167,0.667,0.022,-0.123,0.179
210
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
211
- Sundial-Base,Seasonal Naive,0.833,0.583,1.0,0.177,0.057,0.284
212
- Seasonal Naive,TiRex,0.083,0.0,0.25,-0.607,-1.014,-0.337
213
- Seasonal Naive,Stat. Ensemble,0.0,0.0,0.0,-0.618,-0.915,-0.396
214
- Seasonal Naive,Chronos-2,0.083,0.0,0.25,-0.576,-0.957,-0.311
215
- Seasonal Naive,Toto-1.0,0.083,0.0,0.25,-0.514,-0.863,-0.255
216
- Seasonal Naive,TimesFM-2.5,0.083,0.0,0.25,-0.56,-0.92,-0.308
217
- Seasonal Naive,AutoETS,0.0,0.0,0.0,-0.535,-0.834,-0.33
218
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.522,-0.775,-0.319
219
- Seasonal Naive,AutoARIMA,0.0,0.0,0.0,-0.556,-0.891,-0.315
220
- Seasonal Naive,Chronos-Bolt,0.167,0.0,0.417,-0.438,-0.76,-0.205
221
- Seasonal Naive,Drift,0.083,0.0,0.25,-0.397,-0.643,-0.124
222
- Seasonal Naive,Moirai-2.0,0.25,0.0,0.5,-0.394,-0.681,-0.179
223
- Seasonal Naive,AutoTheta,0.0,0.0,0.0,-0.467,-0.643,-0.311
224
- Seasonal Naive,Naive,0.25,0.083,0.458,-0.189,-0.391,-0.001
225
- Seasonal Naive,Sundial-Base,0.167,0.0,0.417,-0.216,-0.396,-0.06
226
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_econ/pairwise_WQL.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
3
- TiRex,Chronos-2,0.583,0.333,0.833,0.019,-0.029,0.058
4
- TiRex,Toto-1.0,0.667,0.333,0.917,0.065,0.009,0.127
5
- TiRex,TimesFM-2.5,0.833,0.667,1.0,0.047,0.005,0.084
6
- TiRex,TabPFN-TS,0.833,0.583,1.0,0.062,-0.038,0.149
7
- TiRex,Stat. Ensemble,0.75,0.5,1.0,0.066,-0.055,0.161
8
- TiRex,Chronos-Bolt,0.917,0.75,1.0,0.102,0.047,0.167
9
- TiRex,AutoETS,0.833,0.583,1.0,0.084,0.017,0.145
10
- TiRex,Moirai-2.0,0.833,0.583,1.0,0.138,0.051,0.236
11
- TiRex,AutoARIMA,0.75,0.5,1.0,0.117,-0.008,0.227
12
- TiRex,AutoTheta,0.833,0.583,1.0,0.183,0.017,0.322
13
- TiRex,Drift,0.917,0.75,1.0,0.24,0.039,0.386
14
- TiRex,Sundial-Base,0.917,0.75,1.0,0.298,0.205,0.38
15
- TiRex,Naive,1.0,1.0,1.0,0.347,0.221,0.471
16
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.421,0.287,0.546
17
- Chronos-2,TiRex,0.417,0.167,0.667,-0.02,-0.062,0.028
18
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
19
- Chronos-2,Toto-1.0,0.5,0.25,0.833,0.047,-0.03,0.132
20
- Chronos-2,TimesFM-2.5,0.667,0.333,0.917,0.028,-0.008,0.061
21
- Chronos-2,TabPFN-TS,0.667,0.417,0.917,0.043,-0.066,0.134
22
- Chronos-2,Stat. Ensemble,0.833,0.583,1.0,0.048,-0.082,0.141
23
- Chronos-2,Chronos-Bolt,0.667,0.417,0.917,0.084,0.014,0.167
24
- Chronos-2,AutoETS,0.917,0.75,1.0,0.066,0.004,0.112
25
- Chronos-2,Moirai-2.0,0.667,0.333,0.917,0.122,0.031,0.22
26
- Chronos-2,AutoARIMA,0.75,0.5,1.0,0.099,-0.035,0.22
27
- Chronos-2,AutoTheta,0.917,0.75,1.0,0.167,-0.006,0.299
28
- Chronos-2,Drift,0.917,0.75,1.0,0.225,0.01,0.391
29
- Chronos-2,Sundial-Base,1.0,1.0,1.0,0.284,0.209,0.358
30
- Chronos-2,Naive,0.917,0.75,1.0,0.334,0.191,0.471
31
- Chronos-2,Seasonal Naive,0.917,0.75,1.0,0.41,0.281,0.527
32
- Toto-1.0,TiRex,0.333,0.083,0.667,-0.069,-0.145,-0.009
33
- Toto-1.0,Chronos-2,0.5,0.167,0.75,-0.049,-0.152,0.029
34
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
35
- Toto-1.0,TimesFM-2.5,0.583,0.333,0.833,-0.02,-0.115,0.049
36
- Toto-1.0,TabPFN-TS,0.583,0.333,0.833,-0.004,-0.135,0.114
37
- Toto-1.0,Stat. Ensemble,0.583,0.333,0.833,0.001,-0.133,0.109
38
- Toto-1.0,Chronos-Bolt,0.833,0.667,1.0,0.039,0.004,0.074
39
- Toto-1.0,AutoETS,0.667,0.417,0.917,0.021,-0.071,0.095
40
- Toto-1.0,Moirai-2.0,0.917,0.792,1.0,0.079,0.025,0.148
41
- Toto-1.0,AutoARIMA,0.583,0.333,0.833,0.055,-0.086,0.182
42
- Toto-1.0,AutoTheta,0.75,0.5,1.0,0.126,-0.046,0.264
43
- Toto-1.0,Drift,0.917,0.75,1.0,0.187,-0.005,0.316
44
- Toto-1.0,Sundial-Base,0.917,0.75,1.0,0.249,0.132,0.337
45
- Toto-1.0,Naive,0.917,0.75,1.0,0.301,0.189,0.415
46
- Toto-1.0,Seasonal Naive,0.833,0.583,1.0,0.381,0.238,0.505
47
- TimesFM-2.5,TiRex,0.167,0.0,0.333,-0.049,-0.092,-0.005
48
- TimesFM-2.5,Chronos-2,0.333,0.083,0.667,-0.029,-0.064,0.008
49
- TimesFM-2.5,Toto-1.0,0.417,0.167,0.667,0.019,-0.052,0.103
50
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
51
- TimesFM-2.5,TabPFN-TS,0.5,0.25,0.75,0.016,-0.106,0.118
52
- TimesFM-2.5,Stat. Ensemble,0.667,0.417,0.917,0.021,-0.12,0.113
53
- TimesFM-2.5,Chronos-Bolt,0.583,0.333,0.833,0.058,-0.015,0.143
54
- TimesFM-2.5,AutoETS,0.75,0.5,1.0,0.04,-0.045,0.104
55
- TimesFM-2.5,Moirai-2.0,0.583,0.333,0.833,0.096,-0.002,0.206
56
- TimesFM-2.5,AutoARIMA,0.75,0.5,1.0,0.074,-0.073,0.186
57
- TimesFM-2.5,AutoTheta,0.833,0.583,1.0,0.143,-0.049,0.286
58
- TimesFM-2.5,Drift,0.833,0.583,1.0,0.203,-0.028,0.375
59
- TimesFM-2.5,Sundial-Base,1.0,1.0,1.0,0.264,0.182,0.346
60
- TimesFM-2.5,Naive,0.917,0.75,1.0,0.315,0.161,0.458
61
- TimesFM-2.5,Seasonal Naive,0.917,0.75,1.0,0.393,0.257,0.517
62
- TabPFN-TS,TiRex,0.167,0.0,0.417,-0.066,-0.175,0.037
63
- TabPFN-TS,Chronos-2,0.333,0.083,0.583,-0.045,-0.154,0.062
64
- TabPFN-TS,Toto-1.0,0.417,0.167,0.667,0.004,-0.129,0.119
65
- TabPFN-TS,TimesFM-2.5,0.5,0.25,0.75,-0.016,-0.134,0.096
66
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
67
- TabPFN-TS,Stat. Ensemble,0.417,0.167,0.667,0.005,-0.084,0.091
68
- TabPFN-TS,Chronos-Bolt,0.583,0.331,0.833,0.043,-0.082,0.159
69
- TabPFN-TS,AutoETS,0.75,0.5,1.0,0.024,-0.064,0.095
70
- TabPFN-TS,Moirai-2.0,0.583,0.331,0.833,0.082,-0.067,0.207
71
- TabPFN-TS,AutoARIMA,0.5,0.25,0.833,0.059,-0.057,0.177
72
- TabPFN-TS,AutoTheta,0.833,0.583,1.0,0.129,0.023,0.236
73
- TabPFN-TS,Drift,0.75,0.5,1.0,0.19,0.03,0.353
74
- TabPFN-TS,Sundial-Base,1.0,1.0,1.0,0.252,0.154,0.347
75
- TabPFN-TS,Naive,0.917,0.75,1.0,0.304,0.18,0.427
76
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.383,0.282,0.484
77
- Stat. Ensemble,TiRex,0.25,0.0,0.5,-0.071,-0.192,0.052
78
- Stat. Ensemble,Chronos-2,0.167,0.0,0.417,-0.05,-0.164,0.076
79
- Stat. Ensemble,Toto-1.0,0.417,0.167,0.667,-0.001,-0.123,0.117
80
- Stat. Ensemble,TimesFM-2.5,0.333,0.083,0.583,-0.021,-0.127,0.107
81
- Stat. Ensemble,TabPFN-TS,0.583,0.333,0.833,-0.005,-0.1,0.077
82
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
83
- Stat. Ensemble,Chronos-Bolt,0.417,0.167,0.667,0.038,-0.083,0.155
84
- Stat. Ensemble,AutoETS,0.417,0.167,0.667,0.019,-0.068,0.095
85
- Stat. Ensemble,Moirai-2.0,0.5,0.25,0.75,0.077,-0.064,0.201
86
- Stat. Ensemble,AutoARIMA,0.917,0.75,1.0,0.054,-0.0,0.099
87
- Stat. Ensemble,AutoTheta,0.917,0.75,1.0,0.125,0.037,0.224
88
- Stat. Ensemble,Drift,0.917,0.75,1.0,0.186,0.041,0.338
89
- Stat. Ensemble,Sundial-Base,0.833,0.583,1.0,0.248,0.115,0.362
90
- Stat. Ensemble,Naive,1.0,1.0,1.0,0.3,0.159,0.433
91
- Stat. Ensemble,Seasonal Naive,1.0,1.0,1.0,0.38,0.283,0.482
92
- Chronos-Bolt,TiRex,0.083,0.0,0.25,-0.113,-0.2,-0.049
93
- Chronos-Bolt,Chronos-2,0.333,0.083,0.583,-0.092,-0.2,-0.014
94
- Chronos-Bolt,Toto-1.0,0.167,0.0,0.333,-0.041,-0.079,-0.004
95
- Chronos-Bolt,TimesFM-2.5,0.417,0.167,0.667,-0.061,-0.167,0.014
96
- Chronos-Bolt,TabPFN-TS,0.417,0.167,0.669,-0.045,-0.189,0.075
97
- Chronos-Bolt,Stat. Ensemble,0.583,0.333,0.833,-0.04,-0.184,0.077
98
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
99
- Chronos-Bolt,AutoETS,0.583,0.333,0.833,-0.019,-0.125,0.057
100
- Chronos-Bolt,Moirai-2.0,0.5,0.25,0.75,0.041,-0.012,0.105
101
- Chronos-Bolt,AutoARIMA,0.667,0.417,0.917,0.017,-0.147,0.157
102
- Chronos-Bolt,AutoTheta,0.833,0.583,1.0,0.09,-0.083,0.223
103
- Chronos-Bolt,Drift,0.917,0.75,1.0,0.154,-0.033,0.28
104
- Chronos-Bolt,Sundial-Base,0.917,0.75,1.0,0.218,0.093,0.302
105
- Chronos-Bolt,Naive,0.917,0.75,1.0,0.273,0.167,0.374
106
- Chronos-Bolt,Seasonal Naive,0.833,0.583,1.0,0.355,0.212,0.48
107
- AutoETS,TiRex,0.167,0.0,0.417,-0.092,-0.169,-0.018
108
- AutoETS,Chronos-2,0.083,0.0,0.25,-0.071,-0.126,-0.004
109
- AutoETS,Toto-1.0,0.333,0.083,0.583,-0.021,-0.105,0.066
110
- AutoETS,TimesFM-2.5,0.25,0.0,0.5,-0.041,-0.116,0.043
111
- AutoETS,TabPFN-TS,0.25,0.0,0.5,-0.025,-0.104,0.06
112
- AutoETS,Stat. Ensemble,0.583,0.333,0.833,-0.02,-0.105,0.064
113
- AutoETS,Chronos-Bolt,0.417,0.167,0.667,0.019,-0.06,0.111
114
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
115
- AutoETS,Moirai-2.0,0.417,0.167,0.667,0.059,-0.034,0.15
116
- AutoETS,AutoARIMA,0.75,0.5,1.0,0.035,-0.082,0.151
117
- AutoETS,AutoTheta,0.833,0.583,1.0,0.108,-0.026,0.227
118
- AutoETS,Drift,0.833,0.583,1.0,0.17,-0.016,0.334
119
- AutoETS,Sundial-Base,1.0,1.0,1.0,0.233,0.138,0.322
120
- AutoETS,Naive,0.917,0.75,1.0,0.287,0.148,0.417
121
- AutoETS,Seasonal Naive,0.917,0.75,1.0,0.368,0.243,0.486
122
- Moirai-2.0,TiRex,0.167,0.0,0.417,-0.161,-0.309,-0.053
123
- Moirai-2.0,Chronos-2,0.333,0.083,0.667,-0.138,-0.282,-0.032
124
- Moirai-2.0,Toto-1.0,0.083,0.0,0.208,-0.085,-0.173,-0.026
125
- Moirai-2.0,TimesFM-2.5,0.417,0.167,0.667,-0.107,-0.259,0.002
126
- Moirai-2.0,TabPFN-TS,0.417,0.167,0.669,-0.089,-0.262,0.063
127
- Moirai-2.0,Stat. Ensemble,0.5,0.25,0.75,-0.084,-0.251,0.061
128
- Moirai-2.0,Chronos-Bolt,0.5,0.25,0.75,-0.043,-0.118,0.012
129
- Moirai-2.0,AutoETS,0.583,0.333,0.833,-0.063,-0.177,0.033
130
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
131
- Moirai-2.0,AutoARIMA,0.583,0.333,0.833,-0.025,-0.226,0.141
132
- Moirai-2.0,AutoTheta,0.667,0.417,0.917,0.051,-0.119,0.187
133
- Moirai-2.0,Drift,0.75,0.5,1.0,0.118,-0.083,0.257
134
- Moirai-2.0,Sundial-Base,0.833,0.583,1.0,0.185,0.061,0.279
135
- Moirai-2.0,Naive,0.833,0.583,1.0,0.242,0.127,0.337
136
- Moirai-2.0,Seasonal Naive,0.75,0.5,1.0,0.328,0.183,0.456
137
- AutoARIMA,TiRex,0.25,0.0,0.5,-0.132,-0.294,0.008
138
- AutoARIMA,Chronos-2,0.25,0.0,0.5,-0.11,-0.282,0.034
139
- AutoARIMA,Toto-1.0,0.417,0.167,0.667,-0.059,-0.222,0.079
140
- AutoARIMA,TimesFM-2.5,0.25,0.0,0.5,-0.079,-0.228,0.068
141
- AutoARIMA,TabPFN-TS,0.5,0.167,0.75,-0.062,-0.215,0.054
142
- AutoARIMA,Stat. Ensemble,0.083,0.0,0.25,-0.057,-0.109,0.0
143
- AutoARIMA,Chronos-Bolt,0.333,0.083,0.583,-0.017,-0.187,0.128
144
- AutoARIMA,AutoETS,0.25,0.0,0.5,-0.037,-0.178,0.076
145
- AutoARIMA,Moirai-2.0,0.417,0.167,0.667,0.025,-0.164,0.185
146
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
147
- AutoARIMA,AutoTheta,0.5,0.25,0.75,0.075,-0.043,0.214
148
- AutoARIMA,Drift,0.667,0.417,0.917,0.139,-0.024,0.304
149
- AutoARIMA,Sundial-Base,0.75,0.5,1.0,0.205,0.026,0.344
150
- AutoARIMA,Naive,0.833,0.583,1.0,0.26,0.092,0.423
151
- AutoARIMA,Seasonal Naive,1.0,1.0,1.0,0.344,0.225,0.467
152
- AutoTheta,TiRex,0.167,0.0,0.417,-0.224,-0.476,-0.017
153
- AutoTheta,Chronos-2,0.083,0.0,0.25,-0.2,-0.426,0.006
154
- AutoTheta,Toto-1.0,0.25,0.0,0.5,-0.144,-0.358,0.044
155
- AutoTheta,TimesFM-2.5,0.167,0.0,0.417,-0.167,-0.401,0.046
156
- AutoTheta,TabPFN-TS,0.167,0.0,0.417,-0.148,-0.309,-0.024
157
- AutoTheta,Stat. Ensemble,0.083,0.0,0.25,-0.143,-0.289,-0.038
158
- AutoTheta,Chronos-Bolt,0.167,0.0,0.417,-0.099,-0.286,0.076
159
- AutoTheta,AutoETS,0.167,0.0,0.417,-0.12,-0.293,0.025
160
- AutoTheta,Moirai-2.0,0.333,0.083,0.583,-0.054,-0.229,0.106
161
- AutoTheta,AutoARIMA,0.5,0.25,0.75,-0.081,-0.273,0.041
162
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoTheta,Drift,0.333,0.083,0.583,0.07,-0.028,0.226
164
- AutoTheta,Sundial-Base,0.833,0.583,1.0,0.141,0.015,0.293
165
- AutoTheta,Naive,0.917,0.75,1.0,0.201,0.088,0.318
166
- AutoTheta,Seasonal Naive,1.0,1.0,1.0,0.291,0.222,0.357
167
- Drift,TiRex,0.083,0.0,0.25,-0.315,-0.628,-0.04
168
- Drift,Chronos-2,0.083,0.0,0.25,-0.29,-0.643,-0.01
169
- Drift,Toto-1.0,0.083,0.0,0.25,-0.23,-0.461,0.005
170
- Drift,TimesFM-2.5,0.167,0.0,0.417,-0.254,-0.6,0.027
171
- Drift,TabPFN-TS,0.25,0.0,0.5,-0.234,-0.546,-0.031
172
- Drift,Stat. Ensemble,0.083,0.0,0.25,-0.228,-0.511,-0.043
173
- Drift,Chronos-Bolt,0.083,0.0,0.25,-0.182,-0.389,0.032
174
- Drift,AutoETS,0.167,0.0,0.417,-0.205,-0.5,0.015
175
- Drift,Moirai-2.0,0.25,0.0,0.5,-0.133,-0.347,0.076
176
- Drift,AutoARIMA,0.333,0.083,0.583,-0.162,-0.437,0.023
177
- Drift,AutoTheta,0.667,0.417,0.917,-0.075,-0.292,0.027
178
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
179
- Drift,Sundial-Base,0.667,0.417,0.917,0.076,-0.217,0.284
180
- Drift,Naive,0.917,0.75,1.0,0.141,0.034,0.242
181
- Drift,Seasonal Naive,0.917,0.75,1.0,0.238,0.002,0.366
182
- Sundial-Base,TiRex,0.083,0.0,0.25,-0.424,-0.613,-0.258
183
- Sundial-Base,Chronos-2,0.0,0.0,0.0,-0.397,-0.557,-0.264
184
- Sundial-Base,Toto-1.0,0.083,0.0,0.25,-0.332,-0.509,-0.153
185
- Sundial-Base,TimesFM-2.5,0.0,0.0,0.0,-0.358,-0.528,-0.222
186
- Sundial-Base,TabPFN-TS,0.0,0.0,0.0,-0.336,-0.532,-0.181
187
- Sundial-Base,Stat. Ensemble,0.167,0.0,0.417,-0.33,-0.567,-0.13
188
- Sundial-Base,Chronos-Bolt,0.083,0.0,0.25,-0.279,-0.433,-0.103
189
- Sundial-Base,AutoETS,0.0,0.0,0.0,-0.304,-0.474,-0.159
190
- Sundial-Base,Moirai-2.0,0.167,0.0,0.417,-0.227,-0.387,-0.066
191
- Sundial-Base,AutoARIMA,0.25,0.0,0.5,-0.258,-0.524,-0.027
192
- Sundial-Base,AutoTheta,0.167,0.0,0.417,-0.164,-0.415,-0.015
193
- Sundial-Base,Drift,0.333,0.083,0.583,-0.083,-0.397,0.178
194
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
195
- Sundial-Base,Naive,0.583,0.333,0.833,0.07,-0.096,0.23
196
- Sundial-Base,Seasonal Naive,0.833,0.583,1.0,0.175,0.01,0.298
197
- Naive,TiRex,0.0,0.0,0.0,-0.53,-0.889,-0.283
198
- Naive,Chronos-2,0.083,0.0,0.25,-0.501,-0.89,-0.236
199
- Naive,Toto-1.0,0.083,0.0,0.25,-0.431,-0.708,-0.233
200
- Naive,TimesFM-2.5,0.083,0.0,0.25,-0.459,-0.846,-0.192
201
- Naive,TabPFN-TS,0.083,0.0,0.25,-0.436,-0.746,-0.219
202
- Naive,Stat. Ensemble,0.0,0.0,0.0,-0.429,-0.765,-0.19
203
- Naive,Chronos-Bolt,0.083,0.0,0.25,-0.375,-0.598,-0.2
204
- Naive,AutoETS,0.083,0.0,0.25,-0.402,-0.715,-0.174
205
- Naive,Moirai-2.0,0.167,0.0,0.417,-0.319,-0.508,-0.145
206
- Naive,AutoARIMA,0.167,0.0,0.417,-0.352,-0.732,-0.102
207
- Naive,AutoTheta,0.083,0.0,0.25,-0.251,-0.466,-0.096
208
- Naive,Drift,0.083,0.0,0.25,-0.164,-0.32,-0.035
209
- Naive,Sundial-Base,0.417,0.167,0.667,-0.075,-0.298,0.087
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Seasonal Naive,0.75,0.542,0.917,0.114,-0.095,0.248
212
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.727,-1.204,-0.402
213
- Seasonal Naive,Chronos-2,0.083,0.0,0.25,-0.694,-1.116,-0.391
214
- Seasonal Naive,Toto-1.0,0.167,0.0,0.417,-0.615,-1.022,-0.312
215
- Seasonal Naive,TimesFM-2.5,0.083,0.0,0.25,-0.647,-1.071,-0.346
216
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.621,-0.938,-0.393
217
- Seasonal Naive,Stat. Ensemble,0.0,0.0,0.0,-0.613,-0.931,-0.394
218
- Seasonal Naive,Chronos-Bolt,0.167,0.0,0.417,-0.551,-0.924,-0.269
219
- Seasonal Naive,AutoETS,0.083,0.0,0.25,-0.581,-0.947,-0.321
220
- Seasonal Naive,Moirai-2.0,0.25,0.0,0.5,-0.488,-0.838,-0.223
221
- Seasonal Naive,AutoARIMA,0.0,0.0,0.0,-0.525,-0.875,-0.291
222
- Seasonal Naive,AutoTheta,0.0,0.0,0.0,-0.411,-0.556,-0.286
223
- Seasonal Naive,Drift,0.083,0.0,0.25,-0.313,-0.578,-0.002
224
- Seasonal Naive,Sundial-Base,0.167,0.0,0.417,-0.213,-0.424,-0.01
225
- Seasonal Naive,Naive,0.25,0.083,0.458,-0.128,-0.33,0.087
226
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/leaderboard_MASE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,91.20879120879124,35.398352594765115,0.0,3.4626783322727275,0.0,0.0
3
- TiRex,75.41208791208793,25.111125985019022,0.0,1.3772318233333332,0.038461538461538464,0.0
4
- TimesFM-2.5,74.17582417582418,24.470577245081294,0.0,36.094269344715904,0.15384615384615385,0.0
5
- Chronos-Bolt,70.05494505494507,24.79493117234646,0.0,1.1812673024242424,0.0,0.0
6
- TabPFN-TS,67.03296703296704,28.931010322468566,0.0,213.1836282907102,0.0,0.0
7
- Moirai-2.0,65.65934065934066,23.276855755353097,0.0,2.6483478834375003,0.3076923076923077,0.0
8
- Sundial-Base,64.69780219780219,25.973734598235776,0.0,8.734264116875,0.038461538461538464,0.0
9
- Toto-1.0,64.56043956043956,22.175386726803193,0.0,64.5943206675,0.15384615384615385,0.0
10
- Stat. Ensemble,39.83516483516484,5.123802463450922,0.0,2087.2821065800895,0.0,15.384615384615385
11
- AutoARIMA,36.53846153846153,3.34881134283036,0.0,1914.2135565203125,0.0,15.384615384615385
12
- Seasonal Naive,30.35714285714285,0.0,0.0,1.0154424039285712,0.0,0.0
13
- AutoTheta,28.57142857142857,1.1815957493498508,0.0,6.5463739134469705,0.0,0.0
14
- AutoETS,17.857142857142858,-34.975498607867614,0.0,13.10401027465909,0.0,0.0
15
- Naive,17.44505494505494,-44.77392099300393,0.0,1.012792872857143,0.0,0.0
16
- Drift,6.593406593406594,-52.299123287549286,0.0,1.0261745340625001,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/leaderboard_SQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,93.13186813186815,43.69603844728434,0.0,3.4626783322727275,0.0,0.0
3
- TiRex,82.28021978021978,34.44791487661235,0.0,1.3772318233333332,0.038461538461538464,0.0
4
- TimesFM-2.5,75.82417582417582,33.04019456805117,0.0,36.094269344715904,0.15384615384615385,0.0
5
- TabPFN-TS,73.62637362637363,38.076295973948326,0.0,213.1836282907102,0.0,0.0
6
- Chronos-Bolt,70.6043956043956,32.88168947733126,0.0,1.1812673024242424,0.0,0.0
7
- Moirai-2.0,68.13186813186815,31.522824785428526,0.0,2.6483478834375003,0.3076923076923077,0.0
8
- Toto-1.0,67.03296703296705,31.075422556050736,0.0,64.5943206675,0.15384615384615385,0.0
9
- Sundial-Base,55.35714285714286,28.52842382801387,0.0,8.734264116875,0.038461538461538464,0.0
10
- AutoARIMA,39.28571428571428,9.387893004882919,0.0,1914.2135565203125,0.0,15.384615384615385
11
- Stat. Ensemble,37.08791208791208,7.135433823240489,0.0,2087.2821065800895,0.0,15.384615384615385
12
- Seasonal Naive,25.96153846153846,0.0,0.0,1.0154424039285712,0.0,0.0
13
- AutoETS,24.725274725274723,-30.21022947475347,0.0,13.10401027465909,0.0,0.0
14
- AutoTheta,20.05494505494506,-10.643501735785609,0.0,6.5463739134469705,0.0,0.0
15
- Naive,13.873626373626374,-60.57154436477256,0.0,1.012792872857143,0.0,0.0
16
- Drift,3.0219780219780215,-67.3016516015588,0.0,1.0261745340625001,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/leaderboard_WAPE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,88.73626373626375,37.55790132014533,0.0,3.4626783322727275,0.0,0.0
3
- TimesFM-2.5,69.78021978021978,26.657949622489184,0.0,36.094269344715904,0.15384615384615385,0.0
4
- TiRex,69.36813186813188,27.32266216368823,0.0,1.3772318233333332,0.038461538461538464,0.0
5
- Chronos-Bolt,65.93406593406594,26.60685234340968,0.0,1.1812673024242424,0.0,0.0
6
- TabPFN-TS,65.38461538461539,30.329432122077236,0.0,213.1836282907102,0.0,0.0
7
- Moirai-2.0,64.01098901098902,25.999880752887982,0.0,2.6483478834375003,0.3076923076923077,0.0
8
- Sundial-Base,60.85164835164834,28.26554433861671,0.0,8.734264116875,0.038461538461538464,0.0
9
- Toto-1.0,58.79120879120878,24.17115830675096,0.0,64.5943206675,0.15384615384615385,0.0
10
- Stat. Ensemble,44.23076923076924,6.664281180112875,0.0,2087.2821065800895,0.0,15.384615384615385
11
- AutoARIMA,33.51648351648351,2.7340402546184417,0.0,1914.2135565203125,0.0,15.384615384615385
12
- AutoTheta,29.670329670329664,4.200265512517798,0.0,6.5463739134469705,0.0,0.0
13
- Naive,27.884615384615387,-29.595393684781012,0.0,1.012792872857143,0.0,0.0
14
- Seasonal Naive,27.609890109890113,0.0,0.0,1.0154424039285712,0.0,0.0
15
- AutoETS,27.472527472527474,-24.061974944434183,0.0,13.10401027465909,0.0,0.0
16
- Drift,16.75824175824176,-36.91555350097882,0.0,1.0261745340625001,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/leaderboard_WQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,93.6813186813187,45.99295406512989,0.0,3.4626783322727275,0.0,0.0
3
- TiRex,80.35714285714286,37.01592382634481,0.0,1.3772318233333332,0.038461538461538464,0.0
4
- TabPFN-TS,75.54945054945054,39.82364919499898,0.0,213.1836282907102,0.0,0.0
5
- TimesFM-2.5,75.27472527472527,35.74711031825937,0.0,36.094269344715904,0.15384615384615385,0.0
6
- Chronos-Bolt,69.78021978021978,35.346534617319236,0.0,1.1812673024242424,0.0,0.0
7
- Moirai-2.0,67.85714285714288,34.54029719816187,0.0,2.6483478834375003,0.3076923076923077,0.0
8
- Toto-1.0,65.10989010989012,33.6346956535905,0.0,64.5943206675,0.15384615384615385,0.0
9
- Sundial-Base,54.807692307692314,31.42058530075822,0.0,8.734264116875,0.038461538461538464,0.0
10
- Stat. Ensemble,40.10989010989011,9.81188536974501,0.0,2087.2821065800895,0.0,15.384615384615385
11
- AutoARIMA,38.18681318681318,9.717573374612886,0.0,1914.2135565203125,0.0,15.384615384615385
12
- AutoETS,23.626373626373624,-24.269495265740982,0.0,13.10401027465909,0.0,0.0
13
- Seasonal Naive,23.214285714285715,0.0,0.0,1.0154424039285712,0.0,0.0
14
- AutoTheta,21.7032967032967,-4.896832263246642,0.0,6.5463739134469705,0.0,0.0
15
- Naive,14.972527472527469,-50.97673020090387,0.0,1.012792872857143,0.0,0.0
16
- Drift,5.769230769230769,-57.44553174606462,0.0,1.0261745340625001,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/pairwise_MASE.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
3
- Chronos-2,TiRex,0.769,0.615,0.923,0.137,0.066,0.203
4
- Chronos-2,TimesFM-2.5,0.769,0.577,0.923,0.145,0.072,0.212
5
- Chronos-2,Chronos-Bolt,0.962,0.885,1.0,0.141,0.078,0.202
6
- Chronos-2,TabPFN-TS,0.808,0.654,0.962,0.091,0.047,0.134
7
- Chronos-2,Moirai-2.0,0.923,0.808,1.0,0.158,0.092,0.222
8
- Chronos-2,Sundial-Base,0.846,0.692,0.962,0.127,0.069,0.195
9
- Chronos-2,Toto-1.0,0.846,0.692,0.962,0.17,0.105,0.237
10
- Chronos-2,Stat. Ensemble,0.962,0.885,1.0,0.319,0.239,0.385
11
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.332,0.249,0.404
12
- Chronos-2,Seasonal Naive,0.962,0.885,1.0,0.354,0.278,0.422
13
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.346,0.278,0.408
14
- Chronos-2,AutoETS,0.962,0.885,1.0,0.521,0.418,0.612
15
- Chronos-2,Naive,0.962,0.885,1.0,0.554,0.468,0.626
16
- Chronos-2,Drift,1.0,1.0,1.0,0.576,0.494,0.643
17
- TiRex,Chronos-2,0.231,0.077,0.385,-0.159,-0.255,-0.07
18
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
19
- TiRex,TimesFM-2.5,0.519,0.327,0.731,0.008,-0.015,0.033
20
- TiRex,Chronos-Bolt,0.558,0.385,0.731,0.004,-0.021,0.032
21
- TiRex,TabPFN-TS,0.538,0.346,0.731,-0.054,-0.16,0.039
22
- TiRex,Moirai-2.0,0.558,0.365,0.731,0.024,-0.003,0.056
23
- TiRex,Sundial-Base,0.75,0.596,0.904,-0.012,-0.103,0.066
24
- TiRex,Toto-1.0,0.673,0.5,0.846,0.038,0.01,0.068
25
- TiRex,Stat. Ensemble,0.962,0.885,1.0,0.211,0.152,0.27
26
- TiRex,AutoARIMA,0.923,0.808,1.0,0.225,0.159,0.292
27
- TiRex,Seasonal Naive,0.923,0.808,1.0,0.251,0.191,0.308
28
- TiRex,AutoTheta,1.0,1.0,1.0,0.242,0.194,0.299
29
- TiRex,AutoETS,0.962,0.885,1.0,0.445,0.339,0.543
30
- TiRex,Naive,0.962,0.885,1.0,0.483,0.403,0.556
31
- TiRex,Drift,1.0,1.0,1.0,0.508,0.431,0.578
32
- TimesFM-2.5,Chronos-2,0.231,0.077,0.423,-0.169,-0.269,-0.077
33
- TimesFM-2.5,TiRex,0.481,0.269,0.673,-0.009,-0.034,0.014
34
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
35
- TimesFM-2.5,Chronos-Bolt,0.577,0.404,0.769,-0.004,-0.033,0.024
36
- TimesFM-2.5,TabPFN-TS,0.538,0.346,0.731,-0.063,-0.171,0.028
37
- TimesFM-2.5,Moirai-2.0,0.615,0.442,0.808,0.016,-0.028,0.057
38
- TimesFM-2.5,Sundial-Base,0.596,0.423,0.788,-0.02,-0.113,0.062
39
- TimesFM-2.5,Toto-1.0,0.692,0.519,0.846,0.029,-0.006,0.071
40
- TimesFM-2.5,Stat. Ensemble,0.885,0.731,1.0,0.204,0.139,0.271
41
- TimesFM-2.5,AutoARIMA,0.923,0.808,1.0,0.219,0.146,0.291
42
- TimesFM-2.5,Seasonal Naive,0.923,0.808,1.0,0.245,0.182,0.306
43
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.236,0.18,0.299
44
- TimesFM-2.5,AutoETS,0.962,0.885,1.0,0.44,0.337,0.539
45
- TimesFM-2.5,Naive,0.962,0.885,1.0,0.478,0.392,0.555
46
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.504,0.424,0.577
47
- Chronos-Bolt,Chronos-2,0.038,0.0,0.115,-0.164,-0.254,-0.085
48
- Chronos-Bolt,TiRex,0.442,0.269,0.615,-0.004,-0.034,0.02
49
- Chronos-Bolt,TimesFM-2.5,0.423,0.231,0.596,0.004,-0.025,0.032
50
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
51
- Chronos-Bolt,TabPFN-TS,0.5,0.308,0.692,-0.058,-0.158,0.022
52
- Chronos-Bolt,Moirai-2.0,0.577,0.423,0.731,0.02,-0.022,0.064
53
- Chronos-Bolt,Sundial-Base,0.558,0.365,0.731,-0.016,-0.106,0.062
54
- Chronos-Bolt,Toto-1.0,0.577,0.404,0.75,0.034,-0.001,0.072
55
- Chronos-Bolt,Stat. Ensemble,0.885,0.731,1.0,0.207,0.148,0.271
56
- Chronos-Bolt,AutoARIMA,1.0,1.0,1.0,0.222,0.154,0.29
57
- Chronos-Bolt,Seasonal Naive,0.923,0.808,1.0,0.248,0.188,0.306
58
- Chronos-Bolt,AutoTheta,1.0,1.0,1.0,0.239,0.19,0.297
59
- Chronos-Bolt,AutoETS,0.923,0.808,1.0,0.443,0.333,0.545
60
- Chronos-Bolt,Naive,0.962,0.885,1.0,0.481,0.396,0.556
61
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.506,0.425,0.578
62
- TabPFN-TS,Chronos-2,0.192,0.038,0.346,-0.1,-0.155,-0.049
63
- TabPFN-TS,TiRex,0.462,0.269,0.654,0.051,-0.04,0.138
64
- TabPFN-TS,TimesFM-2.5,0.462,0.269,0.654,0.059,-0.029,0.146
65
- TabPFN-TS,Chronos-Bolt,0.5,0.308,0.692,0.055,-0.023,0.137
66
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
67
- TabPFN-TS,Moirai-2.0,0.538,0.346,0.731,0.074,-0.012,0.16
68
- TabPFN-TS,Sundial-Base,0.462,0.269,0.654,0.04,-0.055,0.135
69
- TabPFN-TS,Toto-1.0,0.538,0.346,0.731,0.087,0.004,0.175
70
- TabPFN-TS,Stat. Ensemble,0.846,0.692,0.962,0.251,0.16,0.326
71
- TabPFN-TS,AutoARIMA,0.846,0.692,0.962,0.265,0.165,0.351
72
- TabPFN-TS,Seasonal Naive,0.885,0.731,1.0,0.289,0.198,0.368
73
- TabPFN-TS,AutoTheta,0.923,0.808,1.0,0.281,0.201,0.352
74
- TabPFN-TS,AutoETS,0.885,0.731,1.0,0.473,0.356,0.578
75
- TabPFN-TS,Naive,0.923,0.808,1.0,0.509,0.418,0.588
76
- TabPFN-TS,Drift,0.923,0.808,1.0,0.533,0.444,0.61
77
- Moirai-2.0,Chronos-2,0.077,0.0,0.192,-0.188,-0.285,-0.101
78
- Moirai-2.0,TiRex,0.442,0.269,0.635,-0.024,-0.059,0.003
79
- Moirai-2.0,TimesFM-2.5,0.385,0.192,0.558,-0.016,-0.061,0.027
80
- Moirai-2.0,Chronos-Bolt,0.423,0.269,0.577,-0.02,-0.068,0.021
81
- Moirai-2.0,TabPFN-TS,0.462,0.269,0.654,-0.08,-0.19,0.012
82
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
83
- Moirai-2.0,Sundial-Base,0.635,0.462,0.789,-0.036,-0.136,0.048
84
- Moirai-2.0,Toto-1.0,0.615,0.462,0.788,0.014,-0.011,0.043
85
- Moirai-2.0,Stat. Ensemble,0.808,0.654,0.923,0.191,0.118,0.256
86
- Moirai-2.0,AutoARIMA,0.769,0.577,0.923,0.206,0.128,0.282
87
- Moirai-2.0,Seasonal Naive,0.808,0.654,0.924,0.233,0.167,0.293
88
- Moirai-2.0,AutoTheta,0.962,0.885,1.0,0.224,0.168,0.284
89
- Moirai-2.0,AutoETS,0.885,0.769,1.0,0.432,0.314,0.538
90
- Moirai-2.0,Naive,0.923,0.808,1.0,0.47,0.388,0.545
91
- Moirai-2.0,Drift,1.0,1.0,1.0,0.496,0.418,0.568
92
- Sundial-Base,Chronos-2,0.154,0.038,0.308,-0.146,-0.242,-0.074
93
- Sundial-Base,TiRex,0.25,0.096,0.404,0.012,-0.071,0.093
94
- Sundial-Base,TimesFM-2.5,0.404,0.212,0.577,0.02,-0.066,0.101
95
- Sundial-Base,Chronos-Bolt,0.442,0.269,0.635,0.016,-0.066,0.096
96
- Sundial-Base,TabPFN-TS,0.538,0.346,0.731,-0.042,-0.156,0.052
97
- Sundial-Base,Moirai-2.0,0.365,0.211,0.538,0.035,-0.051,0.119
98
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
99
- Sundial-Base,Toto-1.0,0.481,0.308,0.654,0.049,-0.032,0.129
100
- Sundial-Base,Stat. Ensemble,0.846,0.692,0.962,0.22,0.136,0.296
101
- Sundial-Base,AutoARIMA,0.846,0.692,0.962,0.234,0.147,0.311
102
- Sundial-Base,Seasonal Naive,0.885,0.731,1.0,0.26,0.177,0.334
103
- Sundial-Base,AutoTheta,0.962,0.885,1.0,0.251,0.176,0.319
104
- Sundial-Base,AutoETS,0.923,0.808,1.0,0.452,0.336,0.538
105
- Sundial-Base,Naive,0.962,0.885,1.0,0.489,0.393,0.575
106
- Sundial-Base,Drift,1.0,1.0,1.0,0.514,0.421,0.597
107
- Toto-1.0,Chronos-2,0.154,0.038,0.308,-0.205,-0.311,-0.117
108
- Toto-1.0,TiRex,0.327,0.154,0.5,-0.039,-0.073,-0.01
109
- Toto-1.0,TimesFM-2.5,0.308,0.154,0.481,-0.03,-0.077,0.006
110
- Toto-1.0,Chronos-Bolt,0.423,0.25,0.596,-0.035,-0.078,0.001
111
- Toto-1.0,TabPFN-TS,0.462,0.269,0.654,-0.095,-0.212,-0.004
112
- Toto-1.0,Moirai-2.0,0.385,0.212,0.538,-0.014,-0.045,0.011
113
- Toto-1.0,Sundial-Base,0.519,0.346,0.692,-0.051,-0.148,0.031
114
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
115
- Toto-1.0,Stat. Ensemble,0.885,0.769,1.0,0.18,0.109,0.243
116
- Toto-1.0,AutoARIMA,0.885,0.769,1.0,0.195,0.122,0.268
117
- Toto-1.0,Seasonal Naive,0.885,0.731,1.0,0.222,0.159,0.279
118
- Toto-1.0,AutoTheta,0.923,0.808,1.0,0.212,0.153,0.276
119
- Toto-1.0,AutoETS,0.923,0.808,1.0,0.423,0.311,0.528
120
- Toto-1.0,Naive,0.962,0.885,1.0,0.462,0.385,0.537
121
- Toto-1.0,Drift,1.0,1.0,1.0,0.489,0.414,0.559
122
- Stat. Ensemble,Chronos-2,0.038,0.0,0.115,-0.469,-0.625,-0.314
123
- Stat. Ensemble,TiRex,0.038,0.0,0.115,-0.267,-0.369,-0.179
124
- Stat. Ensemble,TimesFM-2.5,0.115,0.0,0.269,-0.256,-0.371,-0.162
125
- Stat. Ensemble,Chronos-Bolt,0.115,0.0,0.269,-0.262,-0.372,-0.173
126
- Stat. Ensemble,TabPFN-TS,0.154,0.038,0.308,-0.335,-0.483,-0.19
127
- Stat. Ensemble,Moirai-2.0,0.192,0.077,0.346,-0.237,-0.345,-0.134
128
- Stat. Ensemble,Sundial-Base,0.154,0.038,0.308,-0.282,-0.419,-0.158
129
- Stat. Ensemble,Toto-1.0,0.115,0.0,0.231,-0.219,-0.322,-0.123
130
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
131
- Stat. Ensemble,AutoARIMA,0.615,0.442,0.788,0.018,-0.016,0.053
132
- Stat. Ensemble,Seasonal Naive,0.692,0.538,0.846,0.051,0.016,0.084
133
- Stat. Ensemble,AutoTheta,0.654,0.462,0.846,0.04,0.001,0.079
134
- Stat. Ensemble,AutoETS,0.846,0.692,0.962,0.297,0.19,0.407
135
- Stat. Ensemble,Naive,0.885,0.769,1.0,0.345,0.25,0.433
136
- Stat. Ensemble,Drift,0.962,0.885,1.0,0.377,0.289,0.468
137
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-0.496,-0.679,-0.332
138
- AutoARIMA,TiRex,0.077,0.0,0.192,-0.291,-0.413,-0.189
139
- AutoARIMA,TimesFM-2.5,0.077,0.0,0.192,-0.28,-0.41,-0.171
140
- AutoARIMA,Chronos-Bolt,0.0,0.0,0.0,-0.285,-0.409,-0.182
141
- AutoARIMA,TabPFN-TS,0.154,0.038,0.308,-0.36,-0.541,-0.198
142
- AutoARIMA,Moirai-2.0,0.231,0.077,0.423,-0.26,-0.393,-0.147
143
- AutoARIMA,Sundial-Base,0.154,0.038,0.308,-0.306,-0.451,-0.172
144
- AutoARIMA,Toto-1.0,0.115,0.0,0.231,-0.242,-0.366,-0.139
145
- AutoARIMA,Stat. Ensemble,0.385,0.212,0.558,-0.019,-0.056,0.015
146
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
147
- AutoARIMA,Seasonal Naive,0.654,0.481,0.808,0.033,0.006,0.059
148
- AutoARIMA,AutoTheta,0.654,0.462,0.808,0.022,-0.033,0.071
149
- AutoARIMA,AutoETS,0.769,0.577,0.923,0.284,0.172,0.395
150
- AutoARIMA,Naive,0.885,0.769,1.0,0.332,0.237,0.426
151
- AutoARIMA,Drift,0.962,0.885,1.0,0.365,0.272,0.453
152
- Seasonal Naive,Chronos-2,0.038,0.0,0.115,-0.548,-0.729,-0.384
153
- Seasonal Naive,TiRex,0.077,0.0,0.192,-0.335,-0.445,-0.237
154
- Seasonal Naive,TimesFM-2.5,0.077,0.0,0.192,-0.324,-0.441,-0.223
155
- Seasonal Naive,Chronos-Bolt,0.077,0.0,0.192,-0.33,-0.44,-0.232
156
- Seasonal Naive,TabPFN-TS,0.115,0.0,0.269,-0.407,-0.582,-0.246
157
- Seasonal Naive,Moirai-2.0,0.192,0.076,0.346,-0.303,-0.415,-0.201
158
- Seasonal Naive,Sundial-Base,0.115,0.0,0.269,-0.351,-0.501,-0.215
159
- Seasonal Naive,Toto-1.0,0.115,0.0,0.269,-0.285,-0.386,-0.189
160
- Seasonal Naive,Stat. Ensemble,0.308,0.154,0.462,-0.054,-0.092,-0.016
161
- Seasonal Naive,AutoARIMA,0.346,0.192,0.519,-0.035,-0.063,-0.006
162
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
163
- Seasonal Naive,AutoTheta,0.423,0.231,0.615,-0.012,-0.067,0.043
164
- Seasonal Naive,AutoETS,0.731,0.538,0.885,0.259,0.139,0.381
165
- Seasonal Naive,Naive,0.75,0.596,0.904,0.309,0.211,0.405
166
- Seasonal Naive,Drift,0.885,0.731,1.0,0.343,0.25,0.435
167
- AutoTheta,Chronos-2,0.0,0.0,0.0,-0.53,-0.689,-0.384
168
- AutoTheta,TiRex,0.0,0.0,0.0,-0.32,-0.427,-0.24
169
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-0.308,-0.427,-0.22
170
- AutoTheta,Chronos-Bolt,0.0,0.0,0.0,-0.314,-0.423,-0.234
171
- AutoTheta,TabPFN-TS,0.077,0.0,0.192,-0.39,-0.542,-0.251
172
- AutoTheta,Moirai-2.0,0.038,0.0,0.115,-0.288,-0.396,-0.202
173
- AutoTheta,Sundial-Base,0.038,0.0,0.115,-0.335,-0.468,-0.214
174
- AutoTheta,Toto-1.0,0.077,0.0,0.192,-0.27,-0.38,-0.18
175
- AutoTheta,Stat. Ensemble,0.346,0.154,0.538,-0.042,-0.085,-0.001
176
- AutoTheta,AutoARIMA,0.346,0.192,0.538,-0.022,-0.077,0.032
177
- AutoTheta,Seasonal Naive,0.577,0.385,0.769,0.012,-0.045,0.062
178
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoTheta,AutoETS,0.769,0.577,0.923,0.268,0.146,0.396
180
- AutoTheta,Naive,0.808,0.654,0.962,0.317,0.221,0.409
181
- AutoTheta,Drift,0.923,0.808,1.0,0.351,0.259,0.44
182
- AutoETS,Chronos-2,0.038,0.0,0.115,-1.089,-1.579,-0.717
183
- AutoETS,TiRex,0.038,0.0,0.115,-0.802,-1.187,-0.513
184
- AutoETS,TimesFM-2.5,0.038,0.0,0.115,-0.787,-1.17,-0.507
185
- AutoETS,Chronos-Bolt,0.077,0.0,0.192,-0.795,-1.196,-0.5
186
- AutoETS,TabPFN-TS,0.115,0.0,0.269,-0.899,-1.372,-0.554
187
- AutoETS,Moirai-2.0,0.115,0.0,0.231,-0.759,-1.164,-0.458
188
- AutoETS,Sundial-Base,0.077,0.0,0.192,-0.823,-1.166,-0.506
189
- AutoETS,Toto-1.0,0.077,0.0,0.192,-0.734,-1.118,-0.451
190
- AutoETS,Stat. Ensemble,0.154,0.038,0.308,-0.423,-0.687,-0.234
191
- AutoETS,AutoARIMA,0.231,0.077,0.423,-0.397,-0.654,-0.208
192
- AutoETS,Seasonal Naive,0.269,0.115,0.462,-0.35,-0.615,-0.161
193
- AutoETS,AutoTheta,0.231,0.077,0.423,-0.366,-0.655,-0.17
194
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
195
- AutoETS,Naive,0.462,0.269,0.654,0.068,-0.078,0.212
196
- AutoETS,Drift,0.577,0.385,0.732,0.114,-0.03,0.253
197
- Naive,Chronos-2,0.038,0.0,0.115,-1.241,-1.671,-0.879
198
- Naive,TiRex,0.038,0.0,0.115,-0.933,-1.254,-0.676
199
- Naive,TimesFM-2.5,0.038,0.0,0.115,-0.917,-1.248,-0.645
200
- Naive,Chronos-Bolt,0.038,0.0,0.115,-0.925,-1.252,-0.656
201
- Naive,TabPFN-TS,0.077,0.0,0.192,-1.037,-1.429,-0.718
202
- Naive,Moirai-2.0,0.077,0.0,0.192,-0.887,-1.2,-0.635
203
- Naive,Sundial-Base,0.038,0.0,0.115,-0.956,-1.353,-0.646
204
- Naive,Toto-1.0,0.038,0.0,0.115,-0.86,-1.159,-0.625
205
- Naive,Stat. Ensemble,0.115,0.0,0.231,-0.526,-0.765,-0.333
206
- Naive,AutoARIMA,0.115,0.0,0.231,-0.498,-0.742,-0.31
207
- Naive,Seasonal Naive,0.25,0.096,0.404,-0.448,-0.68,-0.267
208
- Naive,AutoTheta,0.192,0.038,0.346,-0.465,-0.692,-0.283
209
- Naive,AutoETS,0.538,0.346,0.731,-0.073,-0.269,0.072
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Drift,0.846,0.692,0.962,0.049,0.03,0.073
212
- Drift,Chronos-2,0.0,0.0,0.0,-1.358,-1.804,-0.976
213
- Drift,TiRex,0.0,0.0,0.0,-1.034,-1.369,-0.759
214
- Drift,TimesFM-2.5,0.0,0.0,0.0,-1.016,-1.363,-0.736
215
- Drift,Chronos-Bolt,0.0,0.0,0.0,-1.025,-1.372,-0.739
216
- Drift,TabPFN-TS,0.077,0.0,0.192,-1.143,-1.562,-0.797
217
- Drift,Moirai-2.0,0.0,0.0,0.0,-0.985,-1.317,-0.718
218
- Drift,Sundial-Base,0.0,0.0,0.0,-1.057,-1.483,-0.728
219
- Drift,Toto-1.0,0.0,0.0,0.0,-0.957,-1.265,-0.707
220
- Drift,Stat. Ensemble,0.038,0.0,0.115,-0.605,-0.879,-0.406
221
- Drift,AutoARIMA,0.038,0.0,0.115,-0.576,-0.828,-0.373
222
- Drift,Seasonal Naive,0.115,0.0,0.269,-0.523,-0.771,-0.333
223
- Drift,AutoTheta,0.077,0.0,0.192,-0.541,-0.784,-0.35
224
- Drift,AutoETS,0.423,0.268,0.615,-0.128,-0.339,0.029
225
- Drift,Naive,0.154,0.038,0.308,-0.052,-0.078,-0.031
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/pairwise_SQL.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
3
- Chronos-2,TiRex,0.731,0.538,0.885,0.141,0.072,0.204
4
- Chronos-2,TimesFM-2.5,0.769,0.577,0.923,0.159,0.083,0.225
5
- Chronos-2,TabPFN-TS,0.846,0.692,0.962,0.091,0.05,0.131
6
- Chronos-2,Chronos-Bolt,0.923,0.808,1.0,0.161,0.096,0.219
7
- Chronos-2,Moirai-2.0,0.923,0.808,1.0,0.178,0.111,0.242
8
- Chronos-2,Toto-1.0,0.885,0.731,1.0,0.183,0.115,0.25
9
- Chronos-2,Sundial-Base,0.962,0.885,1.0,0.212,0.157,0.275
10
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.379,0.297,0.45
11
- Chronos-2,Stat. Ensemble,1.0,1.0,1.0,0.394,0.317,0.461
12
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.437,0.368,0.497
13
- Chronos-2,AutoETS,1.0,1.0,1.0,0.568,0.468,0.65
14
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.491,0.407,0.574
15
- Chronos-2,Naive,1.0,1.0,1.0,0.649,0.573,0.716
16
- Chronos-2,Drift,1.0,1.0,1.0,0.663,0.591,0.728
17
- TiRex,Chronos-2,0.269,0.115,0.462,-0.164,-0.257,-0.078
18
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
19
- TiRex,TimesFM-2.5,0.673,0.481,0.846,0.021,-0.002,0.045
20
- TiRex,TabPFN-TS,0.577,0.385,0.769,-0.059,-0.164,0.029
21
- TiRex,Chronos-Bolt,0.75,0.577,0.885,0.023,-0.005,0.052
22
- TiRex,Moirai-2.0,0.788,0.615,0.923,0.043,0.014,0.075
23
- TiRex,Toto-1.0,0.788,0.615,0.923,0.049,0.024,0.077
24
- TiRex,Sundial-Base,0.788,0.635,0.923,0.083,0.009,0.153
25
- TiRex,AutoARIMA,0.962,0.885,1.0,0.277,0.21,0.351
26
- TiRex,Stat. Ensemble,0.962,0.885,1.0,0.294,0.229,0.365
27
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.344,0.286,0.404
28
- TiRex,AutoETS,0.962,0.885,1.0,0.497,0.4,0.581
29
- TiRex,AutoTheta,1.0,1.0,1.0,0.408,0.321,0.5
30
- TiRex,Naive,1.0,1.0,1.0,0.592,0.513,0.666
31
- TiRex,Drift,1.0,1.0,1.0,0.608,0.534,0.676
32
- TimesFM-2.5,Chronos-2,0.231,0.077,0.423,-0.189,-0.291,-0.09
33
- TimesFM-2.5,TiRex,0.327,0.154,0.519,-0.021,-0.047,0.002
34
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
35
- TimesFM-2.5,TabPFN-TS,0.538,0.346,0.731,-0.081,-0.195,0.009
36
- TimesFM-2.5,Chronos-Bolt,0.538,0.365,0.712,0.002,-0.028,0.029
37
- TimesFM-2.5,Moirai-2.0,0.615,0.442,0.808,0.022,-0.023,0.067
38
- TimesFM-2.5,Toto-1.0,0.654,0.462,0.808,0.029,-0.01,0.069
39
- TimesFM-2.5,Sundial-Base,0.788,0.635,0.923,0.063,-0.021,0.143
40
- TimesFM-2.5,AutoARIMA,1.0,1.0,1.0,0.261,0.191,0.336
41
- TimesFM-2.5,Stat. Ensemble,0.962,0.885,1.0,0.279,0.207,0.357
42
- TimesFM-2.5,Seasonal Naive,1.0,1.0,1.0,0.33,0.269,0.394
43
- TimesFM-2.5,AutoETS,0.962,0.885,1.0,0.486,0.387,0.574
44
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.395,0.304,0.497
45
- TimesFM-2.5,Naive,1.0,1.0,1.0,0.583,0.5,0.659
46
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.6,0.521,0.672
47
- TabPFN-TS,Chronos-2,0.154,0.038,0.308,-0.1,-0.15,-0.052
48
- TabPFN-TS,TiRex,0.423,0.231,0.615,0.055,-0.03,0.141
49
- TabPFN-TS,TimesFM-2.5,0.462,0.269,0.654,0.075,-0.009,0.163
50
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
51
- TabPFN-TS,Chronos-Bolt,0.615,0.423,0.808,0.077,0.005,0.156
52
- TabPFN-TS,Moirai-2.0,0.538,0.346,0.731,0.096,0.014,0.181
53
- TabPFN-TS,Toto-1.0,0.577,0.385,0.769,0.102,0.017,0.189
54
- TabPFN-TS,Sundial-Base,0.769,0.577,0.923,0.134,0.048,0.222
55
- TabPFN-TS,AutoARIMA,0.923,0.808,1.0,0.317,0.222,0.406
56
- TabPFN-TS,Stat. Ensemble,0.923,0.808,1.0,0.333,0.243,0.417
57
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.381,0.298,0.457
58
- TabPFN-TS,AutoETS,0.923,0.808,1.0,0.524,0.416,0.616
59
- TabPFN-TS,AutoTheta,1.0,1.0,1.0,0.44,0.342,0.539
60
- TabPFN-TS,Naive,1.0,1.0,1.0,0.614,0.529,0.691
61
- TabPFN-TS,Drift,1.0,1.0,1.0,0.63,0.547,0.702
62
- Chronos-Bolt,Chronos-2,0.077,0.0,0.192,-0.192,-0.28,-0.106
63
- Chronos-Bolt,TiRex,0.25,0.115,0.423,-0.024,-0.055,0.005
64
- Chronos-Bolt,TimesFM-2.5,0.462,0.288,0.635,-0.002,-0.03,0.027
65
- Chronos-Bolt,TabPFN-TS,0.385,0.192,0.577,-0.084,-0.185,-0.005
66
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
67
- Chronos-Bolt,Moirai-2.0,0.5,0.327,0.654,0.02,-0.025,0.07
68
- Chronos-Bolt,Toto-1.0,0.538,0.365,0.712,0.026,-0.012,0.068
69
- Chronos-Bolt,Sundial-Base,0.75,0.596,0.904,0.061,-0.024,0.136
70
- Chronos-Bolt,AutoARIMA,1.0,1.0,1.0,0.259,0.191,0.333
71
- Chronos-Bolt,Stat. Ensemble,0.962,0.885,1.0,0.277,0.211,0.349
72
- Chronos-Bolt,Seasonal Naive,1.0,1.0,1.0,0.329,0.273,0.388
73
- Chronos-Bolt,AutoETS,0.962,0.885,1.0,0.485,0.384,0.576
74
- Chronos-Bolt,AutoTheta,1.0,1.0,1.0,0.393,0.304,0.492
75
- Chronos-Bolt,Naive,1.0,1.0,1.0,0.582,0.5,0.654
76
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.599,0.52,0.667
77
- Moirai-2.0,Chronos-2,0.077,0.0,0.192,-0.216,-0.319,-0.125
78
- Moirai-2.0,TiRex,0.212,0.077,0.385,-0.045,-0.081,-0.014
79
- Moirai-2.0,TimesFM-2.5,0.385,0.192,0.558,-0.023,-0.071,0.023
80
- Moirai-2.0,TabPFN-TS,0.462,0.269,0.654,-0.106,-0.221,-0.014
81
- Moirai-2.0,Chronos-Bolt,0.5,0.346,0.673,-0.02,-0.075,0.025
82
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
83
- Moirai-2.0,Toto-1.0,0.577,0.423,0.75,0.006,-0.016,0.035
84
- Moirai-2.0,Sundial-Base,0.75,0.596,0.904,0.042,-0.051,0.124
85
- Moirai-2.0,AutoARIMA,0.923,0.808,1.0,0.244,0.163,0.328
86
- Moirai-2.0,Stat. Ensemble,0.923,0.808,1.0,0.263,0.18,0.345
87
- Moirai-2.0,Seasonal Naive,0.923,0.808,1.0,0.315,0.249,0.385
88
- Moirai-2.0,AutoETS,0.923,0.808,1.0,0.474,0.366,0.571
89
- Moirai-2.0,AutoTheta,0.923,0.808,1.0,0.381,0.287,0.48
90
- Moirai-2.0,Naive,0.962,0.885,1.0,0.574,0.491,0.651
91
- Moirai-2.0,Drift,1.0,1.0,1.0,0.591,0.511,0.663
92
- Toto-1.0,Chronos-2,0.115,0.0,0.269,-0.224,-0.333,-0.13
93
- Toto-1.0,TiRex,0.212,0.077,0.385,-0.051,-0.084,-0.025
94
- Toto-1.0,TimesFM-2.5,0.346,0.192,0.538,-0.029,-0.075,0.01
95
- Toto-1.0,TabPFN-TS,0.423,0.231,0.615,-0.113,-0.232,-0.018
96
- Toto-1.0,Chronos-Bolt,0.462,0.288,0.635,-0.027,-0.073,0.012
97
- Toto-1.0,Moirai-2.0,0.423,0.25,0.577,-0.007,-0.036,0.016
98
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
99
- Toto-1.0,Sundial-Base,0.75,0.596,0.904,0.036,-0.053,0.113
100
- Toto-1.0,AutoARIMA,0.923,0.808,1.0,0.239,0.163,0.318
101
- Toto-1.0,Stat. Ensemble,0.923,0.808,1.0,0.258,0.178,0.337
102
- Toto-1.0,Seasonal Naive,0.962,0.885,1.0,0.311,0.247,0.377
103
- Toto-1.0,AutoETS,0.923,0.808,1.0,0.471,0.366,0.564
104
- Toto-1.0,AutoTheta,0.923,0.808,1.0,0.377,0.277,0.477
105
- Toto-1.0,Naive,1.0,1.0,1.0,0.571,0.49,0.645
106
- Toto-1.0,Drift,1.0,1.0,1.0,0.588,0.51,0.658
107
- Sundial-Base,Chronos-2,0.038,0.0,0.115,-0.269,-0.379,-0.186
108
- Sundial-Base,TiRex,0.212,0.077,0.365,-0.09,-0.18,-0.009
109
- Sundial-Base,TimesFM-2.5,0.212,0.077,0.365,-0.067,-0.168,0.021
110
- Sundial-Base,TabPFN-TS,0.231,0.077,0.423,-0.154,-0.285,-0.05
111
- Sundial-Base,Chronos-Bolt,0.25,0.096,0.404,-0.065,-0.157,0.023
112
- Sundial-Base,Moirai-2.0,0.25,0.096,0.404,-0.044,-0.142,0.048
113
- Sundial-Base,Toto-1.0,0.25,0.096,0.404,-0.037,-0.127,0.05
114
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
115
- Sundial-Base,AutoARIMA,0.769,0.577,0.923,0.211,0.119,0.291
116
- Sundial-Base,Stat. Ensemble,0.808,0.654,0.923,0.23,0.145,0.306
117
- Sundial-Base,Seasonal Naive,0.962,0.885,1.0,0.285,0.209,0.359
118
- Sundial-Base,AutoETS,0.846,0.692,0.962,0.451,0.336,0.535
119
- Sundial-Base,AutoTheta,0.923,0.808,1.0,0.354,0.246,0.448
120
- Sundial-Base,Naive,1.0,1.0,1.0,0.555,0.46,0.641
121
- Sundial-Base,Drift,1.0,1.0,1.0,0.573,0.482,0.653
122
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-0.609,-0.817,-0.423
123
- AutoARIMA,TiRex,0.038,0.0,0.115,-0.382,-0.541,-0.266
124
- AutoARIMA,TimesFM-2.5,0.0,0.0,0.0,-0.353,-0.505,-0.237
125
- AutoARIMA,TabPFN-TS,0.077,0.0,0.192,-0.463,-0.683,-0.285
126
- AutoARIMA,Chronos-Bolt,0.0,0.0,0.0,-0.35,-0.499,-0.236
127
- AutoARIMA,Moirai-2.0,0.077,0.0,0.192,-0.323,-0.488,-0.195
128
- AutoARIMA,Toto-1.0,0.077,0.0,0.192,-0.315,-0.466,-0.195
129
- AutoARIMA,Sundial-Base,0.231,0.077,0.423,-0.268,-0.41,-0.135
130
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
131
- AutoARIMA,Stat. Ensemble,0.615,0.423,0.788,0.024,-0.008,0.057
132
- AutoARIMA,Seasonal Naive,0.885,0.769,0.962,0.094,0.068,0.121
133
- AutoARIMA,AutoETS,0.731,0.538,0.885,0.304,0.194,0.406
134
- AutoARIMA,AutoTheta,0.885,0.731,1.0,0.181,0.078,0.284
135
- AutoARIMA,Naive,0.923,0.808,1.0,0.436,0.348,0.519
136
- AutoARIMA,Drift,0.962,0.885,1.0,0.458,0.375,0.539
137
- Stat. Ensemble,Chronos-2,0.0,0.0,0.0,-0.649,-0.854,-0.464
138
- Stat. Ensemble,TiRex,0.038,0.0,0.115,-0.417,-0.575,-0.297
139
- Stat. Ensemble,TimesFM-2.5,0.038,0.0,0.115,-0.387,-0.556,-0.262
140
- Stat. Ensemble,TabPFN-TS,0.077,0.0,0.192,-0.5,-0.715,-0.32
141
- Stat. Ensemble,Chronos-Bolt,0.038,0.0,0.115,-0.384,-0.536,-0.268
142
- Stat. Ensemble,Moirai-2.0,0.077,0.0,0.192,-0.356,-0.526,-0.219
143
- Stat. Ensemble,Toto-1.0,0.077,0.0,0.192,-0.347,-0.509,-0.217
144
- Stat. Ensemble,Sundial-Base,0.192,0.077,0.346,-0.299,-0.441,-0.169
145
- Stat. Ensemble,AutoARIMA,0.385,0.212,0.577,-0.025,-0.061,0.008
146
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
147
- Stat. Ensemble,Seasonal Naive,0.731,0.577,0.865,0.071,0.03,0.11
148
- Stat. Ensemble,AutoETS,0.769,0.577,0.923,0.287,0.178,0.394
149
- Stat. Ensemble,AutoTheta,0.885,0.731,1.0,0.161,0.074,0.253
150
- Stat. Ensemble,Naive,0.923,0.808,1.0,0.422,0.329,0.507
151
- Stat. Ensemble,Drift,0.962,0.885,1.0,0.445,0.359,0.526
152
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-0.776,-0.987,-0.581
153
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.526,-0.679,-0.401
154
- Seasonal Naive,TimesFM-2.5,0.0,0.0,0.0,-0.493,-0.65,-0.368
155
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.615,-0.841,-0.425
156
- Seasonal Naive,Chronos-Bolt,0.0,0.0,0.0,-0.49,-0.633,-0.376
157
- Seasonal Naive,Moirai-2.0,0.077,0.0,0.192,-0.46,-0.627,-0.331
158
- Seasonal Naive,Toto-1.0,0.038,0.0,0.115,-0.451,-0.605,-0.329
159
- Seasonal Naive,Sundial-Base,0.038,0.0,0.115,-0.399,-0.56,-0.264
160
- Seasonal Naive,AutoARIMA,0.115,0.038,0.231,-0.104,-0.138,-0.073
161
- Seasonal Naive,Stat. Ensemble,0.269,0.135,0.423,-0.077,-0.124,-0.031
162
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
163
- Seasonal Naive,AutoETS,0.615,0.423,0.77,0.232,0.1,0.359
164
- Seasonal Naive,AutoTheta,0.615,0.423,0.808,0.096,-0.022,0.215
165
- Seasonal Naive,Naive,0.865,0.75,0.962,0.377,0.284,0.463
166
- Seasonal Naive,Drift,1.0,1.0,1.0,0.402,0.311,0.485
167
- AutoETS,Chronos-2,0.0,0.0,0.0,-1.313,-1.857,-0.881
168
- AutoETS,TiRex,0.038,0.0,0.115,-0.986,-1.387,-0.666
169
- AutoETS,TimesFM-2.5,0.038,0.0,0.115,-0.945,-1.345,-0.631
170
- AutoETS,TabPFN-TS,0.077,0.0,0.192,-1.103,-1.606,-0.713
171
- AutoETS,Chronos-Bolt,0.038,0.0,0.115,-0.94,-1.357,-0.624
172
- AutoETS,Moirai-2.0,0.077,0.0,0.192,-0.902,-1.33,-0.578
173
- AutoETS,Toto-1.0,0.077,0.0,0.192,-0.889,-1.292,-0.576
174
- AutoETS,Sundial-Base,0.154,0.038,0.308,-0.822,-1.149,-0.507
175
- AutoETS,AutoARIMA,0.269,0.115,0.462,-0.437,-0.684,-0.241
176
- AutoETS,Stat. Ensemble,0.231,0.077,0.423,-0.402,-0.651,-0.217
177
- AutoETS,Seasonal Naive,0.385,0.23,0.577,-0.302,-0.559,-0.111
178
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
179
- AutoETS,AutoTheta,0.5,0.308,0.692,-0.177,-0.448,0.013
180
- AutoETS,Naive,0.692,0.5,0.846,0.189,0.022,0.343
181
- AutoETS,Drift,0.885,0.731,1.0,0.222,0.059,0.368
182
- AutoTheta,Chronos-2,0.0,0.0,0.0,-0.965,-1.35,-0.685
183
- AutoTheta,TiRex,0.0,0.0,0.0,-0.688,-0.999,-0.474
184
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-0.652,-0.988,-0.437
185
- AutoTheta,TabPFN-TS,0.0,0.0,0.0,-0.787,-1.168,-0.519
186
- AutoTheta,Chronos-Bolt,0.0,0.0,0.0,-0.648,-0.97,-0.437
187
- AutoTheta,Moirai-2.0,0.077,0.0,0.192,-0.616,-0.923,-0.402
188
- AutoTheta,Toto-1.0,0.077,0.0,0.192,-0.605,-0.913,-0.384
189
- AutoTheta,Sundial-Base,0.077,0.0,0.192,-0.548,-0.811,-0.327
190
- AutoTheta,AutoARIMA,0.115,0.0,0.269,-0.221,-0.397,-0.085
191
- AutoTheta,Stat. Ensemble,0.115,0.0,0.269,-0.191,-0.339,-0.08
192
- AutoTheta,Seasonal Naive,0.385,0.192,0.577,-0.106,-0.273,0.022
193
- AutoTheta,AutoETS,0.5,0.308,0.692,0.15,-0.013,0.309
194
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
195
- AutoTheta,Naive,0.654,0.462,0.846,0.311,0.19,0.426
196
- AutoTheta,Drift,0.808,0.654,0.923,0.339,0.219,0.449
197
- Naive,Chronos-2,0.0,0.0,0.0,-1.852,-2.524,-1.341
198
- Naive,TiRex,0.0,0.0,0.0,-1.45,-1.993,-1.054
199
- Naive,TimesFM-2.5,0.0,0.0,0.0,-1.398,-1.932,-1.0
200
- Naive,TabPFN-TS,0.0,0.0,0.0,-1.593,-2.233,-1.123
201
- Naive,Chronos-Bolt,0.0,0.0,0.0,-1.392,-1.887,-1.001
202
- Naive,Moirai-2.0,0.038,0.0,0.115,-1.345,-1.866,-0.964
203
- Naive,Toto-1.0,0.0,0.0,0.0,-1.33,-1.815,-0.961
204
- Naive,Sundial-Base,0.0,0.0,0.0,-1.247,-1.782,-0.853
205
- Naive,AutoARIMA,0.077,0.0,0.192,-0.772,-1.079,-0.535
206
- Naive,Stat. Ensemble,0.077,0.0,0.192,-0.729,-1.027,-0.491
207
- Naive,Seasonal Naive,0.135,0.038,0.25,-0.606,-0.862,-0.397
208
- Naive,AutoETS,0.308,0.154,0.5,-0.233,-0.521,-0.023
209
- Naive,AutoTheta,0.346,0.154,0.538,-0.451,-0.743,-0.234
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Drift,0.962,0.885,1.0,0.04,0.025,0.057
212
- Drift,Chronos-2,0.0,0.0,0.0,-1.971,-2.673,-1.447
213
- Drift,TiRex,0.0,0.0,0.0,-1.552,-2.088,-1.148
214
- Drift,TimesFM-2.5,0.0,0.0,0.0,-1.499,-2.051,-1.089
215
- Drift,TabPFN-TS,0.0,0.0,0.0,-1.702,-2.354,-1.208
216
- Drift,Chronos-Bolt,0.0,0.0,0.0,-1.493,-2.007,-1.085
217
- Drift,Moirai-2.0,0.0,0.0,0.0,-1.443,-1.963,-1.045
218
- Drift,Toto-1.0,0.0,0.0,0.0,-1.427,-1.926,-1.041
219
- Drift,Sundial-Base,0.0,0.0,0.0,-1.341,-1.884,-0.929
220
- Drift,AutoARIMA,0.038,0.0,0.115,-0.846,-1.169,-0.599
221
- Drift,Stat. Ensemble,0.038,0.0,0.115,-0.802,-1.111,-0.559
222
- Drift,Seasonal Naive,0.0,0.0,0.0,-0.673,-0.942,-0.452
223
- Drift,AutoETS,0.115,0.0,0.269,-0.285,-0.583,-0.063
224
- Drift,AutoTheta,0.192,0.077,0.346,-0.512,-0.816,-0.28
225
- Drift,Naive,0.038,0.0,0.115,-0.042,-0.06,-0.026
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/pairwise_WAPE.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
3
- Chronos-2,TimesFM-2.5,0.731,0.538,0.885,0.149,0.075,0.214
4
- Chronos-2,TiRex,0.846,0.692,0.962,0.141,0.074,0.204
5
- Chronos-2,Chronos-Bolt,0.923,0.808,1.0,0.149,0.081,0.219
6
- Chronos-2,TabPFN-TS,0.846,0.692,0.962,0.104,0.034,0.174
7
- Chronos-2,Moirai-2.0,0.885,0.731,1.0,0.156,0.089,0.221
8
- Chronos-2,Sundial-Base,0.846,0.692,0.962,0.13,0.071,0.194
9
- Chronos-2,Toto-1.0,0.846,0.692,0.962,0.177,0.109,0.245
10
- Chronos-2,Stat. Ensemble,0.962,0.885,1.0,0.331,0.242,0.409
11
- Chronos-2,AutoARIMA,0.962,0.885,1.0,0.358,0.267,0.438
12
- Chronos-2,AutoTheta,0.962,0.885,1.0,0.348,0.276,0.418
13
- Chronos-2,Naive,0.885,0.731,1.0,0.518,0.403,0.613
14
- Chronos-2,Seasonal Naive,0.923,0.808,1.0,0.376,0.282,0.449
15
- Chronos-2,AutoETS,0.885,0.731,1.0,0.497,0.365,0.605
16
- Chronos-2,Drift,0.923,0.808,1.0,0.544,0.439,0.636
17
- TimesFM-2.5,Chronos-2,0.269,0.115,0.462,-0.175,-0.273,-0.081
18
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
19
- TimesFM-2.5,TiRex,0.442,0.25,0.635,-0.009,-0.043,0.023
20
- TimesFM-2.5,Chronos-Bolt,0.5,0.327,0.674,0.001,-0.039,0.042
21
- TimesFM-2.5,TabPFN-TS,0.5,0.308,0.692,-0.053,-0.161,0.048
22
- TimesFM-2.5,Moirai-2.0,0.577,0.404,0.75,0.009,-0.046,0.062
23
- TimesFM-2.5,Sundial-Base,0.596,0.423,0.788,-0.022,-0.12,0.067
24
- TimesFM-2.5,Toto-1.0,0.654,0.481,0.808,0.033,-0.012,0.081
25
- TimesFM-2.5,Stat. Ensemble,0.808,0.615,0.962,0.214,0.126,0.294
26
- TimesFM-2.5,AutoARIMA,0.923,0.808,1.0,0.246,0.159,0.324
27
- TimesFM-2.5,AutoTheta,0.962,0.885,1.0,0.234,0.168,0.307
28
- TimesFM-2.5,Naive,0.885,0.731,1.0,0.434,0.317,0.538
29
- TimesFM-2.5,Seasonal Naive,0.923,0.808,1.0,0.267,0.184,0.338
30
- TimesFM-2.5,AutoETS,0.846,0.692,0.962,0.409,0.27,0.532
31
- TimesFM-2.5,Drift,0.885,0.731,1.0,0.464,0.352,0.562
32
- TiRex,Chronos-2,0.154,0.038,0.308,-0.164,-0.256,-0.08
33
- TiRex,TimesFM-2.5,0.558,0.365,0.75,0.009,-0.023,0.041
34
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
35
- TiRex,Chronos-Bolt,0.481,0.308,0.673,0.01,-0.028,0.06
36
- TiRex,TabPFN-TS,0.538,0.346,0.731,-0.043,-0.165,0.065
37
- TiRex,Moirai-2.0,0.442,0.269,0.615,0.018,-0.013,0.051
38
- TiRex,Sundial-Base,0.635,0.462,0.808,-0.013,-0.12,0.076
39
- TiRex,Toto-1.0,0.712,0.538,0.885,0.042,0.009,0.077
40
- TiRex,Stat. Ensemble,0.808,0.654,0.924,0.221,0.134,0.303
41
- TiRex,AutoARIMA,0.885,0.731,1.0,0.253,0.169,0.337
42
- TiRex,AutoTheta,1.0,1.0,1.0,0.241,0.179,0.313
43
- TiRex,Naive,0.885,0.731,1.0,0.439,0.321,0.545
44
- TiRex,Seasonal Naive,0.885,0.731,1.0,0.273,0.184,0.352
45
- TiRex,AutoETS,0.846,0.692,0.962,0.414,0.268,0.54
46
- TiRex,Drift,0.885,0.731,1.0,0.469,0.354,0.568
47
- Chronos-Bolt,Chronos-2,0.077,0.0,0.192,-0.175,-0.28,-0.089
48
- Chronos-Bolt,TimesFM-2.5,0.5,0.326,0.673,-0.001,-0.044,0.038
49
- Chronos-Bolt,TiRex,0.519,0.327,0.692,-0.01,-0.064,0.027
50
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
51
- Chronos-Bolt,TabPFN-TS,0.462,0.269,0.654,-0.053,-0.157,0.036
52
- Chronos-Bolt,Moirai-2.0,0.577,0.404,0.731,0.008,-0.066,0.068
53
- Chronos-Bolt,Sundial-Base,0.519,0.327,0.692,-0.023,-0.128,0.067
54
- Chronos-Bolt,Toto-1.0,0.615,0.442,0.788,0.032,-0.022,0.081
55
- Chronos-Bolt,Stat. Ensemble,0.731,0.538,0.885,0.214,0.122,0.3
56
- Chronos-Bolt,AutoARIMA,0.846,0.692,0.962,0.245,0.16,0.331
57
- Chronos-Bolt,AutoTheta,0.962,0.885,1.0,0.234,0.166,0.312
58
- Chronos-Bolt,Naive,0.846,0.692,0.962,0.434,0.311,0.538
59
- Chronos-Bolt,Seasonal Naive,0.923,0.808,1.0,0.266,0.183,0.339
60
- Chronos-Bolt,AutoETS,0.808,0.654,0.924,0.408,0.263,0.533
61
- Chronos-Bolt,Drift,0.846,0.692,0.962,0.464,0.347,0.563
62
- TabPFN-TS,Chronos-2,0.154,0.038,0.308,-0.116,-0.211,-0.035
63
- TabPFN-TS,TimesFM-2.5,0.5,0.308,0.692,0.05,-0.05,0.139
64
- TabPFN-TS,TiRex,0.462,0.269,0.654,0.041,-0.07,0.142
65
- TabPFN-TS,Chronos-Bolt,0.538,0.346,0.731,0.051,-0.037,0.135
66
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
67
- TabPFN-TS,Moirai-2.0,0.538,0.346,0.731,0.059,-0.054,0.165
68
- TabPFN-TS,Sundial-Base,0.577,0.385,0.769,0.029,-0.091,0.135
69
- TabPFN-TS,Toto-1.0,0.538,0.346,0.731,0.081,-0.023,0.177
70
- TabPFN-TS,Stat. Ensemble,0.846,0.692,0.962,0.254,0.151,0.343
71
- TabPFN-TS,AutoARIMA,0.808,0.654,0.924,0.284,0.181,0.375
72
- TabPFN-TS,AutoTheta,0.846,0.692,0.962,0.273,0.178,0.359
73
- TabPFN-TS,Naive,0.808,0.654,0.923,0.462,0.33,0.57
74
- TabPFN-TS,Seasonal Naive,0.885,0.731,1.0,0.303,0.205,0.384
75
- TabPFN-TS,AutoETS,0.808,0.654,0.962,0.438,0.285,0.563
76
- TabPFN-TS,Drift,0.846,0.692,0.962,0.491,0.367,0.59
77
- Moirai-2.0,Chronos-2,0.115,0.0,0.269,-0.185,-0.284,-0.098
78
- Moirai-2.0,TimesFM-2.5,0.423,0.25,0.596,-0.009,-0.067,0.044
79
- Moirai-2.0,TiRex,0.558,0.385,0.731,-0.018,-0.054,0.013
80
- Moirai-2.0,Chronos-Bolt,0.423,0.269,0.596,-0.008,-0.073,0.062
81
- Moirai-2.0,TabPFN-TS,0.462,0.269,0.654,-0.062,-0.197,0.052
82
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
83
- Moirai-2.0,Sundial-Base,0.596,0.423,0.769,-0.032,-0.143,0.065
84
- Moirai-2.0,Toto-1.0,0.577,0.404,0.75,0.024,-0.008,0.058
85
- Moirai-2.0,Stat. Ensemble,0.692,0.5,0.846,0.207,0.115,0.288
86
- Moirai-2.0,AutoARIMA,0.808,0.654,0.923,0.239,0.149,0.328
87
- Moirai-2.0,AutoTheta,0.962,0.885,1.0,0.228,0.162,0.3
88
- Moirai-2.0,Naive,0.846,0.692,0.962,0.429,0.314,0.532
89
- Moirai-2.0,Seasonal Naive,0.808,0.654,0.924,0.26,0.164,0.341
90
- Moirai-2.0,AutoETS,0.808,0.654,0.923,0.404,0.253,0.532
91
- Moirai-2.0,Drift,0.885,0.731,1.0,0.46,0.35,0.554
92
- Sundial-Base,Chronos-2,0.154,0.038,0.308,-0.149,-0.241,-0.077
93
- Sundial-Base,TimesFM-2.5,0.404,0.212,0.577,0.022,-0.072,0.107
94
- Sundial-Base,TiRex,0.365,0.192,0.538,0.013,-0.083,0.107
95
- Sundial-Base,Chronos-Bolt,0.481,0.308,0.673,0.023,-0.072,0.113
96
- Sundial-Base,TabPFN-TS,0.423,0.231,0.615,-0.03,-0.156,0.083
97
- Sundial-Base,Moirai-2.0,0.404,0.231,0.577,0.031,-0.069,0.125
98
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
99
- Sundial-Base,Toto-1.0,0.519,0.327,0.712,0.054,-0.039,0.141
100
- Sundial-Base,Stat. Ensemble,0.769,0.577,0.923,0.231,0.132,0.318
101
- Sundial-Base,AutoARIMA,0.808,0.654,0.924,0.262,0.168,0.346
102
- Sundial-Base,AutoTheta,0.846,0.692,0.962,0.251,0.169,0.332
103
- Sundial-Base,Naive,0.846,0.692,0.962,0.446,0.316,0.562
104
- Sundial-Base,Seasonal Naive,0.846,0.692,0.962,0.283,0.183,0.364
105
- Sundial-Base,AutoETS,0.808,0.654,0.924,0.422,0.278,0.534
106
- Sundial-Base,Drift,0.846,0.692,0.962,0.476,0.35,0.586
107
- Toto-1.0,Chronos-2,0.154,0.038,0.308,-0.214,-0.324,-0.122
108
- Toto-1.0,TimesFM-2.5,0.346,0.192,0.519,-0.034,-0.089,0.012
109
- Toto-1.0,TiRex,0.288,0.115,0.462,-0.043,-0.084,-0.009
110
- Toto-1.0,Chronos-Bolt,0.385,0.212,0.558,-0.033,-0.088,0.021
111
- Toto-1.0,TabPFN-TS,0.462,0.269,0.654,-0.088,-0.216,0.022
112
- Toto-1.0,Moirai-2.0,0.423,0.25,0.596,-0.025,-0.061,0.008
113
- Toto-1.0,Sundial-Base,0.481,0.288,0.673,-0.057,-0.164,0.038
114
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
115
- Toto-1.0,Stat. Ensemble,0.692,0.5,0.846,0.188,0.094,0.273
116
- Toto-1.0,AutoARIMA,0.846,0.692,0.962,0.22,0.129,0.309
117
- Toto-1.0,AutoTheta,0.846,0.692,0.962,0.208,0.137,0.284
118
- Toto-1.0,Naive,0.808,0.654,0.924,0.415,0.296,0.517
119
- Toto-1.0,Seasonal Naive,0.846,0.654,0.962,0.242,0.151,0.321
120
- Toto-1.0,AutoETS,0.769,0.614,0.923,0.389,0.242,0.513
121
- Toto-1.0,Drift,0.885,0.769,1.0,0.446,0.337,0.542
122
- Stat. Ensemble,Chronos-2,0.038,0.0,0.115,-0.495,-0.692,-0.319
123
- Stat. Ensemble,TimesFM-2.5,0.192,0.038,0.385,-0.273,-0.416,-0.144
124
- Stat. Ensemble,TiRex,0.192,0.076,0.346,-0.284,-0.434,-0.154
125
- Stat. Ensemble,Chronos-Bolt,0.269,0.115,0.462,-0.272,-0.429,-0.139
126
- Stat. Ensemble,TabPFN-TS,0.154,0.038,0.308,-0.34,-0.522,-0.178
127
- Stat. Ensemble,Moirai-2.0,0.308,0.154,0.5,-0.261,-0.405,-0.13
128
- Stat. Ensemble,Sundial-Base,0.231,0.077,0.423,-0.301,-0.467,-0.152
129
- Stat. Ensemble,Toto-1.0,0.308,0.154,0.5,-0.231,-0.376,-0.104
130
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
131
- Stat. Ensemble,AutoARIMA,0.654,0.481,0.827,0.04,0.008,0.076
132
- Stat. Ensemble,AutoTheta,0.731,0.538,0.885,0.026,-0.029,0.075
133
- Stat. Ensemble,Naive,0.808,0.654,0.962,0.28,0.162,0.399
134
- Stat. Ensemble,Seasonal Naive,0.692,0.538,0.846,0.067,0.029,0.103
135
- Stat. Ensemble,AutoETS,0.808,0.654,0.923,0.248,0.12,0.381
136
- Stat. Ensemble,Drift,0.808,0.654,0.962,0.318,0.206,0.435
137
- AutoARIMA,Chronos-2,0.038,0.0,0.115,-0.558,-0.781,-0.363
138
- AutoARIMA,TimesFM-2.5,0.077,0.0,0.192,-0.326,-0.48,-0.19
139
- AutoARIMA,TiRex,0.115,0.0,0.269,-0.338,-0.509,-0.203
140
- AutoARIMA,Chronos-Bolt,0.154,0.038,0.308,-0.325,-0.496,-0.19
141
- AutoARIMA,TabPFN-TS,0.192,0.076,0.346,-0.396,-0.601,-0.222
142
- AutoARIMA,Moirai-2.0,0.192,0.077,0.346,-0.314,-0.489,-0.175
143
- AutoARIMA,Sundial-Base,0.192,0.076,0.346,-0.356,-0.529,-0.202
144
- AutoARIMA,Toto-1.0,0.154,0.038,0.308,-0.283,-0.447,-0.148
145
- AutoARIMA,Stat. Ensemble,0.346,0.173,0.519,-0.042,-0.082,-0.008
146
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
147
- AutoARIMA,AutoTheta,0.462,0.269,0.654,-0.015,-0.079,0.04
148
- AutoARIMA,Naive,0.731,0.577,0.885,0.249,0.12,0.379
149
- AutoARIMA,Seasonal Naive,0.692,0.519,0.846,0.027,-0.012,0.062
150
- AutoARIMA,AutoETS,0.615,0.423,0.808,0.216,0.079,0.35
151
- AutoARIMA,Drift,0.731,0.577,0.885,0.29,0.167,0.413
152
- AutoTheta,Chronos-2,0.038,0.0,0.115,-0.534,-0.718,-0.381
153
- AutoTheta,TimesFM-2.5,0.038,0.0,0.115,-0.306,-0.443,-0.202
154
- AutoTheta,TiRex,0.0,0.0,0.0,-0.318,-0.455,-0.218
155
- AutoTheta,Chronos-Bolt,0.038,0.0,0.115,-0.305,-0.452,-0.2
156
- AutoTheta,TabPFN-TS,0.154,0.038,0.308,-0.375,-0.561,-0.216
157
- AutoTheta,Moirai-2.0,0.038,0.0,0.115,-0.295,-0.428,-0.193
158
- AutoTheta,Sundial-Base,0.154,0.038,0.308,-0.335,-0.496,-0.204
159
- AutoTheta,Toto-1.0,0.154,0.038,0.308,-0.263,-0.397,-0.158
160
- AutoTheta,Stat. Ensemble,0.269,0.115,0.462,-0.026,-0.081,0.029
161
- AutoTheta,AutoARIMA,0.538,0.346,0.731,0.015,-0.042,0.073
162
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoTheta,Naive,0.654,0.462,0.846,0.261,0.138,0.381
164
- AutoTheta,Seasonal Naive,0.615,0.423,0.808,0.042,-0.02,0.097
165
- AutoTheta,AutoETS,0.654,0.462,0.808,0.228,0.087,0.37
166
- AutoTheta,Drift,0.808,0.654,0.962,0.3,0.184,0.415
167
- Naive,Chronos-2,0.115,0.0,0.269,-1.075,-1.586,-0.674
168
- Naive,TimesFM-2.5,0.115,0.0,0.269,-0.767,-1.166,-0.465
169
- Naive,TiRex,0.115,0.0,0.269,-0.783,-1.198,-0.474
170
- Naive,Chronos-Bolt,0.154,0.038,0.308,-0.766,-1.162,-0.452
171
- Naive,TabPFN-TS,0.192,0.077,0.346,-0.86,-1.326,-0.492
172
- Naive,Moirai-2.0,0.154,0.038,0.308,-0.751,-1.137,-0.458
173
- Naive,Sundial-Base,0.154,0.038,0.308,-0.807,-1.283,-0.462
174
- Naive,Toto-1.0,0.192,0.076,0.346,-0.709,-1.072,-0.42
175
- Naive,Stat. Ensemble,0.192,0.038,0.346,-0.388,-0.664,-0.193
176
- Naive,AutoARIMA,0.269,0.115,0.423,-0.332,-0.611,-0.137
177
- Naive,AutoTheta,0.346,0.154,0.538,-0.353,-0.616,-0.16
178
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
179
- Naive,Seasonal Naive,0.404,0.231,0.558,-0.296,-0.551,-0.107
180
- Naive,AutoETS,0.577,0.385,0.769,-0.045,-0.271,0.122
181
- Naive,Drift,0.923,0.808,1.0,0.053,0.032,0.078
182
- Seasonal Naive,Chronos-2,0.077,0.0,0.192,-0.601,-0.814,-0.393
183
- Seasonal Naive,TimesFM-2.5,0.077,0.0,0.192,-0.363,-0.511,-0.225
184
- Seasonal Naive,TiRex,0.115,0.0,0.269,-0.376,-0.544,-0.225
185
- Seasonal Naive,Chronos-Bolt,0.077,0.0,0.192,-0.363,-0.513,-0.225
186
- Seasonal Naive,TabPFN-TS,0.115,0.0,0.269,-0.435,-0.622,-0.257
187
- Seasonal Naive,Moirai-2.0,0.192,0.076,0.346,-0.351,-0.517,-0.196
188
- Seasonal Naive,Sundial-Base,0.154,0.038,0.308,-0.394,-0.572,-0.225
189
- Seasonal Naive,Toto-1.0,0.154,0.038,0.346,-0.319,-0.473,-0.178
190
- Seasonal Naive,Stat. Ensemble,0.308,0.154,0.462,-0.071,-0.115,-0.03
191
- Seasonal Naive,AutoARIMA,0.308,0.154,0.481,-0.028,-0.066,0.012
192
- Seasonal Naive,AutoTheta,0.385,0.192,0.577,-0.044,-0.107,0.019
193
- Seasonal Naive,Naive,0.596,0.442,0.769,0.228,0.097,0.355
194
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
195
- Seasonal Naive,AutoETS,0.615,0.423,0.808,0.194,0.048,0.336
196
- Seasonal Naive,Drift,0.692,0.538,0.885,0.27,0.146,0.393
197
- AutoETS,Chronos-2,0.115,0.0,0.269,-0.987,-1.529,-0.575
198
- AutoETS,TimesFM-2.5,0.154,0.038,0.308,-0.692,-1.136,-0.369
199
- AutoETS,TiRex,0.154,0.038,0.308,-0.707,-1.173,-0.365
200
- AutoETS,Chronos-Bolt,0.192,0.076,0.346,-0.69,-1.143,-0.356
201
- AutoETS,TabPFN-TS,0.192,0.038,0.346,-0.781,-1.288,-0.399
202
- AutoETS,Moirai-2.0,0.192,0.077,0.346,-0.677,-1.137,-0.34
203
- AutoETS,Sundial-Base,0.192,0.076,0.346,-0.729,-1.145,-0.385
204
- AutoETS,Toto-1.0,0.231,0.077,0.386,-0.636,-1.055,-0.318
205
- AutoETS,Stat. Ensemble,0.192,0.077,0.346,-0.329,-0.614,-0.136
206
- AutoETS,AutoARIMA,0.385,0.192,0.577,-0.275,-0.538,-0.086
207
- AutoETS,AutoTheta,0.346,0.192,0.538,-0.295,-0.587,-0.095
208
- AutoETS,Naive,0.423,0.231,0.615,0.043,-0.139,0.213
209
- AutoETS,Seasonal Naive,0.385,0.192,0.577,-0.241,-0.505,-0.051
210
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
211
- AutoETS,Drift,0.692,0.5,0.846,0.094,-0.078,0.255
212
- Drift,Chronos-2,0.077,0.0,0.192,-1.193,-1.748,-0.782
213
- Drift,TimesFM-2.5,0.115,0.0,0.269,-0.867,-1.283,-0.544
214
- Drift,TiRex,0.115,0.0,0.269,-0.884,-1.313,-0.548
215
- Drift,Chronos-Bolt,0.154,0.038,0.308,-0.866,-1.29,-0.531
216
- Drift,TabPFN-TS,0.154,0.038,0.308,-0.965,-1.439,-0.579
217
- Drift,Moirai-2.0,0.115,0.0,0.269,-0.85,-1.241,-0.539
218
- Drift,Sundial-Base,0.154,0.038,0.308,-0.909,-1.417,-0.539
219
- Drift,Toto-1.0,0.115,0.0,0.231,-0.806,-1.183,-0.509
220
- Drift,Stat. Ensemble,0.192,0.038,0.346,-0.467,-0.771,-0.259
221
- Drift,AutoARIMA,0.269,0.115,0.423,-0.408,-0.704,-0.2
222
- Drift,AutoTheta,0.192,0.038,0.346,-0.429,-0.71,-0.225
223
- Drift,Naive,0.077,0.0,0.192,-0.056,-0.085,-0.033
224
- Drift,Seasonal Naive,0.308,0.115,0.462,-0.369,-0.648,-0.171
225
- Drift,AutoETS,0.308,0.154,0.5,-0.104,-0.342,0.073
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_energy/pairwise_WQL.csv DELETED
@@ -1,226 +0,0 @@
1
- model_1,model_2,win_rate,win_rate_lower,win_rate_upper,skill_score,skill_score_lower,skill_score_upper
2
- Chronos-2,Chronos-2,0.5,0.5,0.5,0.0,0.0,0.0
3
- Chronos-2,TiRex,0.808,0.654,0.962,0.143,0.076,0.204
4
- Chronos-2,TabPFN-TS,0.808,0.654,0.962,0.103,0.043,0.165
5
- Chronos-2,TimesFM-2.5,0.808,0.654,0.962,0.159,0.086,0.224
6
- Chronos-2,Chronos-Bolt,0.962,0.885,1.0,0.165,0.099,0.23
7
- Chronos-2,Moirai-2.0,0.923,0.808,1.0,0.175,0.108,0.243
8
- Chronos-2,Toto-1.0,0.885,0.769,1.0,0.186,0.119,0.253
9
- Chronos-2,Sundial-Base,0.962,0.885,1.0,0.212,0.16,0.273
10
- Chronos-2,Stat. Ensemble,1.0,1.0,1.0,0.401,0.322,0.47
11
- Chronos-2,AutoARIMA,1.0,1.0,1.0,0.402,0.319,0.474
12
- Chronos-2,AutoETS,0.962,0.885,1.0,0.565,0.464,0.652
13
- Chronos-2,Seasonal Naive,1.0,1.0,1.0,0.46,0.384,0.522
14
- Chronos-2,AutoTheta,1.0,1.0,1.0,0.485,0.417,0.553
15
- Chronos-2,Naive,1.0,1.0,1.0,0.642,0.566,0.711
16
- Chronos-2,Drift,1.0,1.0,1.0,0.657,0.583,0.723
17
- TiRex,Chronos-2,0.192,0.038,0.346,-0.166,-0.256,-0.083
18
- TiRex,TiRex,0.5,0.5,0.5,0.0,0.0,0.0
19
- TiRex,TabPFN-TS,0.462,0.269,0.654,-0.047,-0.16,0.056
20
- TiRex,TimesFM-2.5,0.673,0.5,0.846,0.02,-0.015,0.053
21
- TiRex,Chronos-Bolt,0.712,0.538,0.865,0.026,-0.018,0.077
22
- TiRex,Moirai-2.0,0.75,0.577,0.904,0.038,0.007,0.071
23
- TiRex,Toto-1.0,0.827,0.673,0.962,0.051,0.023,0.082
24
- TiRex,Sundial-Base,0.788,0.635,0.923,0.082,-0.005,0.162
25
- TiRex,Stat. Ensemble,0.923,0.808,1.0,0.302,0.222,0.379
26
- TiRex,AutoARIMA,0.962,0.885,1.0,0.302,0.222,0.38
27
- TiRex,AutoETS,0.962,0.885,1.0,0.493,0.379,0.594
28
- TiRex,Seasonal Naive,1.0,1.0,1.0,0.37,0.299,0.439
29
- TiRex,AutoTheta,1.0,1.0,1.0,0.4,0.33,0.472
30
- TiRex,Naive,1.0,1.0,1.0,0.583,0.496,0.662
31
- TiRex,Drift,1.0,1.0,1.0,0.6,0.519,0.674
32
- TabPFN-TS,Chronos-2,0.192,0.038,0.346,-0.114,-0.198,-0.045
33
- TabPFN-TS,TiRex,0.538,0.346,0.731,0.045,-0.06,0.138
34
- TabPFN-TS,TabPFN-TS,0.5,0.5,0.5,0.0,0.0,0.0
35
- TabPFN-TS,TimesFM-2.5,0.5,0.308,0.692,0.063,-0.03,0.15
36
- TabPFN-TS,Chronos-Bolt,0.577,0.385,0.769,0.069,-0.012,0.148
37
- TabPFN-TS,Moirai-2.0,0.615,0.423,0.808,0.081,-0.022,0.18
38
- TabPFN-TS,Toto-1.0,0.577,0.385,0.769,0.093,-0.007,0.189
39
- TabPFN-TS,Sundial-Base,0.769,0.615,0.923,0.123,0.016,0.219
40
- TabPFN-TS,Stat. Ensemble,0.962,0.885,1.0,0.333,0.245,0.415
41
- TabPFN-TS,AutoARIMA,0.885,0.731,1.0,0.333,0.24,0.418
42
- TabPFN-TS,AutoETS,0.962,0.885,1.0,0.516,0.398,0.612
43
- TabPFN-TS,Seasonal Naive,1.0,1.0,1.0,0.398,0.321,0.473
44
- TabPFN-TS,AutoTheta,1.0,1.0,1.0,0.426,0.336,0.513
45
- TabPFN-TS,Naive,1.0,1.0,1.0,0.601,0.507,0.685
46
- TabPFN-TS,Drift,1.0,1.0,1.0,0.618,0.526,0.695
47
- TimesFM-2.5,Chronos-2,0.192,0.038,0.346,-0.19,-0.289,-0.094
48
- TimesFM-2.5,TiRex,0.327,0.154,0.5,-0.02,-0.056,0.015
49
- TimesFM-2.5,TabPFN-TS,0.5,0.308,0.692,-0.068,-0.177,0.029
50
- TimesFM-2.5,TimesFM-2.5,0.5,0.5,0.5,0.0,0.0,0.0
51
- TimesFM-2.5,Chronos-Bolt,0.577,0.404,0.75,0.006,-0.031,0.046
52
- TimesFM-2.5,Moirai-2.0,0.615,0.442,0.788,0.018,-0.038,0.072
53
- TimesFM-2.5,Toto-1.0,0.654,0.481,0.808,0.032,-0.016,0.081
54
- TimesFM-2.5,Sundial-Base,0.75,0.596,0.904,0.063,-0.029,0.147
55
- TimesFM-2.5,Stat. Ensemble,0.962,0.885,1.0,0.288,0.209,0.366
56
- TimesFM-2.5,AutoARIMA,1.0,1.0,1.0,0.288,0.212,0.365
57
- TimesFM-2.5,AutoETS,0.962,0.885,1.0,0.483,0.376,0.581
58
- TimesFM-2.5,Seasonal Naive,1.0,1.0,1.0,0.357,0.288,0.423
59
- TimesFM-2.5,AutoTheta,1.0,1.0,1.0,0.387,0.31,0.47
60
- TimesFM-2.5,Naive,1.0,1.0,1.0,0.574,0.483,0.657
61
- TimesFM-2.5,Drift,1.0,1.0,1.0,0.592,0.507,0.67
62
- Chronos-Bolt,Chronos-2,0.038,0.0,0.115,-0.197,-0.299,-0.11
63
- Chronos-Bolt,TiRex,0.288,0.135,0.462,-0.027,-0.084,0.018
64
- Chronos-Bolt,TabPFN-TS,0.423,0.231,0.615,-0.074,-0.174,0.012
65
- Chronos-Bolt,TimesFM-2.5,0.423,0.25,0.596,-0.006,-0.048,0.03
66
- Chronos-Bolt,Chronos-Bolt,0.5,0.5,0.5,0.0,0.0,0.0
67
- Chronos-Bolt,Moirai-2.0,0.462,0.308,0.635,0.012,-0.062,0.076
68
- Chronos-Bolt,Toto-1.0,0.577,0.404,0.75,0.026,-0.03,0.079
69
- Chronos-Bolt,Sundial-Base,0.75,0.596,0.904,0.057,-0.039,0.142
70
- Chronos-Bolt,Stat. Ensemble,0.923,0.808,1.0,0.283,0.207,0.358
71
- Chronos-Bolt,AutoARIMA,0.923,0.808,1.0,0.284,0.204,0.362
72
- Chronos-Bolt,AutoETS,0.962,0.885,1.0,0.48,0.368,0.58
73
- Chronos-Bolt,Seasonal Naive,1.0,1.0,1.0,0.353,0.287,0.417
74
- Chronos-Bolt,AutoTheta,1.0,1.0,1.0,0.384,0.307,0.466
75
- Chronos-Bolt,Naive,1.0,1.0,1.0,0.572,0.485,0.651
76
- Chronos-Bolt,Drift,1.0,1.0,1.0,0.589,0.506,0.663
77
- Moirai-2.0,Chronos-2,0.077,0.0,0.192,-0.212,-0.32,-0.122
78
- Moirai-2.0,TiRex,0.25,0.096,0.423,-0.039,-0.077,-0.007
79
- Moirai-2.0,TabPFN-TS,0.385,0.192,0.577,-0.088,-0.219,0.022
80
- Moirai-2.0,TimesFM-2.5,0.385,0.212,0.558,-0.019,-0.077,0.037
81
- Moirai-2.0,Chronos-Bolt,0.538,0.365,0.692,-0.012,-0.082,0.058
82
- Moirai-2.0,Moirai-2.0,0.5,0.5,0.5,0.0,0.0,0.0
83
- Moirai-2.0,Toto-1.0,0.538,0.365,0.712,0.014,-0.016,0.046
84
- Moirai-2.0,Sundial-Base,0.788,0.654,0.923,0.045,-0.054,0.134
85
- Moirai-2.0,Stat. Ensemble,0.885,0.768,1.0,0.274,0.181,0.36
86
- Moirai-2.0,AutoARIMA,0.923,0.808,1.0,0.275,0.183,0.363
87
- Moirai-2.0,AutoETS,0.923,0.808,1.0,0.473,0.354,0.58
88
- Moirai-2.0,Seasonal Naive,0.923,0.808,1.0,0.345,0.266,0.421
89
- Moirai-2.0,AutoTheta,0.923,0.808,1.0,0.376,0.293,0.457
90
- Moirai-2.0,Naive,0.962,0.885,1.0,0.566,0.482,0.648
91
- Moirai-2.0,Drift,1.0,1.0,1.0,0.584,0.502,0.661
92
- Toto-1.0,Chronos-2,0.115,0.0,0.231,-0.229,-0.339,-0.136
93
- Toto-1.0,TiRex,0.173,0.038,0.327,-0.054,-0.089,-0.024
94
- Toto-1.0,TabPFN-TS,0.423,0.231,0.615,-0.103,-0.233,0.007
95
- Toto-1.0,TimesFM-2.5,0.346,0.192,0.519,-0.033,-0.088,0.016
96
- Toto-1.0,Chronos-Bolt,0.423,0.25,0.596,-0.026,-0.086,0.029
97
- Toto-1.0,Moirai-2.0,0.462,0.288,0.635,-0.014,-0.048,0.016
98
- Toto-1.0,Toto-1.0,0.5,0.5,0.5,0.0,0.0,0.0
99
- Toto-1.0,Sundial-Base,0.712,0.557,0.865,0.032,-0.064,0.118
100
- Toto-1.0,Stat. Ensemble,0.846,0.692,0.962,0.264,0.176,0.347
101
- Toto-1.0,AutoARIMA,0.885,0.731,1.0,0.265,0.173,0.351
102
- Toto-1.0,AutoETS,0.885,0.731,1.0,0.466,0.343,0.573
103
- Toto-1.0,Seasonal Naive,0.962,0.885,1.0,0.336,0.258,0.412
104
- Toto-1.0,AutoTheta,0.962,0.885,1.0,0.367,0.282,0.45
105
- Toto-1.0,Naive,0.962,0.885,1.0,0.56,0.475,0.64
106
- Toto-1.0,Drift,0.962,0.885,1.0,0.578,0.497,0.654
107
- Sundial-Base,Chronos-2,0.038,0.0,0.115,-0.27,-0.375,-0.19
108
- Sundial-Base,TiRex,0.212,0.077,0.365,-0.089,-0.193,0.005
109
- Sundial-Base,TabPFN-TS,0.231,0.077,0.385,-0.14,-0.281,-0.016
110
- Sundial-Base,TimesFM-2.5,0.25,0.096,0.404,-0.067,-0.172,0.029
111
- Sundial-Base,Chronos-Bolt,0.25,0.096,0.404,-0.061,-0.166,0.037
112
- Sundial-Base,Moirai-2.0,0.212,0.077,0.346,-0.048,-0.154,0.052
113
- Sundial-Base,Toto-1.0,0.288,0.135,0.443,-0.033,-0.133,0.06
114
- Sundial-Base,Sundial-Base,0.5,0.5,0.5,0.0,0.0,0.0
115
- Sundial-Base,Stat. Ensemble,0.769,0.577,0.923,0.24,0.14,0.328
116
- Sundial-Base,AutoARIMA,0.808,0.654,0.962,0.24,0.142,0.324
117
- Sundial-Base,AutoETS,0.846,0.692,0.962,0.448,0.328,0.539
118
- Sundial-Base,Seasonal Naive,0.885,0.731,1.0,0.314,0.227,0.393
119
- Sundial-Base,AutoTheta,0.923,0.808,1.0,0.346,0.251,0.424
120
- Sundial-Base,Naive,0.962,0.885,1.0,0.546,0.446,0.638
121
- Sundial-Base,Drift,1.0,1.0,1.0,0.564,0.467,0.653
122
- Stat. Ensemble,Chronos-2,0.0,0.0,0.0,-0.67,-0.887,-0.475
123
- Stat. Ensemble,TiRex,0.077,0.0,0.192,-0.432,-0.61,-0.286
124
- Stat. Ensemble,TabPFN-TS,0.038,0.0,0.115,-0.499,-0.711,-0.324
125
- Stat. Ensemble,TimesFM-2.5,0.038,0.0,0.115,-0.404,-0.576,-0.265
126
- Stat. Ensemble,Chronos-Bolt,0.077,0.0,0.192,-0.395,-0.558,-0.261
127
- Stat. Ensemble,Moirai-2.0,0.115,0.0,0.232,-0.378,-0.563,-0.221
128
- Stat. Ensemble,Toto-1.0,0.154,0.038,0.308,-0.359,-0.531,-0.214
129
- Stat. Ensemble,Sundial-Base,0.231,0.077,0.423,-0.315,-0.489,-0.162
130
- Stat. Ensemble,Stat. Ensemble,0.5,0.5,0.5,0.0,0.0,0.0
131
- Stat. Ensemble,AutoARIMA,0.5,0.308,0.692,0.001,-0.03,0.03
132
- Stat. Ensemble,AutoETS,0.808,0.654,0.923,0.274,0.156,0.394
133
- Stat. Ensemble,Seasonal Naive,0.808,0.673,0.923,0.098,0.062,0.134
134
- Stat. Ensemble,AutoTheta,0.846,0.692,0.962,0.14,0.054,0.228
135
- Stat. Ensemble,Naive,0.962,0.885,1.0,0.403,0.302,0.5
136
- Stat. Ensemble,Drift,0.962,0.885,1.0,0.427,0.332,0.521
137
- AutoARIMA,Chronos-2,0.0,0.0,0.0,-0.672,-0.901,-0.469
138
- AutoARIMA,TiRex,0.038,0.0,0.115,-0.433,-0.614,-0.285
139
- AutoARIMA,TabPFN-TS,0.115,0.0,0.269,-0.5,-0.719,-0.316
140
- AutoARIMA,TimesFM-2.5,0.0,0.0,0.0,-0.405,-0.574,-0.269
141
- AutoARIMA,Chronos-Bolt,0.077,0.0,0.192,-0.396,-0.568,-0.256
142
- AutoARIMA,Moirai-2.0,0.077,0.0,0.192,-0.379,-0.569,-0.224
143
- AutoARIMA,Toto-1.0,0.115,0.0,0.269,-0.36,-0.54,-0.209
144
- AutoARIMA,Sundial-Base,0.192,0.038,0.346,-0.316,-0.48,-0.166
145
- AutoARIMA,Stat. Ensemble,0.5,0.308,0.692,-0.001,-0.031,0.029
146
- AutoARIMA,AutoARIMA,0.5,0.5,0.5,0.0,0.0,0.0
147
- AutoARIMA,AutoETS,0.731,0.538,0.885,0.273,0.152,0.393
148
- AutoARIMA,Seasonal Naive,0.846,0.712,0.962,0.097,0.065,0.133
149
- AutoARIMA,AutoTheta,0.846,0.692,0.962,0.139,0.048,0.225
150
- AutoARIMA,Naive,0.885,0.769,1.0,0.402,0.299,0.502
151
- AutoARIMA,Drift,0.923,0.808,1.0,0.427,0.331,0.523
152
- AutoETS,Chronos-2,0.038,0.0,0.115,-1.301,-1.876,-0.865
153
- AutoETS,TiRex,0.038,0.0,0.115,-0.973,-1.463,-0.611
154
- AutoETS,TabPFN-TS,0.038,0.0,0.115,-1.065,-1.575,-0.661
155
- AutoETS,TimesFM-2.5,0.038,0.0,0.115,-0.934,-1.389,-0.604
156
- AutoETS,Chronos-Bolt,0.038,0.0,0.115,-0.922,-1.378,-0.581
157
- AutoETS,Moirai-2.0,0.077,0.0,0.192,-0.898,-1.383,-0.547
158
- AutoETS,Toto-1.0,0.115,0.0,0.269,-0.873,-1.342,-0.522
159
- AutoETS,Sundial-Base,0.154,0.038,0.308,-0.812,-1.168,-0.489
160
- AutoETS,Stat. Ensemble,0.192,0.077,0.346,-0.378,-0.651,-0.185
161
- AutoETS,AutoARIMA,0.269,0.115,0.462,-0.376,-0.647,-0.18
162
- AutoETS,AutoETS,0.5,0.5,0.5,0.0,0.0,0.0
163
- AutoETS,Seasonal Naive,0.423,0.231,0.615,-0.243,-0.498,-0.059
164
- AutoETS,AutoTheta,0.423,0.231,0.615,-0.185,-0.496,0.024
165
- AutoETS,Naive,0.692,0.5,0.846,0.177,-0.005,0.343
166
- AutoETS,Drift,0.769,0.614,0.923,0.211,0.031,0.372
167
- Seasonal Naive,Chronos-2,0.0,0.0,0.0,-0.852,-1.093,-0.622
168
- Seasonal Naive,TiRex,0.0,0.0,0.0,-0.588,-0.782,-0.427
169
- Seasonal Naive,TabPFN-TS,0.0,0.0,0.0,-0.662,-0.896,-0.473
170
- Seasonal Naive,TimesFM-2.5,0.0,0.0,0.0,-0.556,-0.734,-0.404
171
- Seasonal Naive,Chronos-Bolt,0.0,0.0,0.0,-0.547,-0.716,-0.403
172
- Seasonal Naive,Moirai-2.0,0.077,0.0,0.192,-0.528,-0.727,-0.362
173
- Seasonal Naive,Toto-1.0,0.038,0.0,0.115,-0.507,-0.7,-0.348
174
- Seasonal Naive,Sundial-Base,0.115,0.0,0.269,-0.458,-0.646,-0.293
175
- Seasonal Naive,Stat. Ensemble,0.192,0.077,0.327,-0.109,-0.155,-0.066
176
- Seasonal Naive,AutoARIMA,0.154,0.038,0.288,-0.108,-0.153,-0.069
177
- Seasonal Naive,AutoETS,0.577,0.385,0.769,0.195,0.055,0.332
178
- Seasonal Naive,Seasonal Naive,0.5,0.5,0.5,0.0,0.0,0.0
179
- Seasonal Naive,AutoTheta,0.5,0.308,0.692,0.047,-0.058,0.151
180
- Seasonal Naive,Naive,0.75,0.596,0.885,0.338,0.225,0.448
181
- Seasonal Naive,Drift,0.846,0.692,0.962,0.365,0.257,0.469
182
- AutoTheta,Chronos-2,0.0,0.0,0.0,-0.942,-1.236,-0.714
183
- AutoTheta,TiRex,0.0,0.0,0.0,-0.665,-0.895,-0.491
184
- AutoTheta,TabPFN-TS,0.0,0.0,0.0,-0.743,-1.051,-0.506
185
- AutoTheta,TimesFM-2.5,0.0,0.0,0.0,-0.633,-0.887,-0.449
186
- AutoTheta,Chronos-Bolt,0.0,0.0,0.0,-0.622,-0.874,-0.442
187
- AutoTheta,Moirai-2.0,0.077,0.0,0.192,-0.602,-0.84,-0.415
188
- AutoTheta,Toto-1.0,0.038,0.0,0.115,-0.581,-0.819,-0.392
189
- AutoTheta,Sundial-Base,0.077,0.0,0.192,-0.53,-0.735,-0.335
190
- AutoTheta,Stat. Ensemble,0.154,0.038,0.308,-0.163,-0.296,-0.057
191
- AutoTheta,AutoARIMA,0.154,0.038,0.308,-0.162,-0.29,-0.05
192
- AutoTheta,AutoETS,0.577,0.385,0.769,0.156,-0.025,0.332
193
- AutoTheta,Seasonal Naive,0.5,0.308,0.692,-0.049,-0.178,0.054
194
- AutoTheta,AutoTheta,0.5,0.5,0.5,0.0,0.0,0.0
195
- AutoTheta,Naive,0.692,0.5,0.846,0.305,0.169,0.434
196
- AutoTheta,Drift,0.769,0.615,0.923,0.334,0.2,0.458
197
- Naive,Chronos-2,0.0,0.0,0.0,-1.796,-2.464,-1.302
198
- Naive,TiRex,0.0,0.0,0.0,-1.397,-1.954,-0.985
199
- Naive,TabPFN-TS,0.0,0.0,0.0,-1.509,-2.175,-1.028
200
- Naive,TimesFM-2.5,0.0,0.0,0.0,-1.35,-1.917,-0.935
201
- Naive,Chronos-Bolt,0.0,0.0,0.0,-1.335,-1.866,-0.941
202
- Naive,Moirai-2.0,0.038,0.0,0.115,-1.306,-1.84,-0.93
203
- Naive,Toto-1.0,0.038,0.0,0.115,-1.275,-1.776,-0.904
204
- Naive,Sundial-Base,0.038,0.0,0.115,-1.201,-1.761,-0.804
205
- Naive,Stat. Ensemble,0.038,0.0,0.115,-0.674,-1.0,-0.432
206
- Naive,AutoARIMA,0.115,0.0,0.231,-0.672,-1.007,-0.426
207
- Naive,AutoETS,0.308,0.154,0.5,-0.215,-0.523,0.005
208
- Naive,Seasonal Naive,0.25,0.115,0.404,-0.51,-0.811,-0.291
209
- Naive,AutoTheta,0.308,0.154,0.5,-0.439,-0.768,-0.203
210
- Naive,Naive,0.5,0.5,0.5,0.0,0.0,0.0
211
- Naive,Drift,0.962,0.885,1.0,0.041,0.026,0.058
212
- Drift,Chronos-2,0.0,0.0,0.0,-1.915,-2.609,-1.396
213
- Drift,TiRex,0.0,0.0,0.0,-1.5,-2.068,-1.08
214
- Drift,TabPFN-TS,0.0,0.0,0.0,-1.616,-2.276,-1.108
215
- Drift,TimesFM-2.5,0.0,0.0,0.0,-1.45,-2.03,-1.028
216
- Drift,Chronos-Bolt,0.0,0.0,0.0,-1.435,-1.97,-1.024
217
- Drift,Moirai-2.0,0.0,0.0,0.0,-1.405,-1.953,-1.008
218
- Drift,Toto-1.0,0.038,0.0,0.115,-1.372,-1.889,-0.986
219
- Drift,Sundial-Base,0.0,0.0,0.0,-1.296,-1.882,-0.876
220
- Drift,Stat. Ensemble,0.038,0.0,0.115,-0.746,-1.089,-0.497
221
- Drift,AutoARIMA,0.077,0.0,0.192,-0.744,-1.097,-0.495
222
- Drift,AutoETS,0.231,0.077,0.386,-0.267,-0.592,-0.032
223
- Drift,Seasonal Naive,0.154,0.038,0.308,-0.574,-0.885,-0.347
224
- Drift,AutoTheta,0.231,0.077,0.385,-0.501,-0.843,-0.25
225
- Drift,Naive,0.038,0.0,0.115,-0.043,-0.062,-0.027
226
- Drift,Drift,0.5,0.5,0.5,0.0,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_health/leaderboard_MASE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,74.99999999999999,31.785976470244737,0.0,0.33713188125000004,0.0,0.0
3
- TimesFM-2.5,74.99999999999999,32.87474900010455,0.0,1.1526229114583333,0.0,0.0
4
- TiRex,70.0,32.475887751126365,0.0,0.3170949572916667,0.0,0.0
5
- TabPFN-TS,68.57142857142857,30.065972963196263,0.0,28.343891684635416,0.0,0.0
6
- Moirai-2.0,65.35714285714286,33.79488514595498,0.0,0.34801128166666667,0.1,0.0
7
- Toto-1.0,60.71428571428571,31.61987589396246,0.0,9.245004833333333,0.0,0.0
8
- Chronos-Bolt,56.785714285714285,30.041136141125392,0.0,0.406559469375,0.0,0.0
9
- Stat. Ensemble,50.71428571428572,29.22957519821191,0.0,193.737679803125,0.0,0.0
10
- AutoETS,49.28571428571429,28.071655389223615,0.0,2.600062978125,0.0,0.0
11
- Sundial-Base,48.57142857142857,22.707876070444478,0.0,8.29280267,0.0,0.0
12
- AutoARIMA,36.42857142857142,5.881738788340107,0.0,7.909155262500001,0.0,0.0
13
- AutoTheta,30.71428571428571,15.209167110886634,0.0,2.196897983333333,0.0,0.0
14
- Seasonal Naive,21.78571428571429,0.0,0.0,1.1288162752343749,0.0,0.0
15
- Naive,21.071428571428573,0.30982946320681215,0.0,1.196338605,0.0,0.0
16
- Drift,20.0,7.491051478030797,0.0,1.1894594236458333,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_health/leaderboard_SQL.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- Chronos-2,81.42857142857143,38.088755291215016,0.0,0.33713188125000004,0.0,0.0
3
- TimesFM-2.5,76.42857142857142,38.03594076932182,0.0,1.1526229114583333,0.0,0.0
4
- TiRex,70.71428571428572,36.908809578202614,0.0,0.3170949572916667,0.0,0.0
5
- Moirai-2.0,69.64285714285714,38.69101656633476,0.0,0.34801128166666667,0.1,0.0
6
- Toto-1.0,66.42857142857143,37.84147796075432,0.0,9.245004833333333,0.0,0.0
7
- TabPFN-TS,65.71428571428571,35.21500006276798,0.0,28.343891684635416,0.0,0.0
8
- Chronos-Bolt,61.07142857142858,36.49963485983284,0.0,0.406559469375,0.0,0.0
9
- AutoETS,52.85714285714287,31.12404362920507,0.0,2.600062978125,0.0,0.0
10
- Stat. Ensemble,49.285714285714285,31.66665055693333,0.0,193.737679803125,0.0,0.0
11
- Sundial-Base,38.57142857142858,23.555776015856367,0.0,8.29280267,0.0,0.0
12
- AutoARIMA,37.857142857142854,8.155432249884543,0.0,7.909155262500001,0.0,0.0
13
- AutoTheta,26.42857142857143,17.733419423817608,0.0,2.196897983333333,0.0,0.0
14
- Seasonal Naive,21.071428571428573,0.0,0.0,1.1288162752343749,0.0,0.0
15
- Naive,17.5,-7.297793633399574,0.0,1.196338605,0.0,0.0
16
- Drift,15.0,0.4041900778196994,0.0,1.1894594236458333,0.0,0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tables/domain_health/leaderboard_WAPE.csv DELETED
@@ -1,16 +0,0 @@
1
- model_name,win_rate,skill_score,median_training_time_s_per100,median_inference_time_s_per100,training_corpus_overlap,num_failures
2
- TimesFM-2.5,72.14285714285714,30.23643324970241,0.0,1.1526229114583333,0.0,0.0
3
- TiRex,67.14285714285715,29.759653338440085,0.0,0.3170949572916667,0.0,0.0
4
- Chronos-2,67.14285714285714,29.12048426647892,0.0,0.33713188125000004,0.0,0.0
5
- Moirai-2.0,66.07142857142858,31.59592125809332,0.0,0.34801128166666667,0.1,0.0
6
- Toto-1.0,64.2857142857143,28.503622669499972,0.0,9.245004833333333,0.0,0.0
7
- Stat. Ensemble,57.85714285714286,29.695571630620133,0.0,193.737679803125,0.0,0.0
8
- TabPFN-TS,57.85714285714286,22.16773042840192,0.0,28.343891684635416,0.0,0.0
9
- Chronos-Bolt,57.50000000000001,25.88326333574207,0.0,0.406559469375,0.0,0.0
10
- AutoETS,57.14285714285714,29.452418664318316,0.0,2.600062978125,0.0,0.0
11
- Sundial-Base,47.14285714285714,16.462367071514482,0.0,8.29280267,0.0,0.0
12
- AutoARIMA,35.0,15.909991992310612,0.0,7.909155262500001,0.0,0.0
13
- AutoTheta,30.0,14.265515884511814,0.0,2.196897983333333,0.0,0.0
14
- Naive,24.642857142857146,0.09410339815076885,0.0,1.196338605,0.0,0.0
15
- Seasonal Naive,23.92857142857143,0.0,0.0,1.1288162752343749,0.0,0.0
16
- Drift,22.142857142857146,6.9951032330727525,0.0,1.1894594236458333,0.0,0.0