Taha Aksu committed on
Commit ·
8c443bb
1
Parent(s): da0a655
add open source information
Browse files
- app.py +7 -5
- results/Chronos_small/config.json +2 -1
- results/DLinear/config.json +2 -1
- results/FlowState-9.1M/config.json +2 -1
- results/Lag-Llama/config.json +2 -1
- results/Moirai2/config.json +2 -1
- results/Moirai_base/config.json +2 -1
- results/Moirai_large/config.json +2 -1
- results/Moirai_small/config.json +2 -1
- results/N-BEATS/config.json +2 -1
- results/PatchTST/config.json +2 -1
- results/TSOrchestra-test/config.json +3 -2
- results/TSOrchestra/config.json +3 -2
- results/TTM-R1-Pretrained/config.json +2 -1
- results/TTM-R2-Finetuned/config.json +3 -2
- results/TTM-R2-Pretrained/config.json +2 -1
- results/TiRex/config.json +2 -1
- results/TimeCopilot/config.json +2 -1
- results/TimesFM-2.5/config.json +3 -2
- results/Toto_Open_Base_1.0/config.json +2 -1
- results/YingLong_110m/config.json +2 -1
- results/YingLong_300m/config.json +2 -1
- results/YingLong_50m/config.json +2 -1
- results/YingLong_6m/config.json +2 -1
- results/auto_arima/config.json +2 -1
- results/auto_ets/config.json +2 -1
- results/auto_theta/config.json +2 -1
- results/chronos_base/config.json +2 -1
- results/chronos_bolt_base/config.json +2 -1
- results/chronos_bolt_small/config.json +2 -1
- results/chronos_large/config.json +2 -1
- results/crossformer/config.json +2 -1
- results/deepar/config.json +2 -1
- results/granite-flowstate-r1/config.json +2 -1
- results/iTransformer/config.json +2 -1
- results/naive/config.json +2 -1
- results/seasonal_naive/config.json +2 -1
- results/sundial_base_128m/config.json +2 -1
- results/tabpfn_ts/config.json +2 -1
- results/tempo_ensemble/config.json +2 -1
- results/tft/config.json +2 -1
- results/tide/config.json +2 -1
- results/timesfm/config.json +2 -1
- results/timesfm_2_0_500m/config.json +2 -1
- results/visionts/config.json +2 -1
- src/display/utils.py +2 -1
- src/leaderboard/read_evals.py +4 -1
app.py
CHANGED
|
@@ -121,7 +121,7 @@ def init_leaderboard(ori_dataframe, model_info_df, sort_val: str | list | None =
|
|
| 121 |
merged_df = merged_df[new_cols]
|
| 122 |
if sort_val:
|
| 123 |
if isinstance(sort_val, list):
|
| 124 |
-
assert sort_val[0] == '
|
| 125 |
# ipdb.set_trace()
|
| 126 |
leakage_order = pd.Categorical(merged_df[sort_val[0]], categories=['No', 'Yes', 'N/A'], ordered=True)
|
| 127 |
merged_df['leakage_order'] = leakage_order
|
|
@@ -170,10 +170,11 @@ def init_leaderboard(ori_dataframe, model_info_df, sort_val: str | list | None =
|
|
| 170 |
# ],
|
| 171 |
filter_columns=[
|
| 172 |
ColumnFilter(ModelInfoColumn.model_type.name, type="checkboxgroup", label="Model types"),
|
| 173 |
-
ColumnFilter(ModelInfoColumn.testdata_leakage.name, type="checkboxgroup", label="
|
|
|
|
| 174 |
],
|
| 175 |
# bool_checkboxgroup_label="",
|
| 176 |
-
column_widths=[
|
| 177 |
interactive=False,
|
| 178 |
)
|
| 179 |
|
|
@@ -185,7 +186,7 @@ with demo:
|
|
| 185 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 186 |
with gr.TabItem('🏅 Overall', elem_id="llm-benchmark-tab-table", id=5):
|
| 187 |
# leaderboard = init_leaderboard(overall_df, model_info_df, sort_val='Rank')
|
| 188 |
-
leaderboard = init_leaderboard(overall_df, model_info_df, sort_val=['
|
| 189 |
print(f'FINAL Overall LEADERBOARD {overall_df}')
|
| 190 |
with gr.TabItem("🏅 By Domain", elem_id="llm-benchmark-tab-table", id=0):
|
| 191 |
leaderboard = init_leaderboard(domain_df, model_info_df)
|
|
@@ -217,7 +218,7 @@ with demo:
|
|
| 217 |
|
| 218 |
if (!target) { return []; } // safety guard
|
| 219 |
|
| 220 |
-
// Ask Gradio
|
| 221 |
target.dispatchEvent(new Event('input', { bubbles: true }));
|
| 222 |
|
| 223 |
return []; // load() must return something
|
|
@@ -225,6 +226,7 @@ with demo:
|
|
| 225 |
"""
|
| 226 |
)
|
| 227 |
|
|
|
|
| 228 |
with gr.Row():
|
| 229 |
with gr.Accordion("📙 Citation", open=False):
|
| 230 |
citation_button = gr.Textbox(
|
|
|
|
| 121 |
merged_df = merged_df[new_cols]
|
| 122 |
if sort_val:
|
| 123 |
if isinstance(sort_val, list):
|
| 124 |
+
assert sort_val[0] == 'Test Leak.'
|
| 125 |
# ipdb.set_trace()
|
| 126 |
leakage_order = pd.Categorical(merged_df[sort_val[0]], categories=['No', 'Yes', 'N/A'], ordered=True)
|
| 127 |
merged_df['leakage_order'] = leakage_order
|
|
|
|
| 170 |
# ],
|
| 171 |
filter_columns=[
|
| 172 |
ColumnFilter(ModelInfoColumn.model_type.name, type="checkboxgroup", label="Model types"),
|
| 173 |
+
ColumnFilter(ModelInfoColumn.testdata_leakage.name, type="checkboxgroup", label="Test Leak."),
|
| 174 |
+
ColumnFilter(ModelInfoColumn.code_available.name, type="checkboxgroup", label="OSS"),
|
| 175 |
],
|
| 176 |
# bool_checkboxgroup_label="",
|
| 177 |
+
column_widths=[30, 180] + [130 for _ in range(len(merged_df.columns)-2)],
|
| 178 |
interactive=False,
|
| 179 |
)
|
| 180 |
|
|
|
|
| 186 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 187 |
with gr.TabItem('🏅 Overall', elem_id="llm-benchmark-tab-table", id=5):
|
| 188 |
# leaderboard = init_leaderboard(overall_df, model_info_df, sort_val='Rank')
|
| 189 |
+
leaderboard = init_leaderboard(overall_df, model_info_df, sort_val=['Test Leak.', 'MASE_Rank'])
|
| 190 |
print(f'FINAL Overall LEADERBOARD {overall_df}')
|
| 191 |
with gr.TabItem("🏅 By Domain", elem_id="llm-benchmark-tab-table", id=0):
|
| 192 |
leaderboard = init_leaderboard(domain_df, model_info_df)
|
|
|
|
| 218 |
|
| 219 |
if (!target) { return []; } // safety guard
|
| 220 |
|
| 221 |
+
// Ask Gradio's front-end to re-compute its filters:
|
| 222 |
target.dispatchEvent(new Event('input', { bubbles: true }));
|
| 223 |
|
| 224 |
return []; // load() must return something
|
|
|
|
| 226 |
"""
|
| 227 |
)
|
| 228 |
|
| 229 |
+
|
| 230 |
with gr.Row():
|
| 231 |
with gr.Accordion("📙 Citation", open=False):
|
| 232 |
citation_button = gr.Textbox(
|
results/Chronos_small/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-t5-small",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-t5-small",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/DLinear/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "The Chinese University of Hong Kong",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "The Chinese University of Hong Kong",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/FlowState-9.1M/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/ibm-research/flowstate",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/flowstate.ipynb",
|
| 7 |
"org": "IBM Research",
|
| 8 |
-
"testdata_leakage": "No"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/ibm-research/flowstate",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/flowstate.ipynb",
|
| 7 |
"org": "IBM Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/Lag-Llama/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/time-series-foundation-models/Lag-Llama",
|
| 6 |
"org": "Morgan Stanley & Service Now",
|
| 7 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/time-series-foundation-models/Lag-Llama",
|
| 6 |
"org": "Morgan Stanley & Service Now",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/Moirai2/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-2.0-R-small",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/uni2ts",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
-
"testdata_leakage": "No"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-2.0-R-small",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/uni2ts",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/Moirai_base/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
-
"testdata_leakage": "No"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/Moirai_large/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
-
"testdata_leakage": "No"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/Moirai_small/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
-
"testdata_leakage": "No"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/moirai.ipynb",
|
| 7 |
"org": "Salesforce AI Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/N-BEATS/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "ServiceNow",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "ServiceNow",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/PatchTST/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Princeton University",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Princeton University",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/TSOrchestra-test/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/Melady/TEMPO",
|
| 6 |
"org": "Melady Lab @ USC",
|
| 7 |
-
"testdata_leakage": "Yes"
|
| 8 |
-
|
|
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/Melady/TEMPO",
|
| 6 |
"org": "Melady Lab @ USC",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "No"
|
| 9 |
+
}
|
results/TSOrchestra/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/DC-research/TSorchestra",
|
| 6 |
"org": "Melady Lab @ USC",
|
| 7 |
-
"testdata_leakage": "No"
|
| 8 |
-
|
|
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/DC-research/TSorchestra",
|
| 6 |
"org": "Melady Lab @ USC",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "No"
|
| 9 |
+
}
|
results/TTM-R1-Pretrained/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1",
|
| 6 |
"org": "IBM Research",
|
| 7 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1",
|
| 6 |
"org": "IBM Research",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/TTM-R2-Finetuned/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/ttm.ipynb",
|
| 7 |
"org": "IBM Research",
|
| 8 |
-
"testdata_leakage": "Yes"
|
| 9 |
-
|
|
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/ttm.ipynb",
|
| 7 |
"org": "IBM Research",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
+
}
|
results/TTM-R2-Pretrained/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2",
|
| 6 |
"org": "IBM Research",
|
| 7 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2",
|
| 6 |
"org": "IBM Research",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/TiRex/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/NX-AI/TiRex",
|
| 6 |
"org": "NXAI",
|
| 7 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/NX-AI/TiRex",
|
| 6 |
"org": "NXAI",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/TimeCopilot/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "agentic",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/AzulGarza/TimeCopilot",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "agentic",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/AzulGarza/TimeCopilot",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/TimesFM-2.5/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/google/timesfm-2.5-200m-pytorch",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/timesfm2p5.ipynb",
|
| 7 |
"org": "Google Research",
|
| 8 |
-
"testdata_leakage": "No"
|
| 9 |
-
|
|
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/google/timesfm-2.5-200m-pytorch",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/timesfm2p5.ipynb",
|
| 7 |
"org": "Google Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
+
}
|
results/Toto_Open_Base_1.0/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",
|
| 6 |
"org": "Datadog",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/Datadog/Toto-Open-Base-1.0",
|
| 6 |
"org": "Datadog",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/YingLong_110m/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_110m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_110m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/YingLong_300m/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_300m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_300m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/YingLong_50m/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_50m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_50m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/YingLong_6m/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_6m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "bf16",
|
| 5 |
"model_link": "https://huggingface.co/qcw2333/YingLong_6m",
|
| 6 |
"org": "Alibaba",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/auto_arima/config.json
CHANGED
|
@@ -2,5 +2,6 @@
|
|
| 2 |
"model": "Auto_Arima",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
-
"testdata_leakage": "No"
|
|
|
|
| 6 |
}
|
|
|
|
| 2 |
"model": "Auto_Arima",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
+
"testdata_leakage": "No",
|
| 6 |
+
"code_available": "Yes"
|
| 7 |
}
|
results/auto_ets/config.json
CHANGED
|
@@ -2,5 +2,6 @@
|
|
| 2 |
"model": "Auto_ETS",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
-
"testdata_leakage": "No"
|
|
|
|
| 6 |
}
|
|
|
|
| 2 |
"model": "Auto_ETS",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
+
"testdata_leakage": "No",
|
| 6 |
+
"code_available": "Yes"
|
| 7 |
}
|
results/auto_theta/config.json
CHANGED
|
@@ -2,5 +2,6 @@
|
|
| 2 |
"model": "Auto_Theta",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
-
"testdata_leakage": "No"
|
|
|
|
| 6 |
}
|
|
|
|
| 2 |
"model": "Auto_Theta",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
+
"testdata_leakage": "No",
|
| 6 |
+
"code_available": "Yes"
|
| 7 |
}
|
results/chronos_base/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-t5-base",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-t5-base",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/chronos_bolt_base/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-bolt-base",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-bolt-base",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/chronos_bolt_small/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-bolt-small",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-bolt-small",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/chronos_large/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-t5-large",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/amazon/chronos-t5-large",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/chronos.ipynb",
|
| 7 |
"org": "AWS AI Labs",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/crossformer/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Shanghai Jiao Tong University",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Shanghai Jiao Tong University",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/deepar/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Amazon Research",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Amazon Research",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/granite-flowstate-r1/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-flowstate-r1",
|
| 6 |
"code_link": "https://github.com/ibm-granite/granite-tsfm/blob/main/notebooks/hfdemo/flowstate_gift_eval.ipynb",
|
| 7 |
"org": "IBM Research",
|
| 8 |
-
"testdata_leakage": "No"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/ibm-granite/granite-timeseries-flowstate-r1",
|
| 6 |
"code_link": "https://github.com/ibm-granite/granite-tsfm/blob/main/notebooks/hfdemo/flowstate_gift_eval.ipynb",
|
| 7 |
"org": "IBM Research",
|
| 8 |
+
"testdata_leakage": "No",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/iTransformer/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Tsinghua University",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Tsinghua University",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/naive/config.json
CHANGED
|
@@ -2,5 +2,6 @@
|
|
| 2 |
"model": "Naive",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
-
"testdata_leakage": "No"
|
|
|
|
| 6 |
}
|
|
|
|
| 2 |
"model": "Naive",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
+
"testdata_leakage": "No",
|
| 6 |
+
"code_available": "Yes"
|
| 7 |
}
|
results/seasonal_naive/config.json
CHANGED
|
@@ -2,5 +2,6 @@
|
|
| 2 |
"model": "Seasonal_Naive",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
-
"testdata_leakage": "No"
|
|
|
|
| 6 |
}
|
|
|
|
| 2 |
"model": "Seasonal_Naive",
|
| 3 |
"model_type": "statistical",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
+
"testdata_leakage": "No",
|
| 6 |
+
"code_available": "Yes"
|
| 7 |
}
|
results/sundial_base_128m/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/thuml/sundial-base-128m",
|
| 6 |
"org": "Tsinghua University",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/thuml/sundial-base-128m",
|
| 6 |
"org": "Tsinghua University",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/tabpfn_ts/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/liam-sbhoo/tabpfn-time-series/tree/main",
|
| 6 |
"org": "PriorLabs",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/liam-sbhoo/tabpfn-time-series/tree/main",
|
| 6 |
"org": "PriorLabs",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/tempo_ensemble/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/Melady/TEMPO",
|
| 6 |
"org": "Melady Lab @ USC",
|
| 7 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/Melady/TEMPO",
|
| 6 |
"org": "Melady Lab @ USC",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "No"
|
| 9 |
}
|
results/tft/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Google Research",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Google Research",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/tide/config.json
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Google Research",
|
| 6 |
-
"testdata_leakage": "No"
|
|
|
|
| 7 |
}
|
|
|
|
| 3 |
"model_type": "deep-learning",
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"org": "Google Research",
|
| 6 |
+
"testdata_leakage": "No",
|
| 7 |
+
"code_available": "Yes"
|
| 8 |
}
|
results/timesfm/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/google/timesfm-1.0-200m",
|
| 6 |
"org": "Google Research",
|
| 7 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://huggingface.co/google/timesfm-1.0-200m",
|
| 6 |
"org": "Google Research",
|
| 7 |
+
"testdata_leakage": "Yes",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
results/timesfm_2_0_500m/config.json
CHANGED
|
@@ -5,5 +5,6 @@
|
|
| 5 |
"model_link": "https://huggingface.co/google/timesfm-2.0-500m-jax",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/timesfm.ipynb",
|
| 7 |
"org": "Google Research",
|
| 8 |
-
"testdata_leakage": "Yes"
|
|
|
|
| 9 |
}
|
|
|
|
| 5 |
"model_link": "https://huggingface.co/google/timesfm-2.0-500m-jax",
|
| 6 |
"code_link": "https://github.com/SalesforceAIResearch/gift-eval/blob/main/notebooks/timesfm.ipynb",
|
| 7 |
"org": "Google Research",
|
| 8 |
+
"testdata_leakage": "Yes",
|
| 9 |
+
"code_available": "Yes"
|
| 10 |
}
|
results/visionts/config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/Keytoyze/VisionTS",
|
| 6 |
"org": "Zhejiang University",
|
| 7 |
-
"testdata_leakage": "No"
|
|
|
|
| 8 |
}
|
|
|
|
| 4 |
"model_dtype": "float32",
|
| 5 |
"model_link": "https://github.com/Keytoyze/VisionTS",
|
| 6 |
"org": "Zhejiang University",
|
| 7 |
+
"testdata_leakage": "No",
|
| 8 |
+
"code_available": "Yes"
|
| 9 |
}
|
src/display/utils.py
CHANGED
|
@@ -35,7 +35,8 @@ model_info_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "n
|
|
| 35 |
model_info_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False, True)])
|
| 36 |
model_info_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
| 37 |
model_info_dict.append(["org", ColumnContent, ColumnContent("Organization", "str", True, hidden=False)])
|
| 38 |
-
model_info_dict.append(["testdata_leakage", ColumnContent, ColumnContent("
|
|
|
|
| 39 |
# model_info_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 40 |
|
| 41 |
# We use make dataclass to dynamically fill the scores from Tasks
|
|
|
|
| 35 |
model_info_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False, True)])
|
| 36 |
model_info_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
| 37 |
model_info_dict.append(["org", ColumnContent, ColumnContent("Organization", "str", True, hidden=False)])
|
| 38 |
+
model_info_dict.append(["testdata_leakage", ColumnContent, ColumnContent("Test Leak.", "str", True, hidden=False)])
|
| 39 |
+
model_info_dict.append(["code_available", ColumnContent, ColumnContent("OSS", "str", True, hidden=False)])
|
| 40 |
# model_info_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 41 |
|
| 42 |
# We use make dataclass to dynamically fill the scores from Tasks
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -26,6 +26,7 @@ class ModelConfig:
|
|
| 26 |
likes: int = 0
|
| 27 |
num_params: int | str = 0
|
| 28 |
testdata_leakage: str = "NA"
|
|
|
|
| 29 |
|
| 30 |
@classmethod
|
| 31 |
def init_from_json_file(cls, json_filepath):
|
|
@@ -43,8 +44,9 @@ class ModelConfig:
|
|
| 43 |
code_link = data.get("code_link", "")
|
| 44 |
org = data.get("org", "")
|
| 45 |
testdata_leakage = data.get("testdata_leakage", "N/A")
|
|
|
|
| 46 |
return cls(model=model, model_link=model_link, model_type=model_type, code_link=code_link, org=org,
|
| 47 |
-
precision=precision, testdata_leakage=testdata_leakage)
|
| 48 |
|
| 49 |
def to_dict(self):
|
| 50 |
"""Converts the model info to a dict compatible with our dataframe display"""
|
|
@@ -60,6 +62,7 @@ class ModelConfig:
|
|
| 60 |
ModelInfoColumn.params.name: self.num_params,
|
| 61 |
ModelInfoColumn.org.name: self.org,
|
| 62 |
ModelInfoColumn.testdata_leakage.name: self.testdata_leakage,
|
|
|
|
| 63 |
}
|
| 64 |
|
| 65 |
return data_dict
|
|
|
|
| 26 |
likes: int = 0
|
| 27 |
num_params: int | str = 0
|
| 28 |
testdata_leakage: str = "NA"
|
| 29 |
+
code_available: str = "NA"
|
| 30 |
|
| 31 |
@classmethod
|
| 32 |
def init_from_json_file(cls, json_filepath):
|
|
|
|
| 44 |
code_link = data.get("code_link", "")
|
| 45 |
org = data.get("org", "")
|
| 46 |
testdata_leakage = data.get("testdata_leakage", "N/A")
|
| 47 |
+
code_available = data.get("code_available", "NA")
|
| 48 |
return cls(model=model, model_link=model_link, model_type=model_type, code_link=code_link, org=org,
|
| 49 |
+
precision=precision, testdata_leakage=testdata_leakage, code_available=code_available)
|
| 50 |
|
| 51 |
def to_dict(self):
|
| 52 |
"""Converts the model info to a dict compatible with our dataframe display"""
|
|
|
|
| 62 |
ModelInfoColumn.params.name: self.num_params,
|
| 63 |
ModelInfoColumn.org.name: self.org,
|
| 64 |
ModelInfoColumn.testdata_leakage.name: self.testdata_leakage,
|
| 65 |
+
ModelInfoColumn.code_available.name: self.code_available,
|
| 66 |
}
|
| 67 |
|
| 68 |
return data_dict
|