| [ | |
| { | |
| "agent_name": "Single LLM", | |
| "agent_type": "single-LLM", | |
| "base_model": "gpt-4o", | |
| "T1_acc": null, | |
| "T2_acc": null, | |
| "T3_acc": null, | |
| "T4_acc": null, | |
| "FreshRetailNet_T1_acc": 0.6364, | |
| "FreshRetailNet_T2_acc": 0.5227, | |
| "FreshRetailNet_T3_acc": 0.0289, | |
| "FreshRetailNet_T4_acc": 0.1364, | |
| "PSML_T1_acc": 0.675, | |
| "PSML_T2_acc": 0.2067, | |
| "PSML_T3_acc": 0.348, | |
| "PSML_T4_acc": 0.36, | |
| "CausalChambers_T1_acc": 0.1333, | |
| "CausalChambers_T2_acc": 0.2733, | |
| "CausalChambers_T3_acc": 0.352, | |
| "CausalChambers_T4_acc": 0.26, | |
| "MIMIC_T1_acc": 0.4681, | |
| "MIMIC_T2_acc": 0.2128, | |
| "MIMIC_T3_acc": 0.3661, | |
| "MIMIC_T4_acc": 0.2979, | |
| "T2_sMAPE": null, | |
| "T2_MAE": null, | |
| "T2_OW_sMAPE_MIMIC": null, | |
| "T2_OW_RMSSE_MIMIC": null, | |
| "T4_sMAPE": null, | |
| "T4_MAE": null, | |
| "T4_OW_sMAPE_MIMIC": null, | |
| "T4_OW_RMSSE_MIMIC": null, | |
| "FreshRetailNet_T2_MAE": 0.12, | |
| "FreshRetailNet_T2_sMAPE": 1.27, | |
| "FreshRetailNet_T4_MAE": 0.34, | |
| "FreshRetailNet_T4_sMAPE": 1.29, | |
| "PSML_T2_MAE": 0.61, | |
| "PSML_T2_sMAPE": 0.6, | |
| "PSML_T4_MAE": 0.44, | |
| "PSML_T4_sMAPE": 0.37, | |
| "CausalChambers_T2_MAE": 2.48, | |
| "CausalChambers_T2_OW_RMSSE": 0.0000257, | |
| "CausalChambers_T4_MAE": 2.58, | |
| "CausalChambers_T4_OW_RMSSE": 0.0000269, | |
| "MIMIC_T2_OW_sMAPE": 15.2, | |
| "MIMIC_T2_OW_RMSSE": 0.55, | |
| "MIMIC_T4_OW_sMAPE": 16.86, | |
| "MIMIC_T4_OW_RMSSE": 0.63 | |
| }, | |
| { | |
| "agent_name": "TimeSeries Scientist", | |
| "agent_type": "time-series-specific agent", | |
| "base_model": "gpt-4o", | |
| "T1_acc": null, | |
| "T2_acc": null, | |
| "T3_acc": null, | |
| "T4_acc": null, | |
| "FreshRetailNet_T1_acc": 0.3352, | |
| "FreshRetailNet_T2_acc": 0.5682, | |
| "FreshRetailNet_T3_acc": 0.0341, | |
| "FreshRetailNet_T4_acc": 0.5682, | |
| "PSML_T1_acc": 0.28, | |
| "PSML_T2_acc": 0.2667, | |
| "PSML_T3_acc": 0.216, | |
| "PSML_T4_acc": 0.2733, | |
| "CausalChambers_T1_acc": 0.2867, | |
| "CausalChambers_T2_acc": 0.0267, | |
| "CausalChambers_T3_acc": 0.216, | |
| "CausalChambers_T4_acc": 0.0267, | |
| "MIMIC_T1_acc": 0.1011, | |
| "MIMIC_T2_acc": 0.234, | |
| "MIMIC_T3_acc": 0.2887, | |
| "MIMIC_T4_acc": 0.234, | |
| "T2_sMAPE": null, | |
| "T2_MAE": null, | |
| "T2_OW_sMAPE_MIMIC": null, | |
| "T2_OW_RMSSE_MIMIC": null, | |
| "T4_sMAPE": null, | |
| "T4_MAE": null, | |
| "T4_OW_sMAPE_MIMIC": null, | |
| "T4_OW_RMSSE_MIMIC": null, | |
| "FreshRetailNet_T2_MAE": 0.35, | |
| "FreshRetailNet_T2_sMAPE": 1.27, | |
| "FreshRetailNet_T4_MAE": 0.51, | |
| "FreshRetailNet_T4_sMAPE": 1.4, | |
| "PSML_T2_MAE": 1.53, | |
| "PSML_T2_sMAPE": 0.65, | |
| "PSML_T4_MAE": 0.84, | |
| "PSML_T4_sMAPE": 0.48, | |
| "CausalChambers_T2_MAE": 2.44, | |
| "CausalChambers_T2_OW_RMSSE": 0.0000253, | |
| "CausalChambers_T4_MAE": 2.94, | |
| "CausalChambers_T4_OW_RMSSE": 0.0000306, | |
| "MIMIC_T2_OW_sMAPE": 15.81, | |
| "MIMIC_T2_OW_RMSSE": 0.52, | |
| "MIMIC_T4_OW_sMAPE": 17.18, | |
| "MIMIC_T4_OW_RMSSE": 0.64 | |
| }, | |
| { | |
| "agent_name": "AgentScope", | |
| "agent_type": "general agent", | |
| "base_model": "gpt-4o", | |
| "T1_acc": null, | |
| "T2_acc": null, | |
| "T3_acc": null, | |
| "T4_acc": null, | |
| "FreshRetailNet_T1_acc": 0.625, | |
| "FreshRetailNet_T2_acc": 0.1212, | |
| "FreshRetailNet_T3_acc": 0.1364, | |
| "FreshRetailNet_T4_acc": 0.1894, | |
| "PSML_T1_acc": 0.66, | |
| "PSML_T2_acc": 0.2467, | |
| "PSML_T3_acc": 0.272, | |
| "PSML_T4_acc": 0.3533, | |
| "CausalChambers_T1_acc": 0.12, | |
| "CausalChambers_T2_acc": 0.46, | |
| "CausalChambers_T3_acc": 0.44, | |
| "CausalChambers_T4_acc": 0.32, | |
| "MIMIC_T1_acc": 0.4468, | |
| "MIMIC_T2_acc": 0.2128, | |
| "MIMIC_T3_acc": 0.2395, | |
| "MIMIC_T4_acc": 0.227, | |
| "T2_sMAPE": null, | |
| "T2_MAE": null, | |
| "T2_OW_sMAPE_MIMIC": null, | |
| "T2_OW_RMSSE_MIMIC": null, | |
| "T4_sMAPE": null, | |
| "T4_MAE": null, | |
| "T4_OW_sMAPE_MIMIC": null, | |
| "T4_OW_RMSSE_MIMIC": null, | |
| "FreshRetailNet_T2_MAE": 0.12, | |
| "FreshRetailNet_T2_sMAPE": 126.27, | |
| "FreshRetailNet_T4_MAE": 0.2, | |
| "FreshRetailNet_T4_sMAPE": 130.86, | |
| "PSML_T2_MAE": 0.28, | |
| "PSML_T2_sMAPE": 37.38, | |
| "PSML_T4_MAE": 0.35, | |
| "PSML_T4_sMAPE": 30.51, | |
| "CausalChambers_T2_MAE": 2.76, | |
| "CausalChambers_T2_OW_RMSSE": 0.00262, | |
| "CausalChambers_T4_MAE": 2.66, | |
| "CausalChambers_T4_OW_RMSSE": 0.00246, | |
| "MIMIC_T2_OW_sMAPE": 11.05, | |
| "MIMIC_T2_OW_RMSSE": 0.43, | |
| "MIMIC_T4_OW_sMAPE": 12.02, | |
| "MIMIC_T4_OW_RMSSE": 0.49 | |
| }, | |
| { | |
| "agent_name": "MetaGPT", | |
| "agent_type": "general agent", | |
| "base_model": "gpt-4o", | |
| "T1_acc": null, | |
| "T2_acc": null, | |
| "T3_acc": null, | |
| "T4_acc": null, | |
| "FreshRetailNet_T1_acc": 0.625, | |
| "FreshRetailNet_T2_acc": 0.0909, | |
| "FreshRetailNet_T3_acc": 0.0511, | |
| "FreshRetailNet_T4_acc": 0.1439, | |
| "PSML_T1_acc": 0.675, | |
| "PSML_T2_acc": 0.2109, | |
| "PSML_T3_acc": 0.22, | |
| "PSML_T4_acc": 0.3133, | |
| "CausalChambers_T1_acc": 0.1067, | |
| "CausalChambers_T2_acc": 0.5933, | |
| "CausalChambers_T3_acc": 0.452, | |
| "CausalChambers_T4_acc": 0.16, | |
| "MIMIC_T1_acc": 0.4574, | |
| "MIMIC_T2_acc": 0.1702, | |
| "MIMIC_T3_acc": 0.2897, | |
| "MIMIC_T4_acc": 0.2553, | |
| "T2_sMAPE": null, | |
| "T2_MAE": null, | |
| "T2_OW_sMAPE_MIMIC": null, | |
| "T2_OW_RMSSE_MIMIC": null, | |
| "T4_sMAPE": null, | |
| "T4_MAE": null, | |
| "T4_OW_sMAPE_MIMIC": null, | |
| "T4_OW_RMSSE_MIMIC": null, | |
| "FreshRetailNet_T2_MAE": 0.13, | |
| "FreshRetailNet_T2_sMAPE": 126.59, | |
| "FreshRetailNet_T4_MAE": 0.24, | |
| "FreshRetailNet_T4_sMAPE": 127.22, | |
| "PSML_T2_MAE": 0.34, | |
| "PSML_T2_sMAPE": 24.74, | |
| "PSML_T4_MAE": 0.4, | |
| "PSML_T4_sMAPE": 43.47, | |
| "CausalChambers_T2_MAE": 2.62, | |
| "CausalChambers_T2_OW_RMSSE": 0.00272, | |
| "CausalChambers_T4_MAE": 2.76, | |
| "CausalChambers_T4_OW_RMSSE": 0.00287, | |
| "MIMIC_T2_OW_sMAPE": 14.11, | |
| "MIMIC_T2_OW_RMSSE": 0.53, | |
| "MIMIC_T4_OW_sMAPE": 15.4, | |
| "MIMIC_T4_OW_RMSSE": 0.63 | |
| }, | |
| { | |
| "agent_name": "CAMEL", | |
| "agent_type": "general agent", | |
| "base_model": "gpt-4o", | |
| "T1_acc": null, | |
| "T2_acc": null, | |
| "T3_acc": null, | |
| "T4_acc": null, | |
| "FreshRetailNet_T1_acc": 0.642, | |
| "FreshRetailNet_T2_acc": 0.0076, | |
| "FreshRetailNet_T3_acc": 0.0625, | |
| "FreshRetailNet_T4_acc": 0.3106, | |
| "PSML_T1_acc": 0.685, | |
| "PSML_T2_acc": 0.14, | |
| "PSML_T3_acc": 0.184, | |
| "PSML_T4_acc": 0.3067, | |
| "CausalChambers_T1_acc": 0.1, | |
| "CausalChambers_T2_acc": 0.66, | |
| "CausalChambers_T3_acc": 0.42, | |
| "CausalChambers_T4_acc": 0.2667, | |
| "MIMIC_T1_acc": 0.4681, | |
| "MIMIC_T2_acc": 0.2057, | |
| "MIMIC_T3_acc": 0.3014, | |
| "MIMIC_T4_acc": 0.234, | |
| "T2_sMAPE": null, | |
| "T2_MAE": null, | |
| "T2_OW_sMAPE_MIMIC": null, | |
| "T2_OW_RMSSE_MIMIC": null, | |
| "T4_sMAPE": null, | |
| "T4_MAE": null, | |
| "T4_OW_sMAPE_MIMIC": null, | |
| "T4_OW_RMSSE_MIMIC": null, | |
| "FreshRetailNet_T2_MAE": 0.13, | |
| "FreshRetailNet_T2_sMAPE": 126.75, | |
| "FreshRetailNet_T4_MAE": 0.28, | |
| "FreshRetailNet_T4_sMAPE": 128.18, | |
| "PSML_T2_MAE": 0.43, | |
| "PSML_T2_sMAPE": 34.89, | |
| "PSML_T4_MAE": 0.45, | |
| "PSML_T4_sMAPE": 35.78, | |
| "CausalChambers_T2_MAE": 2.99, | |
| "CausalChambers_T2_OW_RMSSE": 0.00311, | |
| "CausalChambers_T4_MAE": 2.5, | |
| "CausalChambers_T4_OW_RMSSE": 0.0026, | |
| "MIMIC_T2_OW_sMAPE": 12.02, | |
| "MIMIC_T2_OW_RMSSE": 0.55, | |
| "MIMIC_T4_OW_sMAPE": 15.74, | |
| "MIMIC_T4_OW_RMSSE": 0.59 | |
| } | |
| ] | |