[ { "agent_name": "Single LLM", "agent_type": "single-LLM", "base_model": "gpt-4o", "T1_acc": null, "T2_acc": null, "T3_acc": null, "T4_acc": null, "FreshRetailNet_T1_acc": 0.6364, "FreshRetailNet_T2_acc": 0.5227, "FreshRetailNet_T3_acc": 0.0289, "FreshRetailNet_T4_acc": 0.1364, "PSML_T1_acc": 0.675, "PSML_T2_acc": 0.2067, "PSML_T3_acc": 0.348, "PSML_T4_acc": 0.36, "CausalChambers_T1_acc": 0.1333, "CausalChambers_T2_acc": 0.2733, "CausalChambers_T3_acc": 0.352, "CausalChambers_T4_acc": 0.26, "MIMIC_T1_acc": 0.4681, "MIMIC_T2_acc": 0.2128, "MIMIC_T3_acc": 0.3661, "MIMIC_T4_acc": 0.2979, "T2_sMAPE": null, "T2_MAE": null, "T2_OW_sMAPE_MIMIC": null, "T2_OW_RMSSE_MIMIC": null, "T4_sMAPE": null, "T4_MAE": null, "T4_OW_sMAPE_MIMIC": null, "T4_OW_RMSSE_MIMIC": null, "FreshRetailNet_T2_MAE": 0.12, "FreshRetailNet_T2_sMAPE": 1.27, "FreshRetailNet_T4_MAE": 0.34, "FreshRetailNet_T4_sMAPE": 1.29, "PSML_T2_MAE": 0.61, "PSML_T2_sMAPE": 0.6, "PSML_T4_MAE": 0.44, "PSML_T4_sMAPE": 0.37, "CausalChambers_T2_MAE": 2.48, "CausalChambers_T2_OW_RMSSE": 0.0000257, "CausalChambers_T4_MAE": 2.58, "CausalChambers_T4_OW_RMSSE": 0.0000269, "MIMIC_T2_OW_sMAPE": 15.2, "MIMIC_T2_OW_RMSSE": 0.55, "MIMIC_T4_OW_sMAPE": 16.86, "MIMIC_T4_OW_RMSSE": 0.63 }, { "agent_name": "TimeSeries Scientist", "agent_type": "time-series-specific agent", "base_model": "gpt-4o", "T1_acc": null, "T2_acc": null, "T3_acc": null, "T4_acc": null, "FreshRetailNet_T1_acc": 0.3352, "FreshRetailNet_T2_acc": 0.5682, "FreshRetailNet_T3_acc": 0.0341, "FreshRetailNet_T4_acc": 0.5682, "PSML_T1_acc": 0.28, "PSML_T2_acc": 0.2667, "PSML_T3_acc": 0.216, "PSML_T4_acc": 0.2733, "CausalChambers_T1_acc": 0.2867, "CausalChambers_T2_acc": 0.0267, "CausalChambers_T3_acc": 0.216, "CausalChambers_T4_acc": 0.0267, "MIMIC_T1_acc": 0.1011, "MIMIC_T2_acc": 0.234, "MIMIC_T3_acc": 0.2887, "MIMIC_T4_acc": 0.234, "T2_sMAPE": null, "T2_MAE": null, "T2_OW_sMAPE_MIMIC": null, "T2_OW_RMSSE_MIMIC": null, "T4_sMAPE": null, "T4_MAE": null, "T4_OW_sMAPE_MIMIC": null, "T4_OW_RMSSE_MIMIC": null, "FreshRetailNet_T2_MAE": 0.35, "FreshRetailNet_T2_sMAPE": 1.27, "FreshRetailNet_T4_MAE": 0.51, "FreshRetailNet_T4_sMAPE": 1.4, "PSML_T2_MAE": 1.53, "PSML_T2_sMAPE": 0.65, "PSML_T4_MAE": 0.84, "PSML_T4_sMAPE": 0.48, "CausalChambers_T2_MAE": 2.44, "CausalChambers_T2_OW_RMSSE": 0.0000253, "CausalChambers_T4_MAE": 2.94, "CausalChambers_T4_OW_RMSSE": 0.0000306, "MIMIC_T2_OW_sMAPE": 15.81, "MIMIC_T2_OW_RMSSE": 0.52, "MIMIC_T4_OW_sMAPE": 17.18, "MIMIC_T4_OW_RMSSE": 0.64 }, { "agent_name": "AgentScope", "agent_type": "general agent", "base_model": "gpt-4o", "T1_acc": null, "T2_acc": null, "T3_acc": null, "T4_acc": null, "FreshRetailNet_T1_acc": 0.625, "FreshRetailNet_T2_acc": 0.1212, "FreshRetailNet_T3_acc": 0.1364, "FreshRetailNet_T4_acc": 0.1894, "PSML_T1_acc": 0.66, "PSML_T2_acc": 0.2467, "PSML_T3_acc": 0.272, "PSML_T4_acc": 0.3533, "CausalChambers_T1_acc": 0.12, "CausalChambers_T2_acc": 0.46, "CausalChambers_T3_acc": 0.44, "CausalChambers_T4_acc": 0.32, "MIMIC_T1_acc": 0.4468, "MIMIC_T2_acc": 0.2128, "MIMIC_T3_acc": 0.2395, "MIMIC_T4_acc": 0.227, "T2_sMAPE": null, "T2_MAE": null, "T2_OW_sMAPE_MIMIC": null, "T2_OW_RMSSE_MIMIC": null, "T4_sMAPE": null, "T4_MAE": null, "T4_OW_sMAPE_MIMIC": null, "T4_OW_RMSSE_MIMIC": null, "FreshRetailNet_T2_MAE": 0.12, "FreshRetailNet_T2_sMAPE": 126.27, "FreshRetailNet_T4_MAE": 0.2, "FreshRetailNet_T4_sMAPE": 130.86, "PSML_T2_MAE": 0.28, "PSML_T2_sMAPE": 37.38, "PSML_T4_MAE": 0.35, "PSML_T4_sMAPE": 30.51, "CausalChambers_T2_MAE": 2.76, "CausalChambers_T2_OW_RMSSE": 0.00262, "CausalChambers_T4_MAE": 2.66, "CausalChambers_T4_OW_RMSSE": 0.00246, "MIMIC_T2_OW_sMAPE": 11.05, "MIMIC_T2_OW_RMSSE": 0.43, "MIMIC_T4_OW_sMAPE": 12.02, "MIMIC_T4_OW_RMSSE": 0.49 }, { "agent_name": "MetaGPT", "agent_type": "general agent", "base_model": "gpt-4o", "T1_acc": null, "T2_acc": null, "T3_acc": null, "T4_acc": null, "FreshRetailNet_T1_acc": 0.625, "FreshRetailNet_T2_acc": 0.0909, "FreshRetailNet_T3_acc": 0.0511, "FreshRetailNet_T4_acc": 0.1439, "PSML_T1_acc": 0.675, "PSML_T2_acc": 0.2109, "PSML_T3_acc": 0.22, "PSML_T4_acc": 0.3133, "CausalChambers_T1_acc": 0.1067, "CausalChambers_T2_acc": 0.5933, "CausalChambers_T3_acc": 0.452, "CausalChambers_T4_acc": 0.16, "MIMIC_T1_acc": 0.4574, "MIMIC_T2_acc": 0.1702, "MIMIC_T3_acc": 0.2897, "MIMIC_T4_acc": 0.2553, "T2_sMAPE": null, "T2_MAE": null, "T2_OW_sMAPE_MIMIC": null, "T2_OW_RMSSE_MIMIC": null, "T4_sMAPE": null, "T4_MAE": null, "T4_OW_sMAPE_MIMIC": null, "T4_OW_RMSSE_MIMIC": null, "FreshRetailNet_T2_MAE": 0.13, "FreshRetailNet_T2_sMAPE": 126.59, "FreshRetailNet_T4_MAE": 0.24, "FreshRetailNet_T4_sMAPE": 127.22, "PSML_T2_MAE": 0.34, "PSML_T2_sMAPE": 24.74, "PSML_T4_MAE": 0.4, "PSML_T4_sMAPE": 43.47, "CausalChambers_T2_MAE": 2.62, "CausalChambers_T2_OW_RMSSE": 0.00272, "CausalChambers_T4_MAE": 2.76, "CausalChambers_T4_OW_RMSSE": 0.00287, "MIMIC_T2_OW_sMAPE": 14.11, "MIMIC_T2_OW_RMSSE": 0.53, "MIMIC_T4_OW_sMAPE": 15.4, "MIMIC_T4_OW_RMSSE": 0.63 }, { "agent_name": "CAMEL", "agent_type": "general agent", "base_model": "gpt-4o", "T1_acc": null, "T2_acc": null, "T3_acc": null, "T4_acc": null, "FreshRetailNet_T1_acc": 0.642, "FreshRetailNet_T2_acc": 0.0076, "FreshRetailNet_T3_acc": 0.0625, "FreshRetailNet_T4_acc": 0.3106, "PSML_T1_acc": 0.685, "PSML_T2_acc": 0.14, "PSML_T3_acc": 0.184, "PSML_T4_acc": 0.3067, "CausalChambers_T1_acc": 0.1, "CausalChambers_T2_acc": 0.66, "CausalChambers_T3_acc": 0.42, "CausalChambers_T4_acc": 0.2667, "MIMIC_T1_acc": 0.4681, "MIMIC_T2_acc": 0.2057, "MIMIC_T3_acc": 0.3014, "MIMIC_T4_acc": 0.234, "T2_sMAPE": null, "T2_MAE": null, "T2_OW_sMAPE_MIMIC": null, "T2_OW_RMSSE_MIMIC": null, "T4_sMAPE": null, "T4_MAE": null, "T4_OW_sMAPE_MIMIC": null, "T4_OW_RMSSE_MIMIC": null, "FreshRetailNet_T2_MAE": 0.13, "FreshRetailNet_T2_sMAPE": 126.75, "FreshRetailNet_T4_MAE": 0.28, "FreshRetailNet_T4_sMAPE": 128.18, "PSML_T2_MAE": 0.43, "PSML_T2_sMAPE": 34.89, "PSML_T4_MAE": 0.45, "PSML_T4_sMAPE": 35.78, "CausalChambers_T2_MAE": 2.99, "CausalChambers_T2_OW_RMSSE": 0.00311, "CausalChambers_T4_MAE": 2.5, "CausalChambers_T4_OW_RMSSE": 0.0026, "MIMIC_T2_OW_sMAPE": 12.02, "MIMIC_T2_OW_RMSSE": 0.55, "MIMIC_T4_OW_sMAPE": 15.74, "MIMIC_T4_OW_RMSSE": 0.59 } ]