# Launch examples/experiment_eval.py with three gin config files plus
# command-line --gin.NAME=value overrides for the environment IDs, the
# environment/agent model names, batch size, tag, DB-push flag, and the tag
# used to check for existing episodes.
#
# Each override is single-quoted so the inner double quotes and brackets reach
# the program verbatim. The ENV_IDS value intentionally spans two physical
# lines inside one quoted word; the shell passes the embedded newline through.
#
# NOTE(review): "agent1-model", "agent2-model" and "xx" look like
# placeholders -- confirm or replace them before running.
python examples/experiment_eval.py \
--gin_file sotopia_conf/generation_utils_conf/generate.gin \
--gin_file sotopia_conf/server_conf/server.gin \
--gin_file sotopia_conf/run_async_server_in_batch.gin \
'--gin.ENV_IDS=["01H7VFHP1JEP91TTK5PEK39D2S", "01H7VFHNH8A88C4XJ7X4PVAHV4", "01H7VFHNR1RJKDZ9V9MDTJ1SJP", "01H7VFHN3Q7498S3R4R2VDSXA0", "01H7VFHNKVTCAGBA299VQG1QS2", "01H7VFHQ1Q67B1ADNBD9WBAG3X", "01H7VFHNPMPQWSW003M7DBMVNT", "01H7VFHPEMV6QHBGM9J094FRM4", "01H7VFHNNYH3W0VRWVY178K2TK", "01H7VFHPQQQY6H4DNC6NBQ8XTG", "01H7VFHNK78PAEH6MRVYMTSEFX", "01H7VFHP43QEZA1WZB3B3J2D9X", "01H7VFHNTHZAA4B5RWJ4T539F1", "01H7VFHPP9SPQ8W6583JFZ7HZC", "01H7VFHPRFCSA3BTT39BRBZX7H", "01H7VFHPNHZ2YYRHP0GXARD550", "01H7VFHNYABRSZYBFAJCK9NR1D", "01H7VFHNSAKNYEHV7B1VA8R3J2", "01H7VFHPKA2GGPPNVJWV967HZC", "01H7VFHPBTC4ES406NQ4ET12EQ", "01H7VFHP9PAVDN6VYYBE3MPD15", "01H7VFHNHTF9NKPG4KW2Z4NBQJ", "01H7VFHN7A1ZX5KSMT2YN9RXC4", "01H7VFHN4FHYG2MBD0K4HJ5F08", "01H7VFHPCKKZNRD5G8CKPR8WE5", "01H7VFHN2D3MJB8HM910MNEVA8", "01H7VFHNN7XTR99319DS8KZCQM", "01H7VFHPM3NVVKSGCCB4S10465", "01H7VFHPFYB1K1KMPZG7E31WDB", "01H7VFHPB2RC4RHAJ80ESYF1HW", "01H7VFHQ0BGQA0AD9FC1R4M12F", "01H7VFHNBBK14NGV72BWXEXXJC", "01H7VFHPZMXNGV8PM19WHPQ2W3", "01H7VFHNVN788RJ3KXF66BPE9S", "01H7VFHPJKR16MD1KC71V4ZRCF", "01H7VFHP29TCH457PBDVF7WFDS", "01H7VFHNJHK2W1P8JSWKAMBG4Z", "01H7VFHN8MPMGJTPVN043KBKGM", "01H7VFHPGABSWQXTACCC8C3X2F", "01H7VFHNXMZQ5Q61B3J4NNTC1A", "01H7VFHQ11NAMZS4A2RDGDB01V", "01H7VFHNGJEVGSVPPT0784H6P8", "01H7VFHP0TRETZPZMEJ5RZA2G7", "01H7VFHNDRE1M02MKTPF0Q7CZA", "01H7VFHPS5WJW2694R1MNC8JFY", "01H7VFHP2XBZ6KDPGEAZ2FN1P2", "01H7VFHNW84GTR4E23KQYJ8BBN", "01H7VFHNSV5BKMP61H535PPTSG", "01H7VFHN7WJK7VWVRZZTQ6DX9T", "01H7VFHP0AW0C23DV6ZG0B4HCE", "01H7VFHN94S6Z5T6ZNC23238NT", "01H7VFHP66D5XEX2Z32SKRT2XY", "01H7VFHP4TX1J43FS1QQJ1QFND", "01H7VFHN2YQV0R5QWWRQZ1VRHW", "01H7VFHP8AN5643B0NR0NP00VE", "01H7VFHNCN97BJ2PXKHJPX2VYY", "01H7VFHNMHDJ8T9Q6F9S3E8XZC", "01H7VFHNV13MHN97GAH73E3KM8", "01H7VFHN6NYWSTWCZJE2DCQKTD", "01H7VFHN56ZT2Z4C0EFX79Q31F", "01H7VFHPMS6AJY0PFGGCFFK5GX", "01H7VFHPDE1AM74JSR8KBJJF3A", "01H7VFHNF4G18PC9JHGRC8A1R6", "01H7VFHPSWGDGEYRP63H2DJKV0", "01H7VFHNEEK6M3E96CT17AKDBD", "01H7VFHNWX3KVZGH26KYNK2XNB", 
"01H7VFHPDZVVCDZR3AARA547CY", "01H7VFHNBYXD48NDRY02VCWXFN", "01H7VFHPQ1712DHGTMPQFTXH02", "01H7VFHNQA4CJEANQ1B1J1TBWV", "01H7VFHNAH7V4JNA0705SF36Y1", "01H7VFHNZQ3PQ3DHQ7H2W9ES97", "01H7VFHP3DPGRXH1Y500VQKFZA", "01H7VFHP90434Q69V7ADY0VWZJ", "01H7VFHNRKB8BJ854JPEWY8AR3", "01H7VFHN5WVC5HKKVBHZBA553R", "01H7VFHN1PK2FXY7TPWQK343BQ", "01H7VFHP6XZVT1P4R7YKAH65HJ", "01H7VFHPTKDPQ5PZWA1M1XHT1M", "01H7VFHPF8YEVH5VVNY37Q7Z1M", "01H7VFHPH567HKQRE0C745KH9C", "01H7VFHND24JAWG23XMPYGG5HK", "01H7VFHPAD4RA819KYESWBFRYS", "01H7VFHNFVGFY578101R2PCV3T", "01H7VFHP7K0EN9QX5JTD8B9NSQ", "01H7VFHNZ1XA77AG7A97M4E6C3", "01H7VFHPHWA2CYG7BC82NS4XH1", "01H7VFHN9W0WAFZCBT09PKJJNK", "01H7VFHP5H5GY9Z62J4NJYJQN1", "01H7VFHQ2EA3TTFZQ3M6DF3YCD"]' \
'--gin.ENV_MODEL="gpt-4o"' \
'--gin.AGENT1_MODEL="agent1-model"' \
'--gin.AGENT2_MODEL="agent2-model"' \
'--gin.BATCH_SIZE=20' \
'--gin.TAG="xx"' \
'--gin.PUSH_TO_DB=True' \
'--gin.TAG_TO_CHECK_EXISTING_EPISODES="xx"'

# Launch examples/experiment_eval.py with three gin config files plus
# command-line --gin.NAME=value overrides for the environment IDs, the
# environment/agent model names, batch size, tag, DB-push flag, and the tag
# used to check for existing episodes.
#
# Each override is single-quoted so the inner double quotes and brackets reach
# the program verbatim. The ENV_IDS value intentionally spans two physical
# lines inside one quoted word; the shell passes the embedded newline through.
#
# NOTE(review): TAG is "xx" while TAG_TO_CHECK_EXISTING_EPISODES is
# "qwen-sft-qwen-sft-3-26-v2" -- the two differ here; confirm that checking
# against a different tag than the one being written is intended.
# NOTE(review): "agent1-model", "agent2-model" and "xx" look like
# placeholders -- confirm or replace them before running.
python examples/experiment_eval.py \
--gin_file sotopia_conf/generation_utils_conf/generate.gin \
--gin_file sotopia_conf/server_conf/server.gin \
--gin_file sotopia_conf/run_async_server_in_batch.gin \
'--gin.ENV_IDS=["01H7VFHP1JEP91TTK5PEK39D2S", "01H7VFHNH8A88C4XJ7X4PVAHV4", "01H7VFHNR1RJKDZ9V9MDTJ1SJP", "01H7VFHN3Q7498S3R4R2VDSXA0", "01H7VFHNKVTCAGBA299VQG1QS2", "01H7VFHQ1Q67B1ADNBD9WBAG3X", "01H7VFHNPMPQWSW003M7DBMVNT", "01H7VFHPEMV6QHBGM9J094FRM4", "01H7VFHNNYH3W0VRWVY178K2TK", "01H7VFHPQQQY6H4DNC6NBQ8XTG", "01H7VFHNK78PAEH6MRVYMTSEFX", "01H7VFHP43QEZA1WZB3B3J2D9X", "01H7VFHNTHZAA4B5RWJ4T539F1", "01H7VFHPP9SPQ8W6583JFZ7HZC", "01H7VFHPRFCSA3BTT39BRBZX7H", "01H7VFHPNHZ2YYRHP0GXARD550", "01H7VFHNYABRSZYBFAJCK9NR1D", "01H7VFHNSAKNYEHV7B1VA8R3J2", "01H7VFHPKA2GGPPNVJWV967HZC", "01H7VFHPBTC4ES406NQ4ET12EQ", "01H7VFHP9PAVDN6VYYBE3MPD15", "01H7VFHNHTF9NKPG4KW2Z4NBQJ", "01H7VFHN7A1ZX5KSMT2YN9RXC4", "01H7VFHN4FHYG2MBD0K4HJ5F08", "01H7VFHPCKKZNRD5G8CKPR8WE5", "01H7VFHN2D3MJB8HM910MNEVA8", "01H7VFHNN7XTR99319DS8KZCQM", "01H7VFHPM3NVVKSGCCB4S10465", "01H7VFHPFYB1K1KMPZG7E31WDB", "01H7VFHPB2RC4RHAJ80ESYF1HW", "01H7VFHQ0BGQA0AD9FC1R4M12F", "01H7VFHNBBK14NGV72BWXEXXJC", "01H7VFHPZMXNGV8PM19WHPQ2W3", "01H7VFHNVN788RJ3KXF66BPE9S", "01H7VFHPJKR16MD1KC71V4ZRCF", "01H7VFHP29TCH457PBDVF7WFDS", "01H7VFHNJHK2W1P8JSWKAMBG4Z", "01H7VFHN8MPMGJTPVN043KBKGM", "01H7VFHPGABSWQXTACCC8C3X2F", "01H7VFHNXMZQ5Q61B3J4NNTC1A", "01H7VFHQ11NAMZS4A2RDGDB01V", "01H7VFHNGJEVGSVPPT0784H6P8", "01H7VFHP0TRETZPZMEJ5RZA2G7", "01H7VFHNDRE1M02MKTPF0Q7CZA", "01H7VFHPS5WJW2694R1MNC8JFY", "01H7VFHP2XBZ6KDPGEAZ2FN1P2", "01H7VFHNW84GTR4E23KQYJ8BBN", "01H7VFHNSV5BKMP61H535PPTSG", "01H7VFHN7WJK7VWVRZZTQ6DX9T", "01H7VFHP0AW0C23DV6ZG0B4HCE", "01H7VFHN94S6Z5T6ZNC23238NT", "01H7VFHP66D5XEX2Z32SKRT2XY", "01H7VFHP4TX1J43FS1QQJ1QFND", "01H7VFHN2YQV0R5QWWRQZ1VRHW", "01H7VFHP8AN5643B0NR0NP00VE", "01H7VFHNCN97BJ2PXKHJPX2VYY", "01H7VFHNMHDJ8T9Q6F9S3E8XZC", "01H7VFHNV13MHN97GAH73E3KM8", "01H7VFHN6NYWSTWCZJE2DCQKTD", "01H7VFHN56ZT2Z4C0EFX79Q31F", "01H7VFHPMS6AJY0PFGGCFFK5GX", "01H7VFHPDE1AM74JSR8KBJJF3A", "01H7VFHNF4G18PC9JHGRC8A1R6", "01H7VFHPSWGDGEYRP63H2DJKV0", "01H7VFHNEEK6M3E96CT17AKDBD", "01H7VFHNWX3KVZGH26KYNK2XNB", 
"01H7VFHPDZVVCDZR3AARA547CY", "01H7VFHNBYXD48NDRY02VCWXFN", "01H7VFHPQ1712DHGTMPQFTXH02", "01H7VFHNQA4CJEANQ1B1J1TBWV", "01H7VFHNAH7V4JNA0705SF36Y1", "01H7VFHNZQ3PQ3DHQ7H2W9ES97", "01H7VFHP3DPGRXH1Y500VQKFZA", "01H7VFHP90434Q69V7ADY0VWZJ", "01H7VFHNRKB8BJ854JPEWY8AR3", "01H7VFHN5WVC5HKKVBHZBA553R", "01H7VFHN1PK2FXY7TPWQK343BQ", "01H7VFHP6XZVT1P4R7YKAH65HJ", "01H7VFHPTKDPQ5PZWA1M1XHT1M", "01H7VFHPF8YEVH5VVNY37Q7Z1M", "01H7VFHPH567HKQRE0C745KH9C", "01H7VFHND24JAWG23XMPYGG5HK", "01H7VFHPAD4RA819KYESWBFRYS", "01H7VFHNFVGFY578101R2PCV3T", "01H7VFHP7K0EN9QX5JTD8B9NSQ", "01H7VFHNZ1XA77AG7A97M4E6C3", "01H7VFHPHWA2CYG7BC82NS4XH1", "01H7VFHN9W0WAFZCBT09PKJJNK", "01H7VFHP5H5GY9Z62J4NJYJQN1", "01H7VFHQ2EA3TTFZQ3M6DF3YCD"]' \
'--gin.ENV_MODEL="gpt-4o"' \
'--gin.AGENT1_MODEL="agent1-model"' \
'--gin.AGENT2_MODEL="agent2-model"' \
'--gin.BATCH_SIZE=20' \
'--gin.TAG="xx"' \
'--gin.PUSH_TO_DB=True' \
'--gin.TAG_TO_CHECK_EXISTING_EPISODES="qwen-sft-qwen-sft-3-26-v2"'
