omnibench-env / scripts /sample_actions_web.json
AGIreflex's picture
Sync from GitHub via hub-sync
fe21eda verified
{
"domain": "web",
"scenario_id": "SearchGlitch",
"mission_id": "searchglitch_web_sample",
"reset_payload": {
"seed": 42,
"scenario_id": "SearchGlitch",
"mission_id": "searchglitch_web_sample",
"options": {
"env_id": "omnibench_aegis_env:web.searchglitch",
"max_steps": 5,
"target_score": 1,
"domain": "web"
}
},
"action_examples": {
"canonical": [
{
"name": "inspect_query",
"args": {}
},
{
"name": "fetch_results_page",
"args": {
"page": 1
}
},
{
"name": "deduplicate_results",
"args": {}
},
{
"name": "verify_source_consistency",
"args": {}
},
{
"name": "submit_result",
"args": {
"include_citations": true
}
}
],
"shorthand": [
{
"action": "inspect_query"
},
{
"action": "fetch_results_page",
"page": 1
},
{
"action": "deduplicate_results"
},
{
"action": "verify_source_consistency"
},
{
"action": "submit_result",
"include_citations": true
}
]
},
"expected_good_trajectory": [
"inspect_query",
"fetch_results_page",
"deduplicate_results",
"verify_source_consistency",
"submit_result"
],
"expected_bad_trajectory": [
"fetch_results_page",
"submit_result"
],
"expected_flow": [
"health",
"reset",
"step",
"state"
],
"notes": [
"Aligned fixture for the SearchGlitch web domain.",
"The good trajectory checks the query, fetches results, removes duplicates, verifies consistency, and submits a sourced answer.",
"Submitting before deduplication or verification should remain a weak trajectory."
]
}