Manojb's picture
Upload folder using huggingface_hub
221ca5c verified
[
{
"id": "T1",
"name": "Wikipedia Info Extraction",
"difficulty": "easy",
"message": "Go to https://en.wikipedia.org/wiki/Apple_Inc. and find: 1) When was Apple founded? 2) Who are the founders? 3) Who is the current CEO? Report your findings.",
"timeout": 180,
"pass_criteria": "Extracts at least 2 of 3 facts correctly and calls stop_loop"
},
{
"id": "T2",
"name": "DuckDuckGo Search",
"difficulty": "medium",
"message": "Go to https://lite.duckduckgo.com and search for weather in Dubai. Tell me the temperature.",
"timeout": 180,
"pass_criteria": "Navigates, types query, submits, extracts temperature"
},
{
"id": "T3",
"name": "Hacker News Top Story",
"difficulty": "easy",
"message": "Go to https://news.ycombinator.com and tell me the title of the #1 story on the front page right now.",
"timeout": 120,
"pass_criteria": "Navigates and identifies top story title"
},
{
"id": "T4",
"name": "Cat Image Detection (Falcon Perception)",
"difficulty": "medium",
"message": "Go to https://en.wikipedia.org/wiki/Cat and use vision_detect to find all images of cats on the page. Tell me how many cat images Falcon Perception detected.",
"timeout": 180,
"pass_criteria": "Navigates and calls vision_detect, reports count",
"requires_falcon": true
},
{
"id": "T5",
"name": "Form Filling",
"difficulty": "medium",
"message": "Go to https://httpbin.org/forms/post and fill in: Customer=John Doe, Size=Medium, Topping=Cheese, then click Submit Order. Tell me the response.",
"timeout": 240,
"pass_criteria": "Fills at least 2 fields and clicks submit"
},
{
"id": "T6",
"name": "reCAPTCHA Challenge",
"difficulty": "hard",
"message": "Go to https://www.google.com/recaptcha/api2/demo and solve the reCAPTCHA. Click the checkbox, use vision_detect to find targets in the image grid, then use click_coordinates_batch to click all detected centers plus the VERIFY button.",
"timeout": 300,
"pass_criteria": "Clicks checkbox, uses vision_detect, attempts batch click",
"requires_falcon": true
}
]