[ { "id": "T1", "name": "Wikipedia Info Extraction", "difficulty": "easy", "message": "Go to https://en.wikipedia.org/wiki/Apple_Inc. and find: 1) When was Apple founded? 2) Who are the founders? 3) Who is the current CEO? Report your findings.", "timeout": 180, "pass_criteria": "Extracts at least 2 of 3 facts correctly and calls stop_loop" }, { "id": "T2", "name": "DuckDuckGo Search", "difficulty": "medium", "message": "Go to https://lite.duckduckgo.com and search for weather in Dubai. Tell me the temperature.", "timeout": 180, "pass_criteria": "Navigates, types query, submits, extracts temperature" }, { "id": "T3", "name": "Hacker News Top Story", "difficulty": "easy", "message": "Go to https://news.ycombinator.com and tell me the title of the #1 story on the front page right now.", "timeout": 120, "pass_criteria": "Navigates and identifies top story title" }, { "id": "T4", "name": "Cat Image Detection (Falcon Perception)", "difficulty": "medium", "message": "Go to https://en.wikipedia.org/wiki/Cat and use vision_detect to find all images of cats on the page. Tell me how many cat images Falcon Perception detected.", "timeout": 180, "pass_criteria": "Navigates and calls vision_detect, reports count", "requires_falcon": true }, { "id": "T5", "name": "Form Filling", "difficulty": "medium", "message": "Go to https://httpbin.org/forms/post and fill in: Customer=John Doe, Size=Medium, Topping=Cheese, then click Submit Order. Tell me the response.", "timeout": 240, "pass_criteria": "Fills at least 2 fields and clicks submit" }, { "id": "T6", "name": "reCAPTCHA Challenge", "difficulty": "hard", "message": "Go to https://www.google.com/recaptcha/api2/demo and solve the reCAPTCHA. Click the checkbox, use vision_detect to find targets in the image grid, then use click_coordinates_batch to click all detected centers plus the VERIFY button.", "timeout": 300, "pass_criteria": "Clicks checkbox, uses vision_detect, attempts batch click", "requires_falcon": true } ]