| [ |
| { |
| "id": "T1", |
| "name": "Wikipedia Info Extraction", |
| "difficulty": "easy", |
| "message": "Go to https://en.wikipedia.org/wiki/Apple_Inc. and find: 1) When was Apple founded? 2) Who are the founders? 3) Who is the current CEO? Report your findings.", |
| "timeout": 180, |
| "pass_criteria": "Extracts at least 2 of 3 facts correctly and calls stop_loop" |
| }, |
| { |
| "id": "T2", |
| "name": "DuckDuckGo Search", |
| "difficulty": "medium", |
| "message": "Go to https://lite.duckduckgo.com and search for weather in Dubai. Tell me the temperature.", |
| "timeout": 180, |
| "pass_criteria": "Navigates, types query, submits, extracts temperature" |
| }, |
| { |
| "id": "T3", |
| "name": "Hacker News Top Story", |
| "difficulty": "easy", |
| "message": "Go to https://news.ycombinator.com and tell me the title of the #1 story on the front page right now.", |
| "timeout": 120, |
| "pass_criteria": "Navigates and identifies top story title" |
| }, |
| { |
| "id": "T4", |
| "name": "Cat Image Detection (Falcon Perception)", |
| "difficulty": "medium", |
| "message": "Go to https://en.wikipedia.org/wiki/Cat and use vision_detect to find all images of cats on the page. Tell me how many cat images Falcon Perception detected.", |
| "timeout": 180, |
| "pass_criteria": "Navigates and calls vision_detect, reports count", |
| "requires_falcon": true |
| }, |
| { |
| "id": "T5", |
| "name": "Form Filling", |
| "difficulty": "medium", |
| "message": "Go to https://httpbin.org/forms/post and fill in: Customer=John Doe, Size=Medium, Topping=Cheese, then click Submit Order. Tell me the response.", |
| "timeout": 240, |
| "pass_criteria": "Fills at least 2 fields and clicks submit" |
| }, |
| { |
| "id": "T6", |
| "name": "reCAPTCHA Challenge", |
| "difficulty": "hard", |
| "message": "Go to https://www.google.com/recaptcha/api2/demo and solve the reCAPTCHA. Click the checkbox, use vision_detect to find targets in the image grid, then use click_coordinates_batch to click all detected centers plus the VERIFY button.", |
| "timeout": 300, |
| "pass_criteria": "Clicks checkbox, uses vision_detect, attempts batch click", |
| "requires_falcon": true |
| } |
| ] |
|
|