File size: 2,146 Bytes
221ca5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
[
  {
    "id": "T1",
    "name": "Wikipedia Info Extraction",
    "difficulty": "easy",
    "message": "Go to https://en.wikipedia.org/wiki/Apple_Inc. and find: 1) When was Apple founded? 2) Who are the founders? 3) Who is the current CEO? Report your findings.",
    "timeout": 180,
    "pass_criteria": "Extracts at least 2 of 3 facts correctly and calls stop_loop"
  },
  {
    "id": "T2",
    "name": "DuckDuckGo Search",
    "difficulty": "medium",
    "message": "Go to https://lite.duckduckgo.com and search for weather in Dubai. Tell me the temperature.",
    "timeout": 180,
    "pass_criteria": "Navigates, types query, submits, extracts temperature"
  },
  {
    "id": "T3",
    "name": "Hacker News Top Story",
    "difficulty": "easy",
    "message": "Go to https://news.ycombinator.com and tell me the title of the #1 story on the front page right now.",
    "timeout": 120,
    "pass_criteria": "Navigates and identifies top story title"
  },
  {
    "id": "T4",
    "name": "Cat Image Detection (Falcon Perception)",
    "difficulty": "medium",
    "message": "Go to https://en.wikipedia.org/wiki/Cat and use vision_detect to find all images of cats on the page. Tell me how many cat images Falcon Perception detected.",
    "timeout": 180,
    "pass_criteria": "Navigates and calls vision_detect, reports count",
    "requires_falcon": true
  },
  {
    "id": "T5",
    "name": "Form Filling",
    "difficulty": "medium",
    "message": "Go to https://httpbin.org/forms/post and fill in: Customer=John Doe, Size=Medium, Topping=Cheese, then click Submit Order. Tell me the response.",
    "timeout": 240,
    "pass_criteria": "Fills at least 2 fields and clicks submit"
  },
  {
    "id": "T6",
    "name": "reCAPTCHA Challenge",
    "difficulty": "hard",
    "message": "Go to https://www.google.com/recaptcha/api2/demo and solve the reCAPTCHA. Click the checkbox, use vision_detect to find targets in the image grid, then use click_coordinates_batch to click all detected centers plus the VERIFY button.",
    "timeout": 300,
    "pass_criteria": "Clicks checkbox, uses vision_detect, attempts batch click",
    "requires_falcon": true
  }
]