jpeper committed on
Commit
c9a41bc
·
verified ·
1 Parent(s): e74b832

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +13 -5
  2. annotation_data/.DS_Store +0 -0
  3. annotation_data/carcassonne_tier1/test_2001.json +15 -0
  4. annotation_data/carcassonne_tier1/test_2002.json +15 -0
  5. annotation_data/carcassonne_tier1/test_2003.json +15 -0
  6. annotation_data/carcassonne_tier1/test_2004.json +15 -0
  7. annotation_data/carcassonne_tier1/test_2005.json +15 -0
  8. annotation_data/carcassonne_tier1/test_2006.json +15 -0
  9. annotation_data/carcassonne_tier1/test_2007.json +15 -0
  10. annotation_data/carcassonne_tier1/test_2008.json +15 -0
  11. annotation_data/carcassonne_tier1/test_2009.json +15 -0
  12. annotation_data/carcassonne_tier1/test_2010.json +15 -0
  13. annotation_data/carcassonne_tier1/test_2011.json +15 -0
  14. annotation_data/carcassonne_tier1/test_2012.json +15 -0
  15. annotation_data/carcassonne_tier1/test_2013.json +15 -0
  16. annotation_data/carcassonne_tier1/test_2014.json +15 -0
  17. annotation_data/carcassonne_tier1/test_2015.json +15 -0
  18. annotation_data/carcassonne_tier1/test_2016.json +15 -0
  19. annotation_data/carcassonne_tier1/test_2017.json +15 -0
  20. annotation_data/carcassonne_tier1/test_2018.json +15 -0
  21. annotation_data/carcassonne_tier1/test_2019.json +15 -0
  22. annotation_data/carcassonne_tier1/test_2020.json +15 -0
  23. annotation_data/carcassonne_tier1/test_2021.json +15 -0
  24. annotation_data/carcassonne_tier1/test_2022.json +15 -0
  25. annotation_data/carcassonne_tier1/test_2023.json +15 -0
  26. annotation_data/carcassonne_tier1/test_2024.json +15 -0
  27. annotation_data/carcassonne_tier1/test_2025.json +15 -0
  28. annotation_data/carcassonne_tier1/test_2026.json +15 -0
  29. annotation_data/carcassonne_tier1/test_2027.json +15 -0
  30. annotation_data/carcassonne_tier1/test_2028.json +15 -0
  31. annotation_data/carcassonne_tier1/test_2029.json +15 -0
  32. annotation_data/carcassonne_tier1/test_2030.json +15 -0
  33. annotation_data/carcassonne_tier1/test_2031.json +15 -0
  34. annotation_data/carcassonne_tier1/test_2032.json +15 -0
  35. annotation_data/carcassonne_tier1/test_2033.json +15 -0
  36. annotation_data/carcassonne_tier1/test_2034.json +15 -0
  37. annotation_data/carcassonne_tier1/test_2035.json +15 -0
  38. annotation_data/carcassonne_tier1/test_2036.json +15 -0
  39. annotation_data/carcassonne_tier1/test_2037.json +15 -0
  40. annotation_data/carcassonne_tier1/test_2038.json +15 -0
  41. annotation_data/carcassonne_tier1/test_2039.json +15 -0
  42. annotation_data/carcassonne_tier1/test_2040.json +15 -0
  43. annotation_data/carcassonne_tier2/test_2101.json +15 -0
  44. annotation_data/carcassonne_tier2/test_2102.json +15 -0
  45. annotation_data/carcassonne_tier2/test_2103.json +15 -0
  46. annotation_data/carcassonne_tier2/test_2104.json +15 -0
  47. annotation_data/carcassonne_tier2/test_2105.json +15 -0
  48. annotation_data/carcassonne_tier2/test_2106.json +15 -0
  49. annotation_data/carcassonne_tier2/test_2107.json +15 -0
  50. annotation_data/carcassonne_tier2/test_2108.json +15 -0
README.md CHANGED
@@ -1,10 +1,18 @@
1
  ---
2
- title: LudoBench Test
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
  ---
2
+ title: "LudoBench: Board Game Reasoning Benchmark"
3
+ emoji: "\U0001F3B2"
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: static
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # LudoBench
12
+
13
+ A multimodal board-game reasoning benchmark evaluating LLM/VLM reasoning across 5 strategy games and 3 difficulty tiers.
14
+
15
+ - 638 annotated QA pairs
16
+ - 5 games: Kingdomino, Res Arcana, Pax Renaissance, Carcassonne, Catan
17
+ - 3 tiers: Environment Perception, Rules Integration, Short-Horizon Optimization
18
+ - 9 models benchmarked across 3 modalities (None, Text, Image)
annotation_data/.DS_Store ADDED
Binary file (10.2 kB). View file
 
annotation_data/carcassonne_tier1/test_2001.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles are present in the given landscape?",
6
+ "Answer": "9",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2001
15
+ }
annotation_data/carcassonne_tier1/test_2002.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "What is the size of the minimum rectangle that can enclose all the tiles in the given landscape? Provide the answer in the format 'width x height'.",
6
+ "Answer": "4 x 4",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2002
15
+ }
annotation_data/carcassonne_tier1/test_2003.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current landscape, how many tiles have roads shown on them?",
6
+ "Answer": "6",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2003
15
+ }
annotation_data/carcassonne_tier1/test_2004.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current landscape, how many tiles have grass present on them?",
6
+ "Answer": "8",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2004
15
+ }
annotation_data/carcassonne_tier1/test_2005.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current landscape, how many tiles contain only grass on them?",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2005
15
+ }
annotation_data/carcassonne_tier1/test_2006.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the landscape, how many tiles contain a shield (or coat of arms) symbol?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2006
15
+ }
annotation_data/carcassonne_tier1/test_2007.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles are present in the given landscape?",
6
+ "Answer": "6",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2007
15
+ }
annotation_data/carcassonne_tier1/test_2008.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current landscape, how many tiles do not contain any roads?",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2008
15
+ }
annotation_data/carcassonne_tier1/test_2009.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current landscape, how many tiles contain more than one road passing through them?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2009
15
+ }
annotation_data/carcassonne_tier1/test_2010.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "What is the size of the minimum rectangle that can enclose all the tiles in the given landscape? Provide the answer in the format 'width x height'.",
6
+ "Answer": "3 x 2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2010
15
+ }
annotation_data/carcassonne_tier1/test_2011.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles have a building present on them in the given landscape?",
6
+ "Answer": "1",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2011
15
+ }
annotation_data/carcassonne_tier1/test_2012.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "In the given landscape, how many tiles does the longest continuous road pass through?",
6
+ "Answer": "4",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2012
15
+ }
annotation_data/carcassonne_tier1/test_2013.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles are present in the given landscape?",
6
+ "Answer": "7",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2013
15
+ }
annotation_data/carcassonne_tier1/test_2014.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current landscape, how many tiles contain a single road?",
6
+ "Answer": "3",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2014
15
+ }
annotation_data/carcassonne_tier1/test_2015.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles contain grass but no road present on them in the given landscape?",
6
+ "Answer": "3",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2015
15
+ }
annotation_data/carcassonne_tier1/test_2016.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles contain a shield (or coat of arms) symbol in the given landscape?",
6
+ "Answer": "1",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2016
15
+ }
annotation_data/carcassonne_tier1/test_2017.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that the goal is to get the current landscape to have a rectangular shape with no gaps, how many additional tiles are needed to be placed in order to achieve this?",
6
+ "Answer": "5",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2017
15
+ }
annotation_data/carcassonne_tier1/test_2018.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many meeples are currently placed on the tiles in the given landscape?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2018
15
+ }
annotation_data/carcassonne_tier1/test_2019.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Which color meeple is present on the tile containing only a road segment in the given landscape?",
6
+ "Answer": "red",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2019
15
+ }
annotation_data/carcassonne_tier1/test_2020.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles do not contain any meeples in the given landscape?",
6
+ "Answer": "4",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2020
15
+ }
annotation_data/carcassonne_tier1/test_2021.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "From the given landscape, how many tiles contain sleeping meeples?",
6
+ "Answer": "1",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2021
15
+ }
annotation_data/carcassonne_tier1/test_2022.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that the goal is to create a rectangular shape with no gaps, how many additional tiles are needed to be placed to achieve this?",
6
+ "Answer": "5",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2022
15
+ }
annotation_data/carcassonne_tier1/test_2023.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Which color meeples are present on the tiles containing at least one road segment in the given landscape?",
6
+ "Answer": "red, blue",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2023
15
+ }
annotation_data/carcassonne_tier1/test_2024.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many standing yellow meeples are present in the given landscape?",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2024
15
+ }
annotation_data/carcassonne_tier1/test_2025.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the scoreboard, how many players are playing this game?",
6
+ "Answer": "3",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2025
15
+ }
annotation_data/carcassonne_tier1/test_2026.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many points does the green player have according to the scoreboard?",
6
+ "Answer": "8",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2026
15
+ }
annotation_data/carcassonne_tier1/test_2027.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many points does the blue player have according to the scoreboard?",
6
+ "Answer": "30",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_1.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2027
15
+ }
annotation_data/carcassonne_tier1/test_2028.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many points does the blue player have according to the scoreboard?",
6
+ "Answer": "40",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2028
15
+ }
annotation_data/carcassonne_tier1/test_2029.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many meeples are currently in the standing position on the scoreboard?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2029
15
+ }
annotation_data/carcassonne_tier1/test_2030.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that meeples in any orientation (standing or sleeping) are considered to be the same, how many points does the red player have according to the scoreboard?",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2030
15
+ }
annotation_data/carcassonne_tier1/test_2031.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Given the current scoreboard, how many players have their meeples in sleeping position?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2031
15
+ }
annotation_data/carcassonne_tier1/test_2032.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that meeples in any orientation (standing or sleeping) are considered to be the same, which player has the highest points according to the scoreboard?",
6
+ "Answer": "Blue",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2032
15
+ }
annotation_data/carcassonne_tier1/test_2033.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that meeples in any orientation (standing or sleeping) are considered to be the same, how many players have scored less than 10 points according to the scoreboard?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2033
15
+ }
annotation_data/carcassonne_tier1/test_2034.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that meeples in any orientation (standing or sleeping) are considered to be the same, what is the score of the green player according to the scoreboard?",
6
+ "Answer": "6",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2034
15
+ }
annotation_data/carcassonne_tier1/test_2035.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "Assuming that meeples in any orientation (standing or sleeping) are considered to be the same, which player has the second highest points according to the scoreboard?",
6
+ "Answer": "Green",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2035
15
+ }
annotation_data/carcassonne_tier1/test_2036.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many tiles are present in the given gamestate image?",
6
+ "Answer": "31",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/farmers_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2036
15
+ }
annotation_data/carcassonne_tier1/test_2037.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many standing meeples are present in the given gamestate image?",
6
+ "Answer": "1",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/farmers_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2037
15
+ }
annotation_data/carcassonne_tier1/test_2038.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many meeples are present in the given gamestate image? Consider that both standing and lying meeples are counted as meeples.",
6
+ "Answer": "4",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/farmers_2.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2038
15
+ }
annotation_data/carcassonne_tier1/test_2039.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "How many red meeples are present in the given gamestate image? Consider that both standing and lying meeples are counted as meeples.",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/possible_tiles_3.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2039
15
+ }
annotation_data/carcassonne_tier1/test_2040.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding. Perceiving elements of the Carcassonne game state shown in the image.",
4
+ "Difficulty Notes": "Very easy counting",
5
+ "Question": "In the given gamestate image, how many standing red meeples are present?",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/possible_tiles_4.png",
9
+ "Game State Difficulty": 1,
10
+ "Rules Difficulty": 0,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 1,
13
+ "json_game_state_url": "",
14
+ "ID": 2040
15
+ }
annotation_data/carcassonne_tier2/test_2101.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "How many completed cities are there on the given landscape?",
6
+ "Answer": "1",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2101
15
+ }
annotation_data/carcassonne_tier2/test_2102.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "How many completed roads are present in the given landscape?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2102
15
+ }
annotation_data/carcassonne_tier2/test_2103.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "How many tiles are surrounding the monastery in the given landscape?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/features_1.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2103
15
+ }
annotation_data/carcassonne_tier2/test_2104.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "Considering that the placement of the tile and meeple in this turn is completed, during scoring of this turn, how many points will the blue player earn?",
6
+ "Answer": "10",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2104
15
+ }
annotation_data/carcassonne_tier2/test_2105.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "Considering that the placement of the tile and meeple in this turn is completed, during scoring of this turn, how many points will the yellow player earn? (Assume game does not end after this turn)",
6
+ "Answer": "0",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/meeple_landscape_2.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2105
15
+ }
annotation_data/carcassonne_tier2/test_2106.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "According to the given scoreboard, which player has the highest score at this point in the game?",
6
+ "Answer": "green",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_3.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2106
15
+ }
annotation_data/carcassonne_tier2/test_2107.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "How many players have a score of more than 25 points according to the given scoreboard?",
6
+ "Answer": "2",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/players_2.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2107
15
+ }
annotation_data/carcassonne_tier2/test_2108.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Game": "carcassonne",
3
+ "Test Description": "Testing basic game understanding and rule understanding.",
4
+ "Difficulty Notes": "simple rule application",
5
+ "Question": "Given the landscape, what are the possible tiles a player can place in the position marked with an `X`? Output the valid tiles in alphabetical order separated by commas.",
6
+ "Answer": "A,C",
7
+ "Rationale": "",
8
+ "game_state_url": "gamestates/images/carcassonne/possible_tiles_1.png",
9
+ "Game State Difficulty": 2,
10
+ "Rules Difficulty": 2,
11
+ "Strategy Difficulty": 0,
12
+ "tier": 2,
13
+ "json_game_state_url": "",
14
+ "ID": 2108
15
+ }